/*
 * Copyright (c) 2017-2023 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* $FreeBSD: src/sys/netinet6/frag6.c,v 1.2.2.5 2001/07/03 11:01:50 ume Exp $ */
/* $KAME: frag6.c,v 1.31 2001/05/17 13:45:34 jinmei Exp $ */

/*
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * @file
 * flowswitch IP reassembly for both v4 and v6
 *
 * Implementation of IP packet reassembly for fragments arriving on the
 * flowswitch RX path.
 *
 */

#include <sys/domain.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <skywalk/nexus/flowswitch/fsw_var.h>
#include <kern/uipc_domain.h>

#define IPFM_MAX_FRAGS_PER_QUEUE        128     /* RFC 791 64K/(512 min MTU) */
#define IPFM_MAX_QUEUES                 1024    /* same as ip/ip6 */
#define IPFM_FRAG_TTL                   60      /* RFC 2460 */
#define IPFM_TIMEOUT_TCALL_INTERVAL     1

static uint32_t ipfm_max_frags_per_queue = IPFM_MAX_FRAGS_PER_QUEUE;
static uint32_t ipfm_frag_ttl = IPFM_FRAG_TTL;
static uint32_t ipfm_timeout_tcall_ival = IPFM_TIMEOUT_TCALL_INTERVAL;

#if (DEVELOPMENT || DEBUG)
SYSCTL_INT(_kern_skywalk_flowswitch, OID_AUTO,
    ipfm_max_frags_per_queue, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ipfm_max_frags_per_queue, 0, "");
#endif /* DEVELOPMENT || DEBUG */

SYSCTL_INT(_kern_skywalk_flowswitch, OID_AUTO, ipfm_frag_ttl,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ipfm_frag_ttl, 0, "");
SYSCTL_INT(_kern_skywalk_flowswitch, OID_AUTO,
    ipfm_timeout_tcall_ival, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ipfm_timeout_tcall_ival, 0, "");

static LCK_GRP_DECLARE(fsw_ipfm_lock_group, "sk_fsw_ipfm_lock");
static LCK_ATTR_DECLARE(fsw_ipfm_lock_attr, 0, 0);

/* @internal ip fragment wrapper (chained in an ipfq) for __kern_packet */
struct ipf {
	struct ipf      *ipf_down;
	struct ipf      *ipf_up;
	struct __kern_packet *ipf_pkt;
	int             ipf_len;        /* fragmentable part length */
	int             ipf_off;        /* fragment offset */
	uint16_t        ipf_mff;        /* more fragment bit in frag off */
};

/* @internal ip fragment lookup key */
struct ipf_key {
	uint64_t        ipfk_addr[4];   /* src + dst ip addr (v4/v6) */
	uint32_t        ipfk_ident;     /* IP identification */
	uint16_t        ipfk_len;       /* len of ipfk_addr field */
};

enum {
	IPFK_LEN_V4 = 2 * sizeof(struct in_addr),
	IPFK_LEN_V6 = 2 * sizeof(struct in6_addr),
};
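
/*
 * Only the leading ipfk_len bytes of ipfk_addr are significant:
 * IPFK_LEN_V4 (8 bytes, two struct in_addr) for v4 keys, IPFK_LEN_V6
 * (32 bytes, two struct in6_addr) for v6 keys; ipf_key_cmp() compares
 * only that many bytes.
 */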

/*
 * @internal
 * IP reassembly queue structure.  Each fragment (struct ipf)
 * being reassembled is attached to one of these structures.
 */
struct ipfq {
	struct ipf      *ipfq_down;     /* fragment chain */
	struct ipf      *ipfq_up;
	struct ipfq     *ipfq_next;     /* queue chain */
	struct ipfq     *ipfq_prev;
	uint64_t        ipfq_timestamp; /* time of creation */
	struct ipf_key  ipfq_key;       /* ipfq search key */
	uint16_t        ipfq_nfrag;     /* # of fragments in queue */
	int             ipfq_unfraglen; /* len of unfragmentable part */
	bool            ipfq_is_dirty;  /* q is dirty, don't use */
};

/*
 * @internal (externally opaque)
 * flowswitch IP Fragment Manager
 */
struct fsw_ip_frag_mgr {
	struct skoid    ipfm_skoid;
	struct ipfq     ipfm_q;         /* ip reassembly queues */
	uint32_t        ipfm_q_limit;   /* limit # of reass queues */
	uint32_t        ipfm_q_count;   /* # of allocated reass queues */
	uint32_t        ipfm_f_limit;   /* limit # of ipfs */
	uint32_t        ipfm_f_count;   /* current # of allocated ipfs */
	decl_lck_mtx_data(, ipfm_lock); /* guard reass and timeout cleanup */
	thread_call_t   ipfm_timeout_tcall;     /* frag timeout thread */

	struct ifnet    *ipfm_ifp;
	struct fsw_stats *ipfm_stats;   /* indirect stats in fsw */
};

static int ipf_process(struct fsw_ip_frag_mgr *, struct __kern_packet **,
    struct ipf_key *, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t *,
    uint16_t *);
static int ipf_key_cmp(struct ipf_key *, struct ipf_key *);
static void ipf_enq(struct ipf *, struct ipf *);
static void ipf_deq(struct ipf *);
static void ipfq_insque(struct ipfq *, struct ipfq *);
static void ipfq_remque(struct ipfq *);
static uint32_t ipfq_freef(struct fsw_ip_frag_mgr *mgr, struct ipfq *,
    void (*)(struct fsw_ip_frag_mgr *, struct ipf *));

static void ipfq_timeout(thread_call_param_t, thread_call_param_t);
static void ipfq_sched_timeout(struct fsw_ip_frag_mgr *, boolean_t);

static struct ipfq *ipfq_alloc(struct fsw_ip_frag_mgr *mgr);
static void ipfq_free(struct fsw_ip_frag_mgr *mgr, struct ipfq *q);
static uint32_t ipfq_freefq(struct fsw_ip_frag_mgr *mgr, struct ipfq *q,
    void (*ipf_cb)(struct fsw_ip_frag_mgr *, struct ipf *));
static struct ipf *ipf_alloc(struct fsw_ip_frag_mgr *mgr);
static void ipf_free(struct fsw_ip_frag_mgr *mgr, struct ipf *f);
static void ipf_free_pkt(struct ipf *f);
static void ipfq_drain(struct fsw_ip_frag_mgr *mgr);
static void ipfq_reap(struct fsw_ip_frag_mgr *mgr);
static int ipfq_drain_sysctl SYSCTL_HANDLER_ARGS;
static struct mbuf *ipf_pkt2mbuf(struct fsw_ip_frag_mgr *, struct __kern_packet *);
static void ipf_icmp6_error_flag(struct mbuf *, int, int, int, int);
void ipf_icmp_param_err(struct fsw_ip_frag_mgr *, struct __kern_packet *pkt,
    int param);
void ipf_icmp_timeout_err(struct fsw_ip_frag_mgr *, struct ipf *f);

/* Create a flowswitch IP fragment manager. */
struct fsw_ip_frag_mgr *
fsw_ip_frag_mgr_create(struct nx_flowswitch *fsw, struct ifnet *ifp,
    size_t f_limit)
{
	struct fsw_ip_frag_mgr *mgr;

	ASSERT(ifp != NULL);
	mgr = sk_alloc_type(struct fsw_ip_frag_mgr, Z_WAITOK | Z_NOFAIL,
	    skmem_tag_fsw_frag_mgr);
	mgr->ipfm_q.ipfq_next = mgr->ipfm_q.ipfq_prev = &mgr->ipfm_q;
	lck_mtx_init(&mgr->ipfm_lock, &fsw_ipfm_lock_group, &fsw_ipfm_lock_attr);

	mgr->ipfm_timeout_tcall =
	    thread_call_allocate_with_options(ipfq_timeout, mgr,
	    THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
	VERIFY(mgr->ipfm_timeout_tcall != NULL);

	mgr->ipfm_ifp = ifp;
	mgr->ipfm_stats = &fsw->fsw_stats;

	/* Use caller provided limit (caller knows pool size) */
	ASSERT(f_limit >= 2 && f_limit < UINT32_MAX);
	mgr->ipfm_f_limit = (uint32_t)f_limit;
	mgr->ipfm_f_count = 0;
	mgr->ipfm_q_limit = MIN(IPFM_MAX_QUEUES, mgr->ipfm_f_limit / 2);
	mgr->ipfm_q_count = 0;

	skoid_create(&mgr->ipfm_skoid, SKOID_DNODE(fsw->fsw_skoid), "ipfm", 0);
	skoid_add_uint(&mgr->ipfm_skoid, "frag_limit", CTLFLAG_RW,
	    &mgr->ipfm_f_limit);
	skoid_add_uint(&mgr->ipfm_skoid, "frag_count", CTLFLAG_RD,
	    &mgr->ipfm_f_count);
	skoid_add_uint(&mgr->ipfm_skoid, "queue_limit", CTLFLAG_RW,
	    &mgr->ipfm_q_limit);
	skoid_add_uint(&mgr->ipfm_skoid, "queue_count", CTLFLAG_RD,
	    &mgr->ipfm_q_count);
	skoid_add_handler(&mgr->ipfm_skoid, "drain", CTLFLAG_RW,
	    ipfq_drain_sysctl, mgr, 0);

	return mgr;
}
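
/*
 * Lifecycle sketch (illustrative only; the real call sites live in the
 * flowswitch setup/teardown and RX paths):
 *
 *	mgr = fsw_ip_frag_mgr_create(fsw, ifp, f_limit);
 *	...
 *	err = fsw_ip_frag_reass_v4(mgr, &pkt, ip4, &nfrags, &tlen);
 *	...
 *	fsw_ip_frag_mgr_destroy(mgr);
 */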

/* Free a flowswitch IP fragment manager. */
void
fsw_ip_frag_mgr_destroy(struct fsw_ip_frag_mgr *mgr)
{
	thread_call_t __single tcall;

	lck_mtx_lock(&mgr->ipfm_lock);
	if ((tcall = mgr->ipfm_timeout_tcall) != NULL) {
		lck_mtx_unlock(&mgr->ipfm_lock);
		(void) thread_call_cancel_wait(tcall);
		(void) thread_call_free(tcall);
		mgr->ipfm_timeout_tcall = NULL;
		lck_mtx_lock(&mgr->ipfm_lock);
	}

	ipfq_drain(mgr);

	lck_mtx_unlock(&mgr->ipfm_lock);
	lck_mtx_destroy(&mgr->ipfm_lock, &fsw_ipfm_lock_group);

	skoid_destroy(&mgr->ipfm_skoid);
	sk_free_type(struct fsw_ip_frag_mgr, mgr);
}

/*
 * Reassemble a received IPv4 fragment.
 *
 * @param mgr
 *   fragment manager
 * @param pkt
 *   received packet (must have ipv4 header validated)
 * @param ip4
 *   pointer to the packet's IPv4 header
 * @param nfrags
 *   number of fragments reassembled
 * @param tlen
 *   incremented by the fragmentable part length of each additional
 *   fragment upon successful reassembly
 * @return
 *   Successfully processed (not fully reassembled)
 *     ret = 0, *pkt = NULL (ipfm owns it), *nfrags = 0
 *   Successfully reassembled
 *     ret = 0, *pkt = 1st fragment (fragments chained in order by
 *     pkt_nextpkt), *nfrags = number of all fragments (>0)
 *   Error
 *     ret != 0 && *pkt unmodified (caller to decide what to do with *pkt)
 *     *nfrags = 0
 *   See the usage sketch after this function.
 */
int
fsw_ip_frag_reass_v4(struct fsw_ip_frag_mgr *mgr, struct __kern_packet **pkt,
    struct ip *ip4, uint16_t *nfrags, uint16_t *tlen)
{
	struct ipf_key key;
	uint16_t unfragpartlen, offflag, fragoff, fragpartlen, fragflag;
	int err;
	uint8_t *src;

	STATS_INC(mgr->ipfm_stats, FSW_STATS_RX_FRAG_V4);

	src = (uint8_t *)(struct ip *__bidi_indexable)ip4 +
	    offsetof(struct ip, ip_src);
	bcopy(src, (void *)key.ipfk_addr, IPFK_LEN_V4);
	key.ipfk_len = IPFK_LEN_V4;
	key.ipfk_ident = (uint32_t)ip4->ip_id;

	unfragpartlen = (uint16_t)(ip4->ip_hl << 2);
	offflag = ntohs(ip4->ip_off);
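	/*
	 * The cast back to uint16_t below discards the RF/DF/MF flag bits
	 * (0xe000 << 3 lands in bits 16-18), so no explicit IP_OFFMASK is
	 * needed; the result is the fragment offset in bytes.
	 */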
	fragoff = (uint16_t)(offflag << 3);
	fragpartlen = ntohs(ip4->ip_len) - (uint16_t)(ip4->ip_hl << 2);
	fragflag = offflag & IP_MF;

	err = ipf_process(mgr, pkt, &key, unfragpartlen, fragoff, fragpartlen,
	    fragflag, nfrags, tlen);

	/*
	 * If packet has been reassembled compute the user data length.
	 */
	if (*pkt != NULL) {
		struct __kern_packet *p = *pkt;
		struct ip *__single iph = __unsafe_forge_single(struct ip *,
		    (struct ip *)p->pkt_flow_ip_hdr);

		p->pkt_flow_ulen = ntohs(iph->ip_len) -
		    p->pkt_flow_ip_hlen - p->pkt_flow->flow_l4._l4_hlen;
	}
	return err;
}
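
/*
 * A minimal sketch of how a caller handles the three-way contract above
 * (illustrative only; the real call site lives in the flowswitch RX path):
 *
 *	uint16_t nfrags = 0, tlen = 0;
 *	int err = fsw_ip_frag_reass_v4(mgr, &pkt, ip4, &nfrags, &tlen);
 *
 *	if (err != 0) {
 *		// caller still owns pkt; typically drop it
 *	} else if (pkt == NULL) {
 *		// ipfm kept the fragment; wait for the rest
 *	} else {
 *		// complete: walk the nfrags packets via pkt_nextpkt
 *	}
 */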

/*
 * Reassemble a received IPv6 fragment.
 *
 * @param mgr
 *   fragment manager
 * @param pkt
 *   received packet (must have ipv6 header validated)
 * @param ip6
 *   pointer to the packet's IPv6 header
 * @param ip6f
 *   pointer to the packet's IPv6 Fragment Header
 * @param nfrags
 *   number of fragments reassembled
 * @param tlen
 *   incremented by the fragmentable part length of each additional
 *   fragment upon successful reassembly
 * @return
 *   Successfully processed (not fully reassembled)
 *     ret = 0, *pkt = NULL (ipfm owns it), *nfrags = 0
 *   Successfully reassembled
 *     ret = 0, *pkt = 1st fragment (fragments chained in order by
 *     pkt_nextpkt), *nfrags = number of all fragments (>0)
 *   Error
 *     ret != 0 && *pkt unmodified (caller to decide what to do with *pkt)
 *     *nfrags = 0
 */
int
fsw_ip_frag_reass_v6(struct fsw_ip_frag_mgr *mgr, struct __kern_packet **pkt,
    struct ip6_hdr *ip6, struct ip6_frag *ip6f, uint16_t *nfrags,
    uint16_t *tlen)
{
	struct ipf_key key;
	ptrdiff_t ip6f_ptroff = (uintptr_t)ip6f - (uintptr_t)ip6;
	uint16_t ip6f_off, fragoff, fragpartlen, unfragpartlen, fragflag;
	int err;
	uint8_t *src;

	STATS_INC(mgr->ipfm_stats, FSW_STATS_RX_FRAG_V6);

	/* jumbo payload can't contain a fragment header */
	if (ip6->ip6_plen == 0) {
		*nfrags = 0;
		return ERANGE;
	}

	ASSERT(ip6f_ptroff < UINT16_MAX);
	ip6f_off = (uint16_t)ip6f_ptroff;
	fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
	fragpartlen = ntohs(ip6->ip6_plen) -
	    (ip6f_off + sizeof(struct ip6_frag) - sizeof(struct ip6_hdr));
	unfragpartlen = ip6f_off;
	fragflag = ip6f->ip6f_offlg & IP6F_MORE_FRAG;

	/*
	 * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0)
	 * upfront, unrelated to any reassembly.
	 *
	 * Flow classifier should process those as non-frag, ipfm shouldn't see
	 * them.
	 */
	ASSERT((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) != 0);

	src = (uint8_t *)(struct ip6_hdr *__bidi_indexable)ip6 +
	    offsetof(struct ip6_hdr, ip6_src);
	bcopy(src, (void *)key.ipfk_addr, IPFK_LEN_V6);
	key.ipfk_len = IPFK_LEN_V6;
	key.ipfk_ident = ip6f->ip6f_ident;

	/*
	 * https://tools.ietf.org/html/rfc8200#page-20
	 * If the first fragment does not include all headers through an
	 * Upper-Layer header, then that fragment should be discarded and
	 * an ICMP Parameter Problem, Code 3, message should be sent to
	 * the source of the fragment, with the Pointer field set to zero.
	 */
	if (fragoff == 0) {
		struct __kern_packet *p = *pkt;
		struct mbuf *m = ipf_pkt2mbuf(mgr, p);
		if (__probable(m != NULL)) {
			if (!ip6_pkt_has_ulp(m)) {
				ipf_icmp6_error_flag(m, ICMP6_PARAM_PROB,
				    ICMP6_PARAMPROB_FIRSTFRAG_INCOMP_HDR, 0, 0);
				/* per the contract above, no frags on error */
				*nfrags = 0;
				return EINVAL;
			} else {
				mbuf_freem(m);
			}
		}
	}

	err = ipf_process(mgr, pkt, &key, unfragpartlen, fragoff, fragpartlen,
	    fragflag, nfrags, tlen);

	/*
	 * If packet has been reassembled compute the user data length.
	 */
	if (*pkt != NULL) {
		struct __kern_packet *p = *pkt;
		struct ip6_hdr *__single ip6h = __unsafe_forge_single(struct ip6_hdr *,
		    (struct ip6_hdr *)p->pkt_flow_ip_hdr);

		p->pkt_flow_ulen = ntohs(ip6h->ip6_plen) -
		    p->pkt_flow->flow_l4._l4_hlen;
	}
	return err;
}

static struct mbuf *
ipf_pkt2mbuf(struct fsw_ip_frag_mgr *mgr, struct __kern_packet *pkt)
{
	unsigned int one = 1;
	struct mbuf *__single m = NULL;
	struct mbuf *pkt_mbuf = pkt->pkt_mbuf;
	uint8_t *buf;
	struct ip6_hdr *ip6;
	uint32_t l3t_len;
	int err;

	l3t_len = pkt->pkt_length - pkt->pkt_l2_len;
	if (pkt->pkt_link_flags & PKT_LINKF_ETHFCS) {
		l3t_len -= ETHER_CRC_LEN;
	}

	err = mbuf_allocpacket(MBUF_WAITOK, l3t_len, &one, &m);
	VERIFY(err == 0);
	ASSERT(l3t_len <= mbuf_maxlen(m));

	if (pkt->pkt_pflags & PKT_F_MBUF_DATA) {
		if ((pkt_mbuf->m_len < l3t_len) &&
		    (pkt_mbuf = m_pullup(pkt->pkt_mbuf, l3t_len)) == NULL) {
			/* m_pullup failed; don't leak the mbuf allocated above */
			mbuf_freem(m);
			return NULL;
		} else {
			pkt->pkt_mbuf = pkt_mbuf;
			bcopy(m_mtod_current(pkt->pkt_mbuf) + pkt->pkt_l2_len,
			    m_mtod_current(m), l3t_len);
		}
	} else {
		MD_BUFLET_ADDR_ABS(pkt, buf);
		buf += (pkt->pkt_headroom + pkt->pkt_l2_len);
		bcopy(buf, m_mtod_current(m), l3t_len);
	}
	m->m_pkthdr.len = m->m_len = l3t_len;

	ip6 = mtod(m, struct ip6_hdr *);
	/* note for casting: IN6_IS_SCOPE_ doesn't need alignment */
	if (IN6_IS_SCOPE_LINKLOCAL((struct in6_addr *)(uintptr_t)&ip6->ip6_src)) {
		if (in6_embedded_scope) {
			ip6->ip6_src.s6_addr16[1] = htons(mgr->ipfm_ifp->if_index);
		}
		ip6_output_setsrcifscope(m, mgr->ipfm_ifp->if_index, NULL);
	}
	if (IN6_IS_SCOPE_EMBED((struct in6_addr *)(uintptr_t)&ip6->ip6_dst)) {
		if (in6_embedded_scope) {
			ip6->ip6_dst.s6_addr16[1] = htons(mgr->ipfm_ifp->if_index);
		}
		ip6_output_setdstifscope(m, mgr->ipfm_ifp->if_index, NULL);
	}

	return m;
}

/*
 * Since this function can be called while holding fsw_ip_frag_mgr.ipfm_lock,
 * we need to ensure we don't enter the driver directly because a deadlock
 * can happen if this same thread tries to get the workloop lock.
 */
static void
ipf_icmp6_error_flag(struct mbuf *m, int type, int code, int param, int flags)
{
	sk_protect_t protect = sk_async_transmit_protect();
	icmp6_error_flag(m, type, code, param, flags);
	sk_async_transmit_unprotect(protect);
}

/*
 * @internal IP fragment ICMP parameter problem error handling
 *
 * @param param_offset
 *   offending parameter offset, only applicable to ICMPv6
 */
void
ipf_icmp_param_err(struct fsw_ip_frag_mgr *mgr, struct __kern_packet *pkt,
    int param_offset)
{
	if (pkt->pkt_flow_ip_ver != IPV6_VERSION) {
		return;
	}

	struct mbuf *m = NULL;
	m = ipf_pkt2mbuf(mgr, pkt);
	if (__probable(m != NULL)) {
		ipf_icmp6_error_flag(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
		    param_offset, 0);
	}

	/* m is consumed (freed) by icmp6_error_flag() */
}

/* @internal IP fragment ICMP timeout error handling */
void
ipf_icmp_timeout_err(struct fsw_ip_frag_mgr *mgr, struct ipf *f)
{
	struct __kern_packet *pkt = f->ipf_pkt;
	ASSERT(pkt != NULL);

	/* no icmp error packet for ipv4 */
	if (pkt->pkt_flow_ip_ver != IPV6_VERSION) {
		return;
	}

	/* only for the first fragment */
	if (f->ipf_off != 0) {
		return;
	}

	struct mbuf *m = NULL;
	m = ipf_pkt2mbuf(mgr, pkt);
	if (__probable(m != NULL)) {
		ipf_icmp6_error_flag(m, ICMP6_TIME_EXCEEDED,
		    ICMP6_TIME_EXCEED_REASSEMBLY, 0, 0);
	}

	/* m is consumed (freed) by icmp6_error_flag() */
}

/* @internal IP fragment processing, v4/v6 agnostic */
int
ipf_process(struct fsw_ip_frag_mgr *mgr, struct __kern_packet **pkt_ptr,
    struct ipf_key *key, uint16_t unfraglen, uint16_t fragoff,
    uint16_t fragpartlen, uint16_t fragflag, uint16_t *nfrags, uint16_t *tlen)
{
	struct __kern_packet *pkt = *pkt_ptr;
	struct __kern_packet *pkt_reassed = NULL;
	struct ipfq *q, *mq = &mgr->ipfm_q;
	struct ipf *f, *f_new, *f_down;
	uint32_t nfrags_freed;
	int next;
	int first_frag = 0;
	int err = 0;
	int local_ipfq_unfraglen;

	*nfrags = 0;

	SK_DF(SK_VERB_IP_FRAG, "id %5d  fragoff %5d  fragpartlen %5d  "
	    "fragflag 0x%x", key->ipfk_ident, fragoff, fragpartlen, fragflag);

	/*
	 * Make sure that all fragments except last one have a data length
	 * that's a non-zero multiple of 8 bytes.
	 */
	if (fragflag && (fragpartlen == 0 || (fragpartlen & 0x7) != 0)) {
		SK_DF(SK_VERB_IP_FRAG, "frag not multiple of 8 bytes");
		STATS_INC(mgr->ipfm_stats, FSW_STATS_RX_FRAG_DROP_BAD_LEN);
		ipf_icmp_param_err(mgr, pkt,
		    offsetof(struct ip6_hdr, ip6_plen));
		return ERANGE;
	}

	lck_mtx_lock(&mgr->ipfm_lock);

	/* find ipfq */
	for (q = mq->ipfq_next; q != mq; q = q->ipfq_next) {
		if (ipf_key_cmp(key, &q->ipfq_key) == 0) {
			if (q->ipfq_is_dirty) {
				SK_DF(SK_VERB_IP_FRAG, "found dirty q, skip");
				err = EINVAL;
				goto done;
			}
			break;
		}
	}

	/* not found, create new ipfq */
	if (q == mq) {
		first_frag = 1;

		q = ipfq_alloc(mgr);
		if (q == NULL) {
			STATS_INC(mgr->ipfm_stats,
			    FSW_STATS_RX_FRAG_DROP_NOMEM);
			err = ENOMEM;
			goto done;
		}

		ipfq_insque(q, mq);
		net_update_uptime();

		bcopy(key, &q->ipfq_key, sizeof(struct ipf_key));
		q->ipfq_down = q->ipfq_up = (struct ipf *)q;
		q->ipfq_unfraglen = -1;  /* The 1st fragment has not arrived. */
		q->ipfq_nfrag = 0;
		q->ipfq_timestamp = _net_uptime;
	}

	ASSERT(!q->ipfq_is_dirty);

	/* this queue has reached per queue frag limit */
	if (q->ipfq_nfrag > ipfm_max_frags_per_queue) {
		nfrags_freed = ipfq_freefq(mgr, q, NULL);
		STATS_ADD(mgr->ipfm_stats,
		    FSW_STATS_RX_FRAG_DROP_PER_QUEUE_LIMIT, nfrags_freed);
		err = ENOMEM;
		goto done;
	}

	local_ipfq_unfraglen = q->ipfq_unfraglen;

	/*
	 * If it's the 1st fragment, record the length of the
	 * unfragmentable part and the next header of the fragment header.
	 * Assume the first fragment to arrive will be correct.
	 * We do not have any duplicate checks here yet, so another packet
	 * with fragoff == 0 could come and overwrite the ipfq_unfraglen
	 * and, worse, the next header, at any time.
	 */
	if (fragoff == 0 && local_ipfq_unfraglen == -1) {
		local_ipfq_unfraglen = unfraglen;
	}

	/* Check that the reassembled packet would not exceed 65535 bytes. */
	if (local_ipfq_unfraglen + fragoff + fragpartlen > IP_MAXPACKET) {
		SK_DF(SK_VERB_IP_FRAG, "frag too big");
		STATS_INC(mgr->ipfm_stats, FSW_STATS_RX_FRAG_BAD);
		ipf_icmp_param_err(mgr, pkt, sizeof(struct ip6_hdr) +
		    offsetof(struct ip6_frag, ip6f_offlg));
		err = ERANGE;
		goto done;
	}

	/*
	 * If it's the 1st fragment, do the above check for each
	 * fragment already stored in the reassembly queue.
	 * If an error is found, still return 0, since we don't return
	 * ownership of a chain of offending packets back to caller.
	 */
	if (fragoff == 0) {
		for (f = q->ipfq_down; f != (struct ipf *)q; f = f_down) {
			f_down = f->ipf_down;
			if (local_ipfq_unfraglen + f->ipf_off + f->ipf_len >
			    IP_MAXPACKET) {
				SK_DF(SK_VERB_IP_FRAG, "frag too big");
				STATS_INC(mgr->ipfm_stats,
				    FSW_STATS_RX_FRAG_BAD);
				ipf_deq(f);
				ipf_free_pkt(f);
				ipf_free(mgr, f);
			}
		}
	}

	f_new = ipf_alloc(mgr);
	if (f_new == NULL) {
		STATS_INC(mgr->ipfm_stats, FSW_STATS_RX_FRAG_DROP_NOMEM);
		err = ENOMEM;
		goto done;
	}

	f_new->ipf_mff = fragflag;
	f_new->ipf_off = fragoff;
	f_new->ipf_len = fragpartlen;
	f_new->ipf_pkt = pkt;

	if (first_frag) {
		f = (struct ipf *)q;
		goto insert;
	}

	/* Find a segment which begins after this one does. */
	for (f = q->ipfq_down; f != (struct ipf *)q; f = f->ipf_down) {
		if (f->ipf_off > f_new->ipf_off) {
			break;
		}
	}

	/*
	 * If any of the fragments being reassembled overlap with any
	 * other fragments being reassembled for the same packet,
	 * reassembly of that packet must be abandoned and all the
	 * fragments that have been received for that packet must be
	 * discarded, and no ICMP error messages should be sent.
	 *
	 * It should be noted that fragments may be duplicated in the
	 * network.  Instead of treating these exact duplicate fragments
	 * as overlapping fragments, an implementation may choose to
	 * detect this case and drop exact duplicate fragments while
	 * keeping the other fragments belonging to the same packet.
	 *
	 * https://tools.ietf.org/html/rfc8200#appendix-B
	 *
	 * We apply this rule to both IPv4 and IPv6 here.
	 */
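	/*
	 * Worked example (hypothetical offsets): with the previous fragment
	 * covering bytes [0, 16) and f_new covering [8, 24), the first test
	 * computes 0 + 16 - 8 = 8 > 0, i.e. overlap.  An exact duplicate
	 * (same offset and length) is dropped by itself below; any other
	 * overlap empties and dirties the whole queue.
	 */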
	if (((f->ipf_up != (struct ipf *)q) &&  /* prev frag spans into f_new */
	    (f->ipf_up->ipf_off + f->ipf_up->ipf_len - f_new->ipf_off > 0)) ||
	    ((f != (struct ipf *)q) &&  /* f_new spans into next */
	    (f_new->ipf_off + f_new->ipf_len - f->ipf_off > 0))) {
		STATS_INC(mgr->ipfm_stats, FSW_STATS_RX_FRAG_BAD);
		/* Check for exact duplicate offset/length */
		if (((f->ipf_up != (struct ipf *)q) &&
		    ((f->ipf_up->ipf_off != f_new->ipf_off) ||
		    (f->ipf_up->ipf_len != f_new->ipf_len))) ||
		    ((f != (struct ipf *)q) &&
		    ((f->ipf_off != f_new->ipf_off) ||
		    (f->ipf_len != f_new->ipf_len)))) {
			SK_DF(SK_VERB_IP_FRAG, "frag overlap");
			ipf_free(mgr, f_new);
			/* give up the overlapping fragment queue */
			SK_DF(SK_VERB_IP_FRAG, "free overlapping queue");
			ipfq_freef(mgr, q, NULL);
			q->ipfq_is_dirty = true;
		} else {
			ipf_free(mgr, f_new);
			SK_DF(SK_VERB_IP_FRAG, "frag dup");
		}
		err = ERANGE;
		goto done;
	}

insert:
	q->ipfq_unfraglen = local_ipfq_unfraglen;

	/*
	 * Stick the new segment in its place and
	 * check for complete reassembly.
	 */
	ipf_enq(f_new, f->ipf_up);
	q->ipfq_nfrag++;
	next = 0;
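	/*
	 * Walk the ascending chain: each fragment must begin exactly where
	 * the previous one ended (e.g. offsets 0, 1480, 2960 for 1480-byte
	 * fragments); any gap means reassembly is still incomplete.
	 */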
	for (f = q->ipfq_down; f != (struct ipf *)q; f = f->ipf_down) {
		/* there is a hole */
		if (f->ipf_off != next) {
			goto done;
		}
		next += f->ipf_len;
	}
	/* we haven't received the last fragment yet */
	if (f->ipf_up->ipf_mff) {
		goto done;
	}

	/*
	 * Reassembly is complete; concatenate fragments.
	 */
	f = q->ipfq_down;
	f_down = f->ipf_down;
	pkt_reassed = f->ipf_pkt;
	*nfrags = 1;
	while (f_down != (struct ipf *)q) {
		/* chain __kern_packet with pkt_nextpkt ptr */
		f->ipf_pkt->pkt_nextpkt = f_down->ipf_pkt;
		(*nfrags)++;
		(*tlen) += f_down->ipf_len;
		f_down = f->ipf_down;
		ipf_deq(f);
		ipf_free(mgr, f);
		f = f_down;
		f_down = f->ipf_down;
	}
	ipf_free(mgr, f);

	err = 0;
	STATS_INC(mgr->ipfm_stats, FSW_STATS_RX_FRAG_REASSED);
	ipfq_remque(q);
	ipfq_free(mgr, q);

done:
	/* on success, ipfm either keeps ownership or returns the reassembled packet */
	if (err == 0) {
		/* reassembled packet if done; NULL otherwise */
		*pkt_ptr = pkt_reassed;
	}
	ipfq_sched_timeout(mgr, FALSE);
	lck_mtx_unlock(&mgr->ipfm_lock);
	return err;
}

static int
ipf_key_cmp(struct ipf_key *a, struct ipf_key *b)
{
	int d;

	if ((d = (a->ipfk_len - b->ipfk_len)) != 0) {
		return d;
	}

	if ((d = (a->ipfk_ident - b->ipfk_ident)) != 0) {
		return d;
	}

	return memcmp(a->ipfk_addr, b->ipfk_addr, a->ipfk_len);
}

/*
 * Put an ip fragment on a reassembly chain.
 * Like insque, but pointers in middle of structure.
 */
static void
ipf_enq(struct ipf *f, struct ipf *up6)
{
	f->ipf_up = up6;
	f->ipf_down = up6->ipf_down;
	up6->ipf_down->ipf_up = f;
	up6->ipf_down = f;
}

/*
 * To ipf_enq as remque is to insque.
 */
static void
ipf_deq(struct ipf *f)
{
	f->ipf_up->ipf_down = f->ipf_down;
	f->ipf_down->ipf_up = f->ipf_up;
}
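
/*
 * Both the ipf and ipfq chains are circular doubly-linked lists whose
 * head is the containing structure itself, e.g. for fragments:
 *
 *	(struct ipf *)q <-> f1 <-> f2 <-> ... <-> fn <-> (struct ipf *)q
 *
 * so an empty queue has q->ipfq_down == q->ipfq_up == (struct ipf *)q,
 * and ipf_enq(f, up) links f immediately "below" up.
 */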

static void
ipfq_insque(struct ipfq *new, struct ipfq *old)
{
	new->ipfq_prev = old;
	new->ipfq_next = old->ipfq_next;
	old->ipfq_next->ipfq_prev = new;
	old->ipfq_next = new;
}

static void
ipfq_remque(struct ipfq *p6)
{
	p6->ipfq_prev->ipfq_next = p6->ipfq_next;
	p6->ipfq_next->ipfq_prev = p6->ipfq_prev;
}

/*
 * @internal drain reassembly queues until the queue count drops to
 * target_q_count.
 */
static void
_ipfq_reap(struct fsw_ip_frag_mgr *mgr, uint32_t target_q_count,
    void (*ipf_cb)(struct fsw_ip_frag_mgr *, struct ipf *))
{
	uint32_t n_freed = 0;

	LCK_MTX_ASSERT(&mgr->ipfm_lock, LCK_MTX_ASSERT_OWNED);

	SK_DF(SK_VERB_IP_FRAG, "draining (frag %d/%d queue %d/%d)",
	    mgr->ipfm_f_count, mgr->ipfm_f_limit, mgr->ipfm_q_count,
	    mgr->ipfm_q_limit);

	while (mgr->ipfm_q.ipfq_next != &mgr->ipfm_q &&
	    mgr->ipfm_q_count > target_q_count) {
		n_freed += ipfq_freefq(mgr, mgr->ipfm_q.ipfq_prev,
		    mgr->ipfm_q.ipfq_prev->ipfq_is_dirty ? NULL : ipf_cb);
	}

	STATS_ADD(mgr->ipfm_stats, FSW_STATS_RX_FRAG_DROP_REAPED, n_freed);
}

/*
 * @internal reap half of the reassembly queues to make room for newer
 * fragment reassembly.
 */
static void
ipfq_reap(struct fsw_ip_frag_mgr *mgr)
{
	_ipfq_reap(mgr, mgr->ipfm_q_count / 2, ipf_icmp_timeout_err);
}

/*
 * @internal reap all reassembly queues, for shutdown etc.
 */
static void
ipfq_drain(struct fsw_ip_frag_mgr *mgr)
{
	_ipfq_reap(mgr, 0, NULL);
}

static void
ipfq_timeout(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct fsw_ip_frag_mgr *__single mgr = arg0;
	struct ipfq *q;
	uint64_t now, elapsed;
	uint32_t n_freed = 0;

	net_update_uptime();
	now = _net_uptime;

	SK_DF(SK_VERB_IP_FRAG, "run");
	lck_mtx_lock(&mgr->ipfm_lock);
	q = mgr->ipfm_q.ipfq_next;
	if (q) {
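		/*
		 * Advance q before inspecting q->ipfq_prev, so that freeing
		 * the expired queue cannot invalidate the iterator; the last
		 * queue is examined once q wraps back to the list head.
		 */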
		while (q != &mgr->ipfm_q) {
			q = q->ipfq_next;
			elapsed = now - q->ipfq_prev->ipfq_timestamp;
			if (elapsed > ipfm_frag_ttl) {
				SK_DF(SK_VERB_IP_FRAG, "timing out q id %5d",
				    q->ipfq_prev->ipfq_key.ipfk_ident);
				n_freed += ipfq_freefq(mgr, q->ipfq_prev,
				    q->ipfq_prev->ipfq_is_dirty ? NULL :
				    ipf_icmp_timeout_err);
			}
		}
	}
	STATS_ADD(mgr->ipfm_stats, FSW_STATS_RX_FRAG_DROP_TIMEOUT, n_freed);

	/* If running out of resources, drain ipfm queues (oldest one first) */
	if (mgr->ipfm_f_count >= mgr->ipfm_f_limit ||
	    mgr->ipfm_q_count >= mgr->ipfm_q_limit) {
		ipfq_reap(mgr);
	}

	/* re-arm the purge timer if there's work to do */
	if (mgr->ipfm_q_count > 0) {
		ipfq_sched_timeout(mgr, TRUE);
	}
	lck_mtx_unlock(&mgr->ipfm_lock);
}

static void
ipfq_sched_timeout(struct fsw_ip_frag_mgr *mgr, boolean_t in_tcall)
{
	uint32_t delay = MAX(1, ipfm_timeout_tcall_ival);       /* seconds */
	thread_call_t __single tcall = mgr->ipfm_timeout_tcall;
	uint64_t now = mach_absolute_time();
	uint64_t ival, deadline = now;

	LCK_MTX_ASSERT(&mgr->ipfm_lock, LCK_MTX_ASSERT_OWNED);

	ASSERT(tcall != NULL);
	if (mgr->ipfm_q_count > 0 &&
	    (!thread_call_isactive(tcall) || in_tcall)) {
		nanoseconds_to_absolutetime(delay * NSEC_PER_SEC, &ival);
		clock_deadline_for_periodic_event(ival, now, &deadline);
		(void) thread_call_enter_delayed(tcall, deadline);
	}
}

static int
ipfq_drain_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	struct fsw_ip_frag_mgr *__single mgr = arg1;

	SKOID_PROC_CALL_GUARD;

	lck_mtx_lock(&mgr->ipfm_lock);
	ipfq_drain(mgr);
	lck_mtx_unlock(&mgr->ipfm_lock);

	return 0;
}

static struct ipfq *
ipfq_alloc(struct fsw_ip_frag_mgr *mgr)
{
	struct ipfq *q;

	if (mgr->ipfm_q_count > mgr->ipfm_q_limit) {
		ipfq_reap(mgr);
	}
	ASSERT(mgr->ipfm_q_count <= mgr->ipfm_q_limit);

	q = kalloc_type(struct ipfq, Z_WAITOK | Z_ZERO);
	if (q != NULL) {
		mgr->ipfm_q_count++;
		q->ipfq_is_dirty = false;
	}
	return q;
}

/* free q */
static void
ipfq_free(struct fsw_ip_frag_mgr *mgr, struct ipfq *q)
{
	kfree_type(struct ipfq, q);
	mgr->ipfm_q_count--;
}

/*
 * Free all fragments, keep q.
 * @return: number of frags freed
 */
static uint32_t
ipfq_freef(struct fsw_ip_frag_mgr *mgr, struct ipfq *q,
    void (*ipf_cb)(struct fsw_ip_frag_mgr *, struct ipf *))
{
	struct ipf *f, *down6;
	uint32_t nfrags = 0;

	for (f = q->ipfq_down; f != (struct ipf *)q; f = down6) {
		nfrags++;
		down6 = f->ipf_down;
		ipf_deq(f);
		if (ipf_cb != NULL) {
			(*ipf_cb)(mgr, f);
		}
		ipf_free_pkt(f);
		ipf_free(mgr, f);
	}

	return nfrags;
}

/*
 * Free all fragments and the q itself.
 * @return: number of frags freed
 */
static uint32_t
ipfq_freefq(struct fsw_ip_frag_mgr *mgr, struct ipfq *q,
    void (*ipf_cb)(struct fsw_ip_frag_mgr *, struct ipf *))
{
	uint32_t freed_count;
	freed_count = ipfq_freef(mgr, q, ipf_cb);
	ipfq_remque(q);
	ipfq_free(mgr, q);
	return freed_count;
}

static struct ipf *
ipf_alloc(struct fsw_ip_frag_mgr *mgr)
{
	struct ipf *f;

	if (mgr->ipfm_f_count > mgr->ipfm_f_limit) {
		STATS_INC(mgr->ipfm_stats, FSW_STATS_RX_FRAG_DROP_FRAG_LIMIT);
		return NULL;
	}

	f = kalloc_type(struct ipf, Z_WAITOK | Z_ZERO);
	if (f != NULL) {
		mgr->ipfm_f_count++;
	}
	return f;
}

static void
ipf_free_pkt(struct ipf *f)
{
	struct __kern_packet *pkt = f->ipf_pkt;
	ASSERT(pkt != NULL);
	pp_free_packet(__DECONST(struct kern_pbufpool *, pkt->pkt_qum.qum_pp),
	    SK_PTR_ADDR(pkt));
}

static void
ipf_free(struct fsw_ip_frag_mgr *mgr, struct ipf *f)
{
	kfree_type(struct ipf, f);
	mgr->ipfm_f_count--;
}