xref: /xnu-8020.140.41/bsd/net/dlil.h (revision 27b03b360a988dfd3dfdf34262bb0042026747cc)
1 /*
2  * Copyright (c) 1999-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 #ifndef DLIL_H
29 #define DLIL_H
30 #ifdef KERNEL
31 
32 #include <sys/kernel_types.h>
33 #include <net/kpi_interface.h>
34 
/*
 * BPF tap selection values.  The enumerators are implicitly numbered
 * 0 (BPF_TAP_DISABLE) through 3 (BPF_TAP_INPUT_OUTPUT).
 * NOTE(review): presumably these mirror the bpf_tap_mode argument taken by
 * dlil_set_bpf_tap() -- confirm against net/kpi_interface.h.
 */
35 enum {
36 	BPF_TAP_DISABLE,
37 	BPF_TAP_INPUT,
38 	BPF_TAP_OUTPUT,
39 	BPF_TAP_INPUT_OUTPUT
40 };
41 
42 /*
43  * DLIL_DESC_ETYPE2 - native_type must point to 2 byte ethernet raw protocol,
44  *                    variants.native_type_length must be set to 2
45  * DLIL_DESC_SAP - native_type must point to 3 byte SAP protocol
46  *                 variants.native_type_length must be set to 3
47  * DLIL_DESC_SNAP - native_type must point to 5 byte SNAP protocol
48  *                  variants.native_type_length must be set to 5
49  *
50  * All protocols must be in Network byte order.
51  *
52  * Future interface families may define more protocol types they know about.
53  * The type implies the offset and context of the protocol data at native_type.
54  * The length of the protocol data specified at native_type must be set in
55  * variants.native_type_length.
56  */
57 /* Ethernet specific types */
/*
 * Per the table in the comment preceding these definitions, the descriptor
 * type fixes the length of the data at native_type: 2, 3 and 5 bytes
 * respectively, always in network byte order.
 */
58 #define DLIL_DESC_ETYPE2        4       /* 2-byte raw ethernet protocol */
59 #define DLIL_DESC_SAP           5       /* 3-byte SAP protocol */
60 #define DLIL_DESC_SNAP          6       /* 5-byte SNAP protocol */
61 
62 #ifdef KERNEL_PRIVATE
63 #include <net/if.h>
64 #include <net/if_var.h>
65 #include <net/classq/classq.h>
66 #include <net/flowadv.h>
67 #include <sys/kern_event.h>
68 #include <kern/thread.h>
69 #include <kern/locks.h>
70 
71 #ifdef BSD_KERNEL_PRIVATE
/*
 * Operations on timespecs.
 *
 * These are function-like macros operating on pointers to objects with
 * tv_sec / tv_nsec members.  Each argument may be evaluated more than
 * once, so never pass an expression with side effects.  The constants
 * NSEC_PER_SEC, NSEC_PER_USEC and USEC_PER_SEC must be in scope at the
 * point of use.
 */

/*
 * Zero both fields of *tvp.  The expansion is fully parenthesized so the
 * macro also behaves when it appears inside a larger expression.
 */
#define net_timerclear(tvp)     ((tvp)->tv_sec = (tvp)->tv_nsec = 0)

/* Evaluates to non-zero when either field of *tvp is non-zero. */
#define net_timerisset(tvp)     ((tvp)->tv_sec || (tvp)->tv_nsec)

/*
 * Compare *tvp against *uvp with the relational operator token `cmp'
 * (for example <, >=, ==).  Nanoseconds are compared only when the
 * seconds are equal; otherwise the seconds decide.
 */
#define net_timercmp(tvp, uvp, cmp)                                     \
	(((tvp)->tv_sec == (uvp)->tv_sec) ?                             \
	((tvp)->tv_nsec cmp (uvp)->tv_nsec) :                           \
	((tvp)->tv_sec cmp (uvp)->tv_sec))

/*
 * *vvp = *tvp + *uvp, carrying at most one second out of tv_nsec.
 * The single carry step is sufficient when both inputs have
 * tv_nsec < NSEC_PER_SEC.
 */
#define net_timeradd(tvp, uvp, vvp) do {                                \
	(vvp)->tv_sec = (tvp)->tv_sec + (uvp)->tv_sec;                  \
	(vvp)->tv_nsec = (tvp)->tv_nsec + (uvp)->tv_nsec;               \
	if ((vvp)->tv_nsec >= (long)NSEC_PER_SEC) {                     \
	        (vvp)->tv_sec++;                                        \
	        (vvp)->tv_nsec -= NSEC_PER_SEC;                         \
	}                                                               \
} while (0)

/*
 * *vvp = *tvp - *uvp, borrowing one second when the nanosecond
 * difference is negative.
 */
#define net_timersub(tvp, uvp, vvp) do {                                \
	(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;                  \
	(vvp)->tv_nsec = (tvp)->tv_nsec - (uvp)->tv_nsec;               \
	if ((vvp)->tv_nsec < 0) {                                       \
	        (vvp)->tv_sec--;                                        \
	        (vvp)->tv_nsec += NSEC_PER_SEC;                         \
	}                                                               \
} while (0)

/*
 * Store *tvp converted to microseconds in *nsp.  The seconds part only
 * contributes when tv_sec is positive; a zero or negative tv_sec is
 * ignored rather than subtracted.
 */
#define net_timerusec(tvp, nsp) do {                                    \
	*(nsp) = (tvp)->tv_nsec / NSEC_PER_USEC;                        \
	if ((tvp)->tv_sec > 0) {                                        \
	        *(nsp) += ((tvp)->tv_sec * USEC_PER_SEC);               \
	}                                                               \
} while (0)

/*
 * Store *tvp converted to nanoseconds in *nsp.  As with net_timerusec(),
 * the seconds part only contributes when tv_sec is positive.
 */
#define net_timernsec(tvp, nsp) do {                                    \
	*(nsp) = (tvp)->tv_nsec;                                        \
	if ((tvp)->tv_sec > 0) {                                        \
	        *(nsp) += ((tvp)->tv_sec * NSEC_PER_SEC);               \
	}                                                               \
} while (0)
111 
/*
 * net_nsectimer(nsp, tvp): split the 64-bit nanosecond count *nsp into
 * whole seconds and leftover nanoseconds and store them in *tvp.
 *
 * Both fields of *tvp are always overwritten, so no prior clearing of
 * *tvp is needed.  `nsp' is read exactly once; `tvp' is evaluated twice.
 */
#if defined(__x86_64__) || defined(__arm64__)
#define net_nsectimer(nsp, tvp) do {                                    \
	uint64_t __nsp = *(nsp);                                        \
	uint64_t __sec = __nsp / NSEC_PER_SEC;                          \
	(tvp)->tv_sec = (__darwin_time_t)__sec;                         \
	(tvp)->tv_nsec = (long)(__nsp - __sec * NSEC_PER_SEC);          \
} while (0)
#else /* 32 bit */
/*
 * NSEC needs to be < 2^31*10^9 to be representable in a struct timespec
 * because __darwin_time_t is 32 bit on 32-bit platforms. This bound
 * is < 2^61. We get a first approximation to convert into seconds using
 * the following values.
 * a = floor(NSEC / 2^29)
 * inv = floor(2^61 / 10^9)
 *
 * The approximation of seconds is correct or too low by 1 unit.
 * So we fix it by computing the remainder.
 *
 * Inputs at or above 2^61 are outside the supported range: the shifted
 * value no longer fits in 32 bits and is silently truncated.
 */
#define net_nsectimer(nsp, tvp) do {                                    \
	uint64_t __nsp = *(nsp);                                        \
	uint32_t __a = (uint32_t)(__nsp >> 29);                         \
	const uint32_t __inv = 0x89705F41;                              \
	uint32_t __sec = (uint32_t)(((uint64_t)__a * __inv) >> 32);     \
	uint32_t __rem = (uint32_t)(__nsp - __sec * NSEC_PER_SEC);      \
	__sec += ((__rem >= NSEC_PER_SEC) ? 1 : 0);                     \
	(tvp)->tv_sec = (__darwin_time_t)__sec;                         \
	(tvp)->tv_nsec =                                                \
	    (long)((__rem >= NSEC_PER_SEC) ? (__rem - NSEC_PER_SEC) : __rem);   \
} while (0)
#endif /* 32 bit */
145 
146 struct ifnet;
147 struct mbuf;
148 struct ether_header;
149 struct sockaddr_dl;
150 struct iff_filter;
151 
152 #define DLIL_THREADNAME_LEN     32
153 
154 /*
155  * DLIL threading info
 *
 * State kept for one DLIL thread: its packet queue, the interface it is
 * tied to, accumulated statistics, status flags and the thread handles
 * involved.  The layout depends on the build: dlth_pkts_cnt exists only
 * when IFNET_INPUT_SANITY_CHK is enabled.
 * NOTE(review): dlth_lock presumably guards the mutable fields of this
 * structure -- confirm against the users in dlil.c.
156  */
157 struct dlil_threading_info {
158 	decl_lck_mtx_data(, dlth_lock);
159 	class_queue_t   dlth_pkts;      /* queue of pkts */
160 	struct ifnet    *dlth_ifp;      /* pointer to interface */
161 	struct ifnet_stat_increment_param dlth_stats; /* incremental stats */
162 	uint32_t       dlth_flags;      /* thread flags (see below) */
163 	uint32_t       dlth_wtot;       /* # of wakeup requests */
164 
165 	/* strategy (sync or async) */
	/*
	 * Called with this structure as first argument; the remaining
	 * parameter types match those of dlil_input_handler().
	 */
166 	errno_t (*dlth_strategy)(struct dlil_threading_info *,
167 	    struct ifnet *, struct mbuf *, struct mbuf *,
168 	    const struct ifnet_stat_increment_param *, boolean_t,
169 	    struct thread *);
170 
171 	/*
172 	 * Thread affinity (workloop and DLIL threads).
173 	 */
174 	boolean_t       dlth_affinity;          /* affinity set is available */
175 	uint32_t        dlth_affinity_tag;      /* affinity tag */
176 	struct thread   *dlth_thread;           /* DLIL worker thread */
177 	struct thread   *dlth_driver_thread;    /* driver/workloop thread */
178 	struct thread   *dlth_poller_thread;    /* poll thread */
179 
180 	lck_grp_t       *dlth_lock_grp; /* lock group (for lock stats) */
	/* DLIL_THREADNAME_LEN is 32, so at most 31 characters plus a NUL. */
181 	char            dlth_name[DLIL_THREADNAME_LEN]; /* name storage */
182 
183 #if IFNET_INPUT_SANITY_CHK
184 	/*
185 	 * For debugging.
186 	 */
187 	uint64_t        dlth_pkts_cnt;          /* total # of packets */
188 #endif
189 };
190 
191 /*
192  * DLIL input thread info (for main/loopback input thread)
 *
 * Embeds the generic per-thread state as its first member and adds a
 * second queue for lo0 packets.
 * NOTE(review): because `inp' is the first member, a pointer to this
 * structure can be viewed as a struct dlil_threading_info pointer;
 * dlil_main_input_thread is declared with that type -- confirm that it
 * actually points at an instance of this structure.
193  */
194 struct dlil_main_threading_info {
195 	struct dlil_threading_info      inp;
196 	class_queue_t                   lo_rcvq_pkts; /* queue of lo0 pkts */
197 };
198 
199 /*
200  * Valid values for dlth_flags.
201  *
202  * The following are shared with kpi_protocol.c so that it may wakeup
203  * the input thread to run through packets queued for protocol input.
 *
 * Each value is a distinct single bit, so they can be OR-ed together in
 * the uint32_t dlth_flags field.  The status bits are allocated from the
 * top of the word downwards; DLIL_INPUT_EMBRYONIC alone uses bit 0.
 * NOTE(review): the exact meaning of each state is defined by its users
 * and is not visible in this header.
204  */
205 #define DLIL_INPUT_RUNNING              0x80000000
206 #define DLIL_INPUT_WAITING              0x40000000
207 #define DLIL_PROTO_REGISTER             0x20000000
208 #define DLIL_PROTO_WAITING              0x10000000
209 #define DLIL_INPUT_TERMINATE            0x08000000
210 #define DLIL_INPUT_TERMINATE_COMPLETE   0x04000000
211 #define DLIL_INPUT_EMBRYONIC            0x00000001
212 
213 /*
214  * Flags for dlil_attach_filter()
 * NOTE(review): presumably passed as its trailing u_int32_t argument.
215  */
216 #define DLIL_IFF_TSO            0x01    /* Interface filter supports TSO */
217 #define DLIL_IFF_INTERNAL       0x02    /* Apple internal -- do not count towards stats */
218 
219 /* Input poll interval definitions */
/* Both values are 64-bit (ULL) nanosecond counts: 1000 ns and 1000000 ns. */
220 #define IF_RXPOLL_INTERVALTIME_MIN      (1ULL * 1000)           /* 1 us */
221 #define IF_RXPOLL_INTERVALTIME          (1ULL * 1000 * 1000)    /* 1 ms */
222 
/*
 * Globals defined outside this header.
 * NOTE(review): judging by their names these are the verbosity switch,
 * hardware-checksum debug knobs and RX polling / receive queue tunables;
 * their definitions and units are not visible here.
 */
223 extern int dlil_verbose;
224 extern uint32_t hwcksum_dbg;
225 extern uint32_t hwcksum_tx;
226 extern uint32_t hwcksum_rx;
227 extern struct dlil_threading_info *dlil_main_input_thread;
228 extern unsigned int net_rxpoll;
229 extern uint32_t if_rxpoll;
230 extern uint32_t if_rxpoll_decay;
231 extern uint32_t if_rxpoll_interval_pkts;
232 extern uint32_t if_rcvq_maxlen;
233 
/* Initialization entry point; takes no arguments and returns nothing. */
234 extern void dlil_init(void);
235 
/*
 * NOTE(review): ioctl/output/start routines with interface-callback shaped
 * signatures -- presumably defaults installed on an ifnet; confirm.
 */
236 extern errno_t ifp_if_ioctl(struct ifnet *, unsigned long, void *);
237 extern errno_t ifp_if_output(struct ifnet *, struct mbuf *);
238 extern void ifp_if_start(struct ifnet *);
239 
/* Arguments: interface, tap mode, packet callback. */
240 extern errno_t dlil_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);
241 
242 /*
243  * Send arp internal bypasses the check for IPv4LL.
 * Takes the same leading arguments as dlil_send_arp() but without that
 * function's trailing u_int32_t argument.
244  */
245 extern errno_t dlil_send_arp_internal(ifnet_t, u_int16_t,
246     const struct sockaddr_dl *, const struct sockaddr *,
247     const struct sockaddr_dl *, const struct sockaddr *);
248 
249 /*
250  * The following constants are used with the net_thread_mark_apply and
251  * net_thread_is_unmarked functions to control the bits in the uu_network_marks
252  * field of the uthread structure.
253  */
/*
 * Bits 0x1..0x4 are always defined; the bits from 0x10000 upwards exist
 * only in SKYWALK builds.  All values are distinct single bits.
 */
254 #define NET_THREAD_HELD_PF      0x1     /* thread is holding PF lock */
255 #define NET_THREAD_HELD_DOMAIN  0x2     /* thread is holding domain_proto_mtx */
256 #define NET_THREAD_CKREQ_LLADDR 0x4     /* thread reqs MACF check for LLADDR */
257 #if SKYWALK
258 #define NET_THREAD_CHANNEL_SYNC 0x10000 /* thread is doing channel sync */
259 #define NET_THREAD_CACHE_UPDATE 0x20000 /* thread is doing cache update */
260 #define NET_THREAD_REGION_UPDATE 0x40000 /* thread is doing region update */
261 #define NET_THREAD_RX_NOTIFY    0x80000 /* thread is doing RX notify */
262 #define NET_THREAD_TX_NOTIFY    0x100000 /* thread is doing TX notify */
/*
 * NOTE(review): "AYSYNC" looks like a misspelling of "ASYNC"; the name is
 * part of the interface, so renaming requires updating every user.
 */
263 #define NET_THREAD_AYSYNC_TX    0x200000 /* require use of starter thread */
264 #endif /* SKYWALK */
265 
266 /*
267  * net_thread_marks_t is a pointer to a phantom structure type used for
268  * manipulating the uthread:uu_network_marks field.  As an example...
269  *
270  *   static const u_int32_t bits = NET_THREAD_CKREQ_LLADDR;
271  *   struct uthread *uth = current_uthread();
272  *
273  *   net_thread_marks_t marks = net_thread_marks_push(bits);
274  *   VERIFY((uth->uu_network_marks & NET_THREAD_CKREQ_LLADDR) != 0);
275  *   net_thread_marks_pop(marks);
276  *
277  * The net_thread_marks_push() function returns an encoding of the bits
278  * that were changed from zero to one in the uu_network_marks field. When
279  * the net_thread_marks_pop() function later processes that value, it
280  * resets the bits to their previous value.
281  *
282  * The net_thread_unmarks_push() and net_thread_unmarks_pop() functions
283  * are similar to net_thread_marks_push() and net_thread_marks_pop() except
284  * they clear the marks bits in the guarded section rather than set them.
285  *
286  * The net_thread_is_marked() and net_thread_is_unmarked() functions return
287  * the subset of the bits that are currently set or cleared (respectively)
288  * in the uthread:uu_network_marks field.
289  *
290  * Finally, the value of the net_thread_marks_none constant is provided for
291  * comparing for equality with the value returned when no bits in the marks
292  * field are changed by the push.
293  *
294  * It is not significant that a value of type net_thread_marks_t may
295  * compare as equal to the NULL pointer.
296  */
/* Phantom type: declared but never defined, used only through pointers. */
297 struct net_thread_marks;
298 typedef const struct net_thread_marks *net_thread_marks_t;
299 
/* Value returned by a push that changed no bits; compare for equality. */
300 extern const net_thread_marks_t net_thread_marks_none;
301 
/*
 * push: set (marks) or clear (unmarks) the given bits for the guarded
 * section and return an encoding of the bits that actually changed.
 * pop: restore those bits to their previous value using that encoding.
 */
302 extern net_thread_marks_t net_thread_marks_push(u_int32_t);
303 extern net_thread_marks_t net_thread_unmarks_push(u_int32_t);
304 extern void net_thread_marks_pop(net_thread_marks_t);
305 extern void net_thread_unmarks_pop(net_thread_marks_t);
/* Return the subset of the given bits currently set / currently clear. */
306 extern u_int32_t net_thread_is_marked(u_int32_t);
307 extern u_int32_t net_thread_is_unmarked(u_int32_t);
308 
/*
 * Data-path and management prototypes.  Parameter names are omitted in
 * most declarations, so the notes below describe only what the types show.
 */

/* Output: interface, protocol family, packet, opaque pointer, address,
 * an int, and a flow advisory structure. */
309 extern int dlil_output(ifnet_t, protocol_family_t, mbuf_t, void *,
310     const struct sockaddr *, int, struct flowadv *);
311 
/* Input of a packet list; the extended form adds a u_int32_t and an
 * ifnet_model_t. */
312 extern void dlil_input_packet_list(struct ifnet *, struct mbuf *);
313 extern void dlil_input_packet_list_extended(struct ifnet *, struct mbuf *,
314     u_int32_t, ifnet_model_t);
315 
/* NOTE(review): presumably const sockaddr in, sockaddr out with the
 * size_t giving the output buffer length -- confirm. */
316 extern errno_t dlil_resolve_multi(struct ifnet *,
317     const struct sockaddr *, struct sockaddr *, size_t);
318 
/* Same leading arguments as dlil_send_arp_internal() plus a trailing
 * u_int32_t. */
319 extern errno_t dlil_send_arp(ifnet_t, u_int16_t, const struct sockaddr_dl *,
320     const struct sockaddr *, const struct sockaddr_dl *,
321     const struct sockaddr *, u_int32_t);
322 
/* Interface filters; the u_int32_t of dlil_attach_filter() takes the
 * DLIL_IFF_* flags. */
323 extern int dlil_attach_filter(ifnet_t, const struct iff_filter *,
324     interface_filter_t *, u_int32_t);
325 extern void dlil_detach_filter(interface_filter_t);
326 extern boolean_t dlil_has_ip_filter(void);
327 extern boolean_t dlil_has_if_filter(struct ifnet *);
328 
329 extern void dlil_proto_unplumb_all(ifnet_t);
330 
/* Message/event posting helpers (payload types: net_event_data, kev_msg). */
331 extern int dlil_post_msg(struct ifnet *, u_int32_t, u_int32_t,
332     struct net_event_data *, u_int32_t, boolean_t);
333 
334 extern void dlil_post_sifflags_msg(struct ifnet *);
335 
336 extern int dlil_post_complete_msg(struct ifnet *, struct kev_msg *);
337 
338 extern int dlil_alloc_local_stats(struct ifnet *);
339 
/* Filter TSO update and RX polling; poll parameters travel in
 * struct ifnet_poll_params. */
340 extern void ifnet_filter_update_tso(struct ifnet *, boolean_t filter_enable);
341 extern errno_t dlil_rxpoll_validate_params(struct ifnet_poll_params *);
342 extern void dlil_rxpoll_update_params(struct ifnet *,
343     struct ifnet_poll_params *);
344 extern void ifnet_poll(struct ifnet *);
345 extern errno_t ifnet_input_poll(struct ifnet *, struct mbuf *,
346     struct mbuf *, const struct ifnet_stat_increment_param *);
347 
/*
 * SKYWALK-only interface helpers.  Every function takes the interface and
 * returns a boolean_t.
 * NOTE(review): for the "needs"/"is"/"noauto" predicates the result is
 * presumably the answer to the query; for attach/detach/add/remove it is
 * presumably a success indication -- neither is visible from this header.
 */
348 #if SKYWALK
349 extern boolean_t ifnet_needs_fsw_transport_netagent(ifnet_t ifp);
350 extern boolean_t ifnet_needs_fsw_ip_netagent(ifnet_t ifp);
351 extern boolean_t ifnet_needs_netif_netagent(ifnet_t ifp);
352 extern boolean_t ifnet_needs_compat(ifnet_t ifp);
353 extern boolean_t ifnet_nx_noauto(ifnet_t ifp);
354 extern boolean_t ifnet_nx_noauto_flowswitch(ifnet_t ifp);
355 extern boolean_t ifnet_is_low_latency(ifnet_t ifp);
356 extern boolean_t ifnet_attach_flowswitch_nexus(ifnet_t ifp);
357 extern boolean_t ifnet_detach_flowswitch_nexus(ifnet_t ifp);
358 extern boolean_t ifnet_attach_netif_nexus(ifnet_t ifp);
359 extern boolean_t ifnet_detach_netif_nexus(ifnet_t ifp);
360 extern boolean_t ifnet_add_netagent(ifnet_t ifp);
361 extern boolean_t ifnet_remove_netagent(ifnet_t ifp);
362 
363 #endif /* SKYWALK */
364 
365 /*
366  * dlil_if_acquire is obsolete. Use ifnet_allocate.
 * The interface is handed back through the final struct ifnet ** argument.
367  */
368 extern int dlil_if_acquire(u_int32_t, const void *, size_t, const char *, struct ifnet **);
369 /*
370  * dlil_if_release is obsolete. The equivalent is called automatically when
371  * an interface is detached.
372  */
373 extern void dlil_if_release(struct ifnet *ifp);
374 
/* NOTE(review): presumably a reference-count take/drop pair -- confirm. */
375 extern errno_t dlil_if_ref(struct ifnet *);
376 extern errno_t dlil_if_free(struct ifnet *);
377 
/*
 * Node presence notifications.  The _v2 variant differs from
 * dlil_node_present() only by the additional struct sockaddr_dl * argument.
 * The u_int8_t[48] parameter is an array-typed parameter and is therefore
 * passed as a pointer; the 48 is not enforced by the compiler.
 */
378 extern int dlil_node_present(struct ifnet *, struct sockaddr *, int32_t, int,
379     int, u_int8_t[48]);
380 extern void dlil_node_absent(struct ifnet *, struct sockaddr *);
381 extern int dlil_node_present_v2(struct ifnet *, struct sockaddr *, struct sockaddr_dl *, int32_t, int,
382     int, u_int8_t[48]);
383 
/* NOTE(review): presumably returns the address bytes and writes their
 * length through the size_t pointer -- confirm. */
384 extern const void *dlil_ifaddr_bytes(const struct sockaddr_dl *, size_t *,
385     kauth_cred_t *);
386 
/* Array parameters sized by DLIL_MODIDLEN / DLIL_MODARGLEN (defined
 * elsewhere); passed as pointers. */
387 extern void dlil_report_issues(struct ifnet *, u_int8_t[DLIL_MODIDLEN],
388     u_int8_t[DLIL_MODARGLEN]);
389 
/* NOTE(review): presumably the number of buckets that proto_hash_value()
 * maps into -- confirm. */
390 #define PROTO_HASH_SLOTS        5
391 
392 extern int proto_hash_value(u_int32_t);
393 
/* Returns a string for a u_int32_t code; never frees or takes ownership
 * of caller memory as far as the signature shows. */
394 extern const char *dlil_kev_dl_code_str(u_int32_t);
395 
/* RX polling parameter accessors; set takes an extra boolean_t. */
396 extern errno_t dlil_rxpoll_set_params(struct ifnet *,
397     struct ifnet_poll_params *, boolean_t);
398 extern errno_t dlil_rxpoll_get_params(struct ifnet *,
399     struct ifnet_poll_params *);
400 
/*
 * Output/input handlers.  dlil_input_handler() has the same parameter
 * types as the dlth_strategy callback minus the leading
 * struct dlil_threading_info pointer.
 */
401 extern errno_t dlil_output_handler(struct ifnet *, struct mbuf *);
402 extern errno_t dlil_input_handler(struct ifnet *, struct mbuf *,
403     struct mbuf *, const struct ifnet_stat_increment_param *,
404     boolean_t, struct thread *);
405 extern void dlil_ifclassq_setup(struct ifnet *, struct ifclassq *);
406 
/* SKYWALK builds can install and reset per-interface input/output handlers. */
407 #if SKYWALK
408 extern errno_t dlil_set_input_handler(struct ifnet *ifp, dlil_input_func fn);
409 extern errno_t dlil_set_output_handler(struct ifnet *ifp, dlil_output_func fn);
410 extern void dlil_reset_input_handler(struct ifnet *ifp);
411 extern void dlil_reset_output_handler(struct ifnet *ifp);
412 #endif /* SKYWALK */
413 
414 /*
415  * This is mostly called from the context of the DLIL input thread;
416  * because of that there is no need for atomic operations.
417  */
418 __attribute__((always_inline))
419 static inline void
ifp_inc_traffic_class_in(struct ifnet * ifp,struct mbuf * m)420 ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m)
421 {
422 	if (!(m->m_flags & M_PKTHDR)) {
423 		return;
424 	}
425 
426 	switch (m_get_traffic_class(m)) {
427 	case MBUF_TC_BE:
428 		ifp->if_tc.ifi_ibepackets++;
429 		ifp->if_tc.ifi_ibebytes += (u_int64_t)m->m_pkthdr.len;
430 		break;
431 	case MBUF_TC_BK:
432 		ifp->if_tc.ifi_ibkpackets++;
433 		ifp->if_tc.ifi_ibkbytes += (u_int64_t)m->m_pkthdr.len;
434 		break;
435 	case MBUF_TC_VI:
436 		ifp->if_tc.ifi_ivipackets++;
437 		ifp->if_tc.ifi_ivibytes += (u_int64_t)m->m_pkthdr.len;
438 		break;
439 	case MBUF_TC_VO:
440 		ifp->if_tc.ifi_ivopackets++;
441 		ifp->if_tc.ifi_ivobytes += (u_int64_t)m->m_pkthdr.len;
442 		break;
443 	default:
444 		break;
445 	}
446 
447 	if (mbuf_is_traffic_class_privileged(m)) {
448 		ifp->if_tc.ifi_ipvpackets++;
449 		ifp->if_tc.ifi_ipvbytes += (u_int64_t)m->m_pkthdr.len;
450 	}
451 }
452 
453 /*
454  * This is called from DLIL output, hence multiple threads could end
455  * up modifying the statistics.  We trade off acccuracy for performance
456  * by not using atomic operations here.
457  */
458 __attribute__((always_inline))
459 static inline void
ifp_inc_traffic_class_out(struct ifnet * ifp,struct mbuf * m)460 ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
461 {
462 	if (!(m->m_flags & M_PKTHDR)) {
463 		return;
464 	}
465 
466 	switch (m_get_traffic_class(m)) {
467 	case MBUF_TC_BE:
468 		ifp->if_tc.ifi_obepackets++;
469 		ifp->if_tc.ifi_obebytes += (u_int64_t)m->m_pkthdr.len;
470 		break;
471 	case MBUF_TC_BK:
472 		ifp->if_tc.ifi_obkpackets++;
473 		ifp->if_tc.ifi_obkbytes += (u_int64_t)m->m_pkthdr.len;
474 		break;
475 	case MBUF_TC_VI:
476 		ifp->if_tc.ifi_ovipackets++;
477 		ifp->if_tc.ifi_ovibytes += (u_int64_t)m->m_pkthdr.len;
478 		break;
479 	case MBUF_TC_VO:
480 		ifp->if_tc.ifi_ovopackets++;
481 		ifp->if_tc.ifi_ovobytes += (u_int64_t)m->m_pkthdr.len;
482 		break;
483 	default:
484 		break;
485 	}
486 
487 	if (mbuf_is_traffic_class_privileged(m)) {
488 		ifp->if_tc.ifi_opvpackets++;
489 		ifp->if_tc.ifi_opvbytes += (u_int64_t)m->m_pkthdr.len;
490 	}
491 }
492 
/*
 * Takes an interface and a u_long and returns nothing, so no status is
 * reported to the caller.
 * NOTE(review): the name suggests the ioctl is issued asynchronously and
 * the u_long is the ioctl command -- confirm against the definition.
 */
493 extern void ifnet_ioctl_async(struct ifnet *, u_long);
494 #endif /* BSD_KERNEL_PRIVATE */
495 #endif /* KERNEL_PRIVATE */
496 #endif /* KERNEL */
497 #endif /* DLIL_H */
498