xref: /xnu-8020.140.41/bsd/netinet/in_tclass.c (revision 27b03b360a988dfd3dfdf34262bb0042026747cc)
1 /*
2  * Copyright (c) 2009-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/types.h>
32 #include <sys/filedesc.h>
33 #include <sys/file_internal.h>
34 #include <sys/proc.h>
35 #include <sys/socket.h>
36 #include <sys/socketvar.h>
37 #include <sys/errno.h>
38 #include <sys/protosw.h>
39 #include <sys/domain.h>
40 #include <sys/mbuf.h>
41 #include <sys/queue.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysproto.h>
44 
45 #include <net/if.h>
46 #include <net/if_var.h>
47 #include <net/route.h>
48 
49 #include <netinet/in.h>
50 #include <netinet/in_var.h>
51 #include <netinet/in_pcb.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip_var.h>
54 #include <netinet/ip6.h>
55 #include <netinet6/ip6_var.h>
56 #include <netinet/udp.h>
57 #include <netinet/udp_var.h>
58 #include <netinet/tcp.h>
59 #include <netinet/tcp_var.h>
60 #include <netinet/tcp_cc.h>
61 #include <netinet/in_tclass.h>
62 
63 #include <os/log.h>
64 
65 static_assert(_SO_TC_MAX == SO_TC_STATS_MAX);
66 
67 struct net_qos_dscp_map {
68 	uint8_t        sotc_to_dscp[SO_TC_MAX];
69 	uint8_t        netsvctype_to_dscp[_NET_SERVICE_TYPE_COUNT];
70 };
71 
72 struct dcsp_msc_map {
73 	uint8_t                 dscp;
74 	mbuf_svc_class_t        msc;
75 };
76 static inline int so_throttle_best_effort(struct socket *, struct ifnet *);
77 static void set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *, int);
78 static errno_t dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *, size_t,
79     struct dcsp_msc_map *);
80 
81 static LCK_GRP_DECLARE(tclass_lck_grp, "tclass");
82 static LCK_MTX_DECLARE(tclass_lock, &tclass_lck_grp);
83 
84 SYSCTL_NODE(_net, OID_AUTO, qos,
85     CTLFLAG_RW | CTLFLAG_LOCKED, 0, "QoS");
86 
87 static int sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS;
88 SYSCTL_PROC(_net_qos, OID_AUTO, default_netsvctype_to_dscp_map,
89     CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
90     0, 0, sysctl_default_netsvctype_to_dscp_map, "S", "");
91 
92 static int sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
93 SYSCTL_PROC(_net_qos, OID_AUTO, dscp_to_wifi_ac_map,
94     CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
95     0, 0, sysctl_dscp_to_wifi_ac_map, "S", "");
96 
97 static int sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
98 SYSCTL_PROC(_net_qos, OID_AUTO, reset_dscp_to_wifi_ac_map,
99     CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
100     0, 0, sysctl_reset_dscp_to_wifi_ac_map, "I", "");
101 
102 int net_qos_verbose = 0;
103 SYSCTL_INT(_net_qos, OID_AUTO, verbose,
104     CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_verbose, 0, "");
105 
106 /*
107  * Fastlane QoS policy:
108  * By Default allow all apps to get traffic class to DSCP mapping
109  */
110 SYSCTL_NODE(_net_qos, OID_AUTO, policy,
111     CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");
112 
113 int net_qos_policy_restricted = 0;
114 SYSCTL_INT(_net_qos_policy, OID_AUTO, restricted,
115     CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restricted, 0, "");
116 
117 int net_qos_policy_restrict_avapps = 0;
118 SYSCTL_INT(_net_qos_policy, OID_AUTO, restrict_avapps,
119     CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restrict_avapps, 0, "");
120 
121 int net_qos_policy_wifi_enabled = 0;
122 SYSCTL_INT(_net_qos_policy, OID_AUTO, wifi_enabled,
123     CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_wifi_enabled, 0, "");
124 
125 int net_qos_policy_capable_enabled = 0;
126 SYSCTL_INT(_net_qos_policy, OID_AUTO, capable_enabled,
127     CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_capable_enabled, 0, "");
128 
129 /*
130  * Socket traffic class from network service type
131  */
132 const int sotc_by_netservicetype[_NET_SERVICE_TYPE_COUNT] = {
133 	SO_TC_BE,       /* NET_SERVICE_TYPE_BE */
134 	SO_TC_BK,       /* NET_SERVICE_TYPE_BK */
135 	SO_TC_VI,       /* NET_SERVICE_TYPE_SIG */
136 	SO_TC_VI,       /* NET_SERVICE_TYPE_VI */
137 	SO_TC_VO,       /* NET_SERVICE_TYPE_VO */
138 	SO_TC_RV,       /* NET_SERVICE_TYPE_RV */
139 	SO_TC_AV,       /* NET_SERVICE_TYPE_AV */
140 	SO_TC_OAM,      /* NET_SERVICE_TYPE_OAM */
141 	SO_TC_RD        /* NET_SERVICE_TYPE_RD */
142 };
143 
144 /*
145  * DSCP mappings for QoS Fastlane as based on network service types
146  */
147 static const
148 struct netsvctype_dscp_map fastlane_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
149 	{ .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
150 	{ .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_AF11 },
151 	{ .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS3 },
152 	{ .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
153 	{ .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
154 	{ .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
155 	{ .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
156 	{ .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
157 	{ .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
158 };
159 
160 /*
161  * DSCP mappings for QoS RFC4594 as based on network service types
162  */
163 static const
164 struct netsvctype_dscp_map rfc4594_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
165 	{ .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
166 	{ .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_CS1 },
167 	{ .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS5 },
168 	{ .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
169 	{ .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
170 	{ .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
171 	{ .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
172 	{ .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
173 	{ .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
174 };
175 
176 static struct net_qos_dscp_map fastlane_net_qos_dscp_map;
177 static struct net_qos_dscp_map rfc4594_net_qos_dscp_map;
178 #if (DEBUG || DEVELOPMENT)
179 static struct net_qos_dscp_map custom_net_qos_dscp_map;
180 #endif /* (DEBUG || DEVELOPMENT) */
181 
/*
 * DSCP values start at zero, so an array indexed by DSCP needs one more
 * element than the largest DSCP value.
 */
#define DSCP_ARRAY_SIZE (_MAX_DSCP + 1)
186 
187 /*
188  * The DSCP to UP mapping (via mbuf service class) for WiFi follows is the mapping
189  * that implemented at the 802.11 driver level when the mbuf service class is
190  * MBUF_SC_BE.
191  *
192  * This clashes with the recommended mapping documented by the IETF document
193  * draft-szigeti-tsvwg-ieee-802-11e-01.txt but we keep the mapping to maintain
194  * binary compatibility. Applications should use the network service type socket
195  * option instead to select L2 QoS marking instead of IP_TOS or IPV6_TCLASS.
196  */
197 static const struct dcsp_msc_map default_dscp_to_wifi_ac_map[] = {
198 	{ .dscp = _DSCP_DF, .msc = MBUF_SC_BE },        /* RFC 2474 Standard */
199 	{ .dscp = 1, .msc = MBUF_SC_BE },               /*  */
200 	{ .dscp = 2, .msc = MBUF_SC_BE },               /*  */
201 	{ .dscp = 3, .msc = MBUF_SC_BE },               /*  */
202 	{ .dscp = 4, .msc = MBUF_SC_BE },               /*  */
203 	{ .dscp = 5, .msc = MBUF_SC_BE },               /*  */
204 	{ .dscp = 6, .msc = MBUF_SC_BE },               /*  */
205 	{ .dscp = 7, .msc = MBUF_SC_BE },               /*  */
206 
207 	{ .dscp = _DSCP_CS1, .msc = MBUF_SC_BK },       /* RFC 3662 Low-Priority Data */
208 	{ .dscp = 9, .msc = MBUF_SC_BK },               /*  */
209 	{ .dscp = _DSCP_AF11, .msc = MBUF_SC_BK },      /* RFC 2597 High-Throughput Data */
210 	{ .dscp = 11, .msc = MBUF_SC_BK },              /*  */
211 	{ .dscp = _DSCP_AF12, .msc = MBUF_SC_BK },      /* RFC 2597 High-Throughput Data */
212 	{ .dscp = 13, .msc = MBUF_SC_BK },              /*  */
213 	{ .dscp = _DSCP_AF13, .msc = MBUF_SC_BK },      /* RFC 2597 High-Throughput Data */
214 	{ .dscp = 15, .msc = MBUF_SC_BK },              /*  */
215 
216 	{ .dscp = _DSCP_CS2, .msc = MBUF_SC_BK },       /* RFC 4594 OAM */
217 	{ .dscp = 17, .msc = MBUF_SC_BK },              /*  */
218 	{ .dscp = _DSCP_AF21, .msc = MBUF_SC_BK },      /* RFC 2597 Low-Latency Data */
219 	{ .dscp = 19, .msc = MBUF_SC_BK },              /*  */
220 	{ .dscp = _DSCP_AF22, .msc = MBUF_SC_BK },      /* RFC 2597 Low-Latency Data */
221 	{ .dscp = 21, .msc = MBUF_SC_BK },              /*  */
222 	{ .dscp = _DSCP_AF23, .msc = MBUF_SC_BK },      /* RFC 2597 Low-Latency Data */
223 	{ .dscp = 23, .msc = MBUF_SC_BK },              /*  */
224 
225 	{ .dscp = _DSCP_CS3, .msc = MBUF_SC_BE },       /* RFC 2474 Broadcast Video */
226 	{ .dscp = 25, .msc = MBUF_SC_BE },              /*  */
227 	{ .dscp = _DSCP_AF31, .msc = MBUF_SC_BE },      /* RFC 2597 Multimedia Streaming */
228 	{ .dscp = 27, .msc = MBUF_SC_BE },              /*  */
229 	{ .dscp = _DSCP_AF32, .msc = MBUF_SC_BE },      /* RFC 2597 Multimedia Streaming */
230 	{ .dscp = 29, .msc = MBUF_SC_BE },              /*  */
231 	{ .dscp = _DSCP_AF33, .msc = MBUF_SC_BE },      /* RFC 2597 Multimedia Streaming */
232 	{ .dscp = 31, .msc = MBUF_SC_BE },              /*  */
233 
234 	{ .dscp = _DSCP_CS4, .msc = MBUF_SC_VI },       /* RFC 2474 Real-Time Interactive */
235 	{ .dscp = 33, .msc = MBUF_SC_VI },              /*  */
236 	{ .dscp = _DSCP_AF41, .msc = MBUF_SC_VI },      /* RFC 2597 Multimedia Conferencing */
237 	{ .dscp = 35, .msc = MBUF_SC_VI },              /*  */
238 	{ .dscp = _DSCP_AF42, .msc = MBUF_SC_VI },      /* RFC 2597 Multimedia Conferencing */
239 	{ .dscp = 37, .msc = MBUF_SC_VI },              /*  */
240 	{ .dscp = _DSCP_AF43, .msc = MBUF_SC_VI },      /* RFC 2597 Multimedia Conferencing */
241 	{ .dscp = 39, .msc = MBUF_SC_VI },              /*  */
242 
243 	{ .dscp = _DSCP_CS5, .msc = MBUF_SC_VI },       /* RFC 2474 Signaling */
244 	{ .dscp = 41, .msc = MBUF_SC_VI },              /*  */
245 	{ .dscp = 42, .msc = MBUF_SC_VI },              /*  */
246 	{ .dscp = 43, .msc = MBUF_SC_VI },              /*  */
247 	{ .dscp = _DSCP_VA, .msc = MBUF_SC_VI },        /* RFC 5865 VOICE-ADMIT */
248 	{ .dscp = 45, .msc = MBUF_SC_VI },              /*  */
249 	{ .dscp = _DSCP_EF, .msc = MBUF_SC_VI },        /* RFC 3246 Telephony */
250 	{ .dscp = 47, .msc = MBUF_SC_VI },              /*  */
251 
252 	{ .dscp = _DSCP_CS6, .msc = MBUF_SC_VO },       /* Wi-Fi WMM Certification: Chariot */
253 	{ .dscp = 49, .msc = MBUF_SC_VO },              /*  */
254 	{ .dscp = 50, .msc = MBUF_SC_VO },              /*  */
255 	{ .dscp = 51, .msc = MBUF_SC_VO },              /*  */
256 	{ .dscp = 52, .msc = MBUF_SC_VO },              /* Wi-Fi WMM Certification: Sigma */
257 	{ .dscp = 53, .msc = MBUF_SC_VO },              /*  */
258 	{ .dscp = 54, .msc = MBUF_SC_VO },              /*  */
259 	{ .dscp = 55, .msc = MBUF_SC_VO },              /*  */
260 
261 	{ .dscp = _DSCP_CS7, .msc = MBUF_SC_VO },       /* Wi-Fi WMM Certification: Chariot */
262 	{ .dscp = 57, .msc = MBUF_SC_VO },              /*  */
263 	{ .dscp = 58, .msc = MBUF_SC_VO },              /*  */
264 	{ .dscp = 59, .msc = MBUF_SC_VO },              /*  */
265 	{ .dscp = 60, .msc = MBUF_SC_VO },              /*  */
266 	{ .dscp = 61, .msc = MBUF_SC_VO },              /*  */
267 	{ .dscp = 62, .msc = MBUF_SC_VO },              /*  */
268 	{ .dscp = 63, .msc = MBUF_SC_VO },              /*  */
269 
270 	{ .dscp = 255, .msc = MBUF_SC_UNSPEC }          /* invalid DSCP to mark last entry */
271 };
272 
273 mbuf_svc_class_t wifi_dscp_to_msc_array[DSCP_ARRAY_SIZE];
274 
/*
 * Background connections may switch to foreground TCP congestion control
 * once the interface has seen no foreground activity for bg_switch_time
 * seconds.
 */
#define TCP_BG_SWITCH_TIME 2 /* seconds */
281 
282 #if (DEVELOPMENT || DEBUG)
283 
284 static int tfp_count = 0;
285 
286 static TAILQ_HEAD(, tclass_for_proc) tfp_head =
287     TAILQ_HEAD_INITIALIZER(tfp_head);
288 
289 struct tclass_for_proc {
290 	TAILQ_ENTRY(tclass_for_proc)    tfp_link;
291 	int             tfp_class;
292 	pid_t           tfp_pid;
293 	char            tfp_pname[(2 * MAXCOMLEN) + 1];
294 	uint32_t        tfp_qos_mode;
295 };
296 
/* Readers of the override list */
static int get_pid_tclass(struct so_tcdbg *);
static int get_pname_tclass(struct so_tcdbg *);

/* Writers of the override list */
static int set_pid_tclass(struct so_tcdbg *);
static int set_pname_tclass(struct so_tcdbg *);
static int flush_pid_tclass(struct so_tcdbg *);
static int purge_tclass_for_proc(void);
static int flush_tclass_for_proc(void);

/* Applies a matching override to a socket of the current process */
static void set_tclass_for_curr_proc(struct socket *);
305 
306 /*
307  * Must be called with tclass_lock held
308  */
309 static struct tclass_for_proc *
find_tfp_by_pid(pid_t pid)310 find_tfp_by_pid(pid_t pid)
311 {
312 	struct tclass_for_proc *tfp;
313 
314 	TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
315 		if (tfp->tfp_pid == pid) {
316 			break;
317 		}
318 	}
319 	return tfp;
320 }
321 
322 /*
323  * Must be called with tclass_lock held
324  */
325 static struct tclass_for_proc *
find_tfp_by_pname(const char * pname)326 find_tfp_by_pname(const char *pname)
327 {
328 	struct tclass_for_proc *tfp;
329 
330 	TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
331 		if (strncmp(pname, tfp->tfp_pname,
332 		    sizeof(tfp->tfp_pname)) == 0) {
333 			break;
334 		}
335 	}
336 	return tfp;
337 }
338 
339 __private_extern__ void
set_tclass_for_curr_proc(struct socket * so)340 set_tclass_for_curr_proc(struct socket *so)
341 {
342 	struct tclass_for_proc *tfp = NULL;
343 	proc_t p = current_proc();      /* Not ref counted */
344 	pid_t pid = proc_pid(p);
345 	char *pname = proc_best_name(p);
346 
347 	lck_mtx_lock(&tclass_lock);
348 
349 	TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
350 		if ((tfp->tfp_pid == pid) || (tfp->tfp_pid == -1 &&
351 		    strncmp(pname, tfp->tfp_pname,
352 		    sizeof(tfp->tfp_pname)) == 0)) {
353 			if (tfp->tfp_class != SO_TC_UNSPEC) {
354 				so->so_traffic_class = (uint16_t)tfp->tfp_class;
355 			}
356 
357 			if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
358 				so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
359 			} else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
360 				so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
361 			}
362 			break;
363 		}
364 	}
365 
366 	lck_mtx_unlock(&tclass_lock);
367 }
368 
369 /*
370  * Purge entries with PIDs of exited processes
371  */
372 int
purge_tclass_for_proc(void)373 purge_tclass_for_proc(void)
374 {
375 	int error = 0;
376 	struct tclass_for_proc *tfp, *tvar;
377 
378 	lck_mtx_lock(&tclass_lock);
379 
380 	TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
381 		proc_t p;
382 
383 		if (tfp->tfp_pid == -1) {
384 			continue;
385 		}
386 		if ((p = proc_find(tfp->tfp_pid)) == NULL) {
387 			tfp_count--;
388 			TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
389 
390 			kfree_type(struct tclass_for_proc, tfp);
391 		} else {
392 			proc_rele(p);
393 		}
394 	}
395 
396 	lck_mtx_unlock(&tclass_lock);
397 
398 	return error;
399 }
400 
401 /*
402  * Remove one entry
403  * Must be called with tclass_lock held
404  */
405 static void
free_tclass_for_proc(struct tclass_for_proc * tfp)406 free_tclass_for_proc(struct tclass_for_proc *tfp)
407 {
408 	if (tfp == NULL) {
409 		return;
410 	}
411 	tfp_count--;
412 	TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
413 	kfree_type(struct tclass_for_proc, tfp);
414 }
415 
416 /*
417  * Remove all entries
418  */
419 int
flush_tclass_for_proc(void)420 flush_tclass_for_proc(void)
421 {
422 	int error = 0;
423 	struct tclass_for_proc *tfp, *tvar;
424 
425 	lck_mtx_lock(&tclass_lock);
426 
427 	TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
428 		free_tclass_for_proc(tfp);
429 	}
430 
431 	lck_mtx_unlock(&tclass_lock);
432 
433 	return error;
434 }
435 
436 /*
437  * Must be called with tclass_lock held
438  */
439 static struct tclass_for_proc *
alloc_tclass_for_proc(pid_t pid,const char * pname)440 alloc_tclass_for_proc(pid_t pid, const char *pname)
441 {
442 	struct tclass_for_proc *tfp;
443 
444 	if (pid == -1 && pname == NULL) {
445 		return NULL;
446 	}
447 
448 	tfp = kalloc_type(struct tclass_for_proc, Z_NOWAIT | Z_ZERO);
449 	if (tfp == NULL) {
450 		return NULL;
451 	}
452 
453 	tfp->tfp_pid = pid;
454 	/*
455 	 * Add per pid entries before per proc name so we can find
456 	 * a specific instance of a process before the general name base entry.
457 	 */
458 	if (pid != -1) {
459 		TAILQ_INSERT_HEAD(&tfp_head, tfp, tfp_link);
460 	} else {
461 		strlcpy(tfp->tfp_pname, pname, sizeof(tfp->tfp_pname));
462 		TAILQ_INSERT_TAIL(&tfp_head, tfp, tfp_link);
463 	}
464 
465 	tfp_count++;
466 
467 	return tfp;
468 }
469 
470 /*
471  * SO_TC_UNSPEC for tclass means to remove the entry
472  */
473 int
set_pid_tclass(struct so_tcdbg * so_tcdbg)474 set_pid_tclass(struct so_tcdbg *so_tcdbg)
475 {
476 	int error = EINVAL;
477 	proc_t p = NULL;
478 	struct tclass_for_proc *tfp;
479 	pid_t pid = so_tcdbg->so_tcdbg_pid;
480 	int tclass = so_tcdbg->so_tcdbg_tclass;
481 	int netsvctype = so_tcdbg->so_tcdbg_netsvctype;
482 
483 	p = proc_find(pid);
484 	if (p == NULL) {
485 		printf("%s proc_find(%d) failed\n", __func__, pid);
486 		goto done;
487 	}
488 
489 	/* Need a tfp */
490 	lck_mtx_lock(&tclass_lock);
491 
492 	tfp = find_tfp_by_pid(pid);
493 	if (tfp == NULL) {
494 		tfp = alloc_tclass_for_proc(pid, NULL);
495 		if (tfp == NULL) {
496 			lck_mtx_unlock(&tclass_lock);
497 			error = ENOBUFS;
498 			goto done;
499 		}
500 	}
501 	tfp->tfp_class = tclass;
502 	tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
503 
504 	lck_mtx_unlock(&tclass_lock);
505 
506 	if (tfp != NULL) {
507 		struct fileproc *fp;
508 
509 		proc_fdlock(p);
510 
511 		fdt_foreach(fp, p) {
512 			struct socket *so;
513 
514 			if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
515 				continue;
516 			}
517 
518 			so = (struct socket *)fp_get_data(fp);
519 			if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
520 				continue;
521 			}
522 
523 			socket_lock(so, 1);
524 			if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
525 				so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
526 			} else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
527 				so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
528 			}
529 			socket_unlock(so, 1);
530 
531 			if (netsvctype != _NET_SERVICE_TYPE_UNSPEC) {
532 				error = sock_setsockopt(so, SOL_SOCKET,
533 				    SO_NET_SERVICE_TYPE, &netsvctype, sizeof(int));
534 			}
535 			if (tclass != SO_TC_UNSPEC) {
536 				error = sock_setsockopt(so, SOL_SOCKET,
537 				    SO_TRAFFIC_CLASS, &tclass, sizeof(int));
538 			}
539 		}
540 
541 		proc_fdunlock(p);
542 	}
543 
544 	error = 0;
545 done:
546 	if (p != NULL) {
547 		proc_rele(p);
548 	}
549 
550 	return error;
551 }
552 
553 int
set_pname_tclass(struct so_tcdbg * so_tcdbg)554 set_pname_tclass(struct so_tcdbg *so_tcdbg)
555 {
556 	int error = EINVAL;
557 	struct tclass_for_proc *tfp;
558 
559 	lck_mtx_lock(&tclass_lock);
560 
561 	tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
562 	if (tfp == NULL) {
563 		tfp = alloc_tclass_for_proc(-1, so_tcdbg->so_tcdbg_pname);
564 		if (tfp == NULL) {
565 			lck_mtx_unlock(&tclass_lock);
566 			error = ENOBUFS;
567 			goto done;
568 		}
569 	}
570 	tfp->tfp_class = so_tcdbg->so_tcdbg_tclass;
571 	tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
572 
573 	lck_mtx_unlock(&tclass_lock);
574 
575 	error = 0;
576 done:
577 
578 	return error;
579 }
580 
581 static int
flush_pid_tclass(struct so_tcdbg * so_tcdbg)582 flush_pid_tclass(struct so_tcdbg *so_tcdbg)
583 {
584 	pid_t pid = so_tcdbg->so_tcdbg_pid;
585 	int tclass = so_tcdbg->so_tcdbg_tclass;
586 	struct fileproc *fp;
587 	proc_t p;
588 	int error;
589 
590 	p = proc_find(pid);
591 	if (p == PROC_NULL) {
592 		printf("%s proc_find(%d) failed\n", __func__, pid);
593 		return EINVAL;
594 	}
595 
596 	proc_fdlock(p);
597 
598 	fdt_foreach(fp, p) {
599 		struct socket *so;
600 
601 		if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
602 			continue;
603 		}
604 
605 		so = (struct socket *)fp_get_data(fp);
606 		error = sock_setsockopt(so, SOL_SOCKET, SO_FLUSH, &tclass,
607 		    sizeof(tclass));
608 		if (error != 0) {
609 			printf("%s: setsockopt(SO_FLUSH) (so=0x%llx, fd=%d, "
610 			    "tclass=%d) failed %d\n", __func__,
611 			    (uint64_t)VM_KERNEL_ADDRPERM(so), fdt_foreach_fd(), tclass,
612 			    error);
613 		}
614 	}
615 
616 	proc_fdunlock(p);
617 
618 	proc_rele(p);
619 	return 0;
620 }
621 
622 int
get_pid_tclass(struct so_tcdbg * so_tcdbg)623 get_pid_tclass(struct so_tcdbg *so_tcdbg)
624 {
625 	int error = EINVAL;
626 	proc_t p = NULL;
627 	struct tclass_for_proc *tfp;
628 	pid_t pid = so_tcdbg->so_tcdbg_pid;
629 
630 	so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
631 
632 	p = proc_find(pid);
633 	if (p == NULL) {
634 		printf("%s proc_find(%d) failed\n", __func__, pid);
635 		goto done;
636 	}
637 
638 	/* Need a tfp */
639 	lck_mtx_lock(&tclass_lock);
640 
641 	tfp = find_tfp_by_pid(pid);
642 	if (tfp != NULL) {
643 		so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
644 		so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
645 		error = 0;
646 	}
647 	lck_mtx_unlock(&tclass_lock);
648 done:
649 	if (p != NULL) {
650 		proc_rele(p);
651 	}
652 
653 	return error;
654 }
655 
656 int
get_pname_tclass(struct so_tcdbg * so_tcdbg)657 get_pname_tclass(struct so_tcdbg *so_tcdbg)
658 {
659 	int error = EINVAL;
660 	struct tclass_for_proc *tfp;
661 
662 	so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
663 
664 	/* Need a tfp */
665 	lck_mtx_lock(&tclass_lock);
666 
667 	tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
668 	if (tfp != NULL) {
669 		so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
670 		so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
671 		error = 0;
672 	}
673 	lck_mtx_unlock(&tclass_lock);
674 
675 	return error;
676 }
677 
678 static int
delete_tclass_for_pid_pname(struct so_tcdbg * so_tcdbg)679 delete_tclass_for_pid_pname(struct so_tcdbg *so_tcdbg)
680 {
681 	int error = EINVAL;
682 	pid_t pid = so_tcdbg->so_tcdbg_pid;
683 	struct tclass_for_proc *tfp = NULL;
684 
685 	lck_mtx_lock(&tclass_lock);
686 
687 	if (pid != -1) {
688 		tfp = find_tfp_by_pid(pid);
689 	} else {
690 		tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
691 	}
692 
693 	if (tfp != NULL) {
694 		free_tclass_for_proc(tfp);
695 		error = 0;
696 	}
697 
698 	lck_mtx_unlock(&tclass_lock);
699 
700 	return error;
701 }
702 
703 /*
704  * Setting options requires privileges
705  */
706 __private_extern__ int
so_set_tcdbg(struct socket * so,struct so_tcdbg * so_tcdbg)707 so_set_tcdbg(struct socket *so, struct so_tcdbg *so_tcdbg)
708 {
709 	int error = 0;
710 
711 	if ((so->so_state & SS_PRIV) == 0) {
712 		return EPERM;
713 	}
714 
715 	socket_unlock(so, 0);
716 
717 	switch (so_tcdbg->so_tcdbg_cmd) {
718 	case SO_TCDBG_PID:
719 		error = set_pid_tclass(so_tcdbg);
720 		break;
721 
722 	case SO_TCDBG_PNAME:
723 		error = set_pname_tclass(so_tcdbg);
724 		break;
725 
726 	case SO_TCDBG_PURGE:
727 		error = purge_tclass_for_proc();
728 		break;
729 
730 	case SO_TCDBG_FLUSH:
731 		error = flush_tclass_for_proc();
732 		break;
733 
734 	case SO_TCDBG_DELETE:
735 		error = delete_tclass_for_pid_pname(so_tcdbg);
736 		break;
737 
738 	case SO_TCDBG_TCFLUSH_PID:
739 		error = flush_pid_tclass(so_tcdbg);
740 		break;
741 
742 	default:
743 		error = EINVAL;
744 		break;
745 	}
746 
747 	socket_lock(so, 0);
748 
749 	return error;
750 }
751 
752 /*
753  * Not required to be privileged to get
754  */
755 __private_extern__ int
sogetopt_tcdbg(struct socket * so,struct sockopt * sopt)756 sogetopt_tcdbg(struct socket *so, struct sockopt *sopt)
757 {
758 	int error = 0;
759 	struct so_tcdbg so_tcdbg;
760 	void *buf = NULL;
761 	size_t len = sopt->sopt_valsize;
762 
763 	error = sooptcopyin(sopt, &so_tcdbg, sizeof(struct so_tcdbg),
764 	    sizeof(struct so_tcdbg));
765 	if (error != 0) {
766 		return error;
767 	}
768 
769 	sopt->sopt_valsize = len;
770 
771 	socket_unlock(so, 0);
772 
773 	switch (so_tcdbg.so_tcdbg_cmd) {
774 	case SO_TCDBG_PID:
775 		error = get_pid_tclass(&so_tcdbg);
776 		break;
777 
778 	case SO_TCDBG_PNAME:
779 		error = get_pname_tclass(&so_tcdbg);
780 		break;
781 
782 	case SO_TCDBG_COUNT:
783 		lck_mtx_lock(&tclass_lock);
784 		so_tcdbg.so_tcdbg_count = tfp_count;
785 		lck_mtx_unlock(&tclass_lock);
786 		break;
787 
788 	case SO_TCDBG_LIST: {
789 		struct tclass_for_proc *tfp;
790 		int n, alloc_count;
791 		struct so_tcdbg *ptr;
792 
793 		lck_mtx_lock(&tclass_lock);
794 		if ((alloc_count = tfp_count) == 0) {
795 			lck_mtx_unlock(&tclass_lock);
796 			error = EINVAL;
797 			break;
798 		}
799 		len = alloc_count * sizeof(struct so_tcdbg);
800 		lck_mtx_unlock(&tclass_lock);
801 
802 		buf = kalloc_data(len, Z_WAITOK | Z_ZERO);
803 		if (buf == NULL) {
804 			error = ENOBUFS;
805 			break;
806 		}
807 
808 		lck_mtx_lock(&tclass_lock);
809 		n = 0;
810 		ptr = (struct so_tcdbg *)buf;
811 		TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
812 			if (++n > alloc_count) {
813 				break;
814 			}
815 			if (tfp->tfp_pid != -1) {
816 				ptr->so_tcdbg_cmd = SO_TCDBG_PID;
817 				ptr->so_tcdbg_pid = tfp->tfp_pid;
818 			} else {
819 				ptr->so_tcdbg_cmd = SO_TCDBG_PNAME;
820 				ptr->so_tcdbg_pid = -1;
821 				strlcpy(ptr->so_tcdbg_pname,
822 				    tfp->tfp_pname,
823 				    sizeof(ptr->so_tcdbg_pname));
824 			}
825 			ptr->so_tcdbg_tclass = tfp->tfp_class;
826 			ptr->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
827 			ptr++;
828 		}
829 
830 		lck_mtx_unlock(&tclass_lock);
831 	}
832 	break;
833 
834 	default:
835 		error = EINVAL;
836 		break;
837 	}
838 
839 	socket_lock(so, 0);
840 
841 	if (error == 0) {
842 		if (buf == NULL) {
843 			error = sooptcopyout(sopt, &so_tcdbg,
844 			    sizeof(struct so_tcdbg));
845 		} else {
846 			error = sooptcopyout(sopt, buf, len);
847 			kfree_data(buf, len);
848 		}
849 	}
850 	return error;
851 }
852 
853 #endif /* (DEVELOPMENT || DEBUG) */
854 
855 int
so_get_netsvc_marking_level(struct socket * so)856 so_get_netsvc_marking_level(struct socket *so)
857 {
858 	int marking_level = NETSVC_MRKNG_UNKNOWN;
859 	struct ifnet *ifp = NULL;
860 
861 	switch (SOCK_DOM(so)) {
862 	case PF_INET: {
863 		struct inpcb *inp = sotoinpcb(so);
864 
865 		if (inp != NULL) {
866 			ifp = inp->inp_last_outifp;
867 		}
868 		break;
869 	}
870 	case PF_INET6: {
871 		struct in6pcb *in6p = sotoin6pcb(so);
872 
873 		if (in6p != NULL) {
874 			ifp = in6p->in6p_last_outifp;
875 		}
876 		break;
877 	}
878 	default:
879 		break;
880 	}
881 	if (ifp != NULL) {
882 		if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0) {
883 			if ((so->so_flags1 & SOF1_QOSMARKING_ALLOWED)) {
884 				marking_level = NETSVC_MRKNG_LVL_L3L2_ALL;
885 			} else {
886 				marking_level = NETSVC_MRKNG_LVL_L3L2_BK;
887 			}
888 		} else {
889 			marking_level = NETSVC_MRKNG_LVL_L2;
890 		}
891 	}
892 	return marking_level;
893 }
894 
895 __private_extern__ int
so_set_traffic_class(struct socket * so,int optval)896 so_set_traffic_class(struct socket *so, int optval)
897 {
898 	int error = 0;
899 
900 	if (optval < SO_TC_BE || optval > SO_TC_CTL) {
901 		error = EINVAL;
902 	} else {
903 		switch (optval) {
904 		case _SO_TC_BK:
905 			optval = SO_TC_BK;
906 			break;
907 		case _SO_TC_VI:
908 			optval = SO_TC_VI;
909 			break;
910 		case _SO_TC_VO:
911 			optval = SO_TC_VO;
912 			break;
913 		default:
914 			if (!SO_VALID_TC(optval)) {
915 				error = EINVAL;
916 			}
917 			break;
918 		}
919 
920 		if (error == 0) {
921 			int oldval = so->so_traffic_class;
922 
923 			VERIFY(SO_VALID_TC(optval));
924 			so->so_traffic_class = (uint16_t)optval;
925 
926 			if ((SOCK_DOM(so) == PF_INET ||
927 			    SOCK_DOM(so) == PF_INET6) &&
928 			    SOCK_TYPE(so) == SOCK_STREAM) {
929 				set_tcp_stream_priority(so);
930 			}
931 
932 			if ((SOCK_DOM(so) == PF_INET ||
933 			    SOCK_DOM(so) == PF_INET6) &&
934 			    optval != oldval && (optval == SO_TC_BK_SYS ||
935 			    oldval == SO_TC_BK_SYS)) {
936 				/*
937 				 * If the app switches from BK_SYS to something
938 				 * else, resume the socket if it was suspended.
939 				 */
940 				if (oldval == SO_TC_BK_SYS) {
941 					inp_reset_fc_state(so->so_pcb);
942 				}
943 
944 				SOTHROTTLELOG("throttle[%d]: so 0x%llx "
945 				    "[%d,%d] opportunistic %s\n", so->last_pid,
946 				    (uint64_t)VM_KERNEL_ADDRPERM(so),
947 				    SOCK_DOM(so), SOCK_TYPE(so),
948 				    (optval == SO_TC_BK_SYS) ? "ON" : "OFF");
949 			}
950 		}
951 	}
952 	return error;
953 }
954 
955 __private_extern__ int
so_set_net_service_type(struct socket * so,int netsvctype)956 so_set_net_service_type(struct socket *so, int netsvctype)
957 {
958 	int sotc;
959 	int error;
960 
961 	if (!IS_VALID_NET_SERVICE_TYPE(netsvctype)) {
962 		return EINVAL;
963 	}
964 
965 	sotc = sotc_by_netservicetype[netsvctype];
966 	error = so_set_traffic_class(so, sotc);
967 	if (error != 0) {
968 		return error;
969 	}
970 	so->so_netsvctype = (int8_t)netsvctype;
971 	so->so_flags1 |= SOF1_TC_NET_SERV_TYPE;
972 
973 	return 0;
974 }
975 
976 __private_extern__ void
so_set_default_traffic_class(struct socket * so)977 so_set_default_traffic_class(struct socket *so)
978 {
979 	so->so_traffic_class = SO_TC_BE;
980 
981 	if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6)) {
982 		if (net_qos_policy_restricted == 0) {
983 			so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
984 		}
985 #if (DEVELOPMENT || DEBUG)
986 		if (tfp_count > 0) {
987 			set_tclass_for_curr_proc(so);
988 		}
989 #endif /* (DEVELOPMENT || DEBUG) */
990 	}
991 }
992 
993 __private_extern__ int
so_set_opportunistic(struct socket * so,int optval)994 so_set_opportunistic(struct socket *so, int optval)
995 {
996 	return so_set_traffic_class(so, (optval == 0) ?
997 	           SO_TC_BE : SO_TC_BK_SYS);
998 }
999 
1000 __private_extern__ int
so_get_opportunistic(struct socket * so)1001 so_get_opportunistic(struct socket *so)
1002 {
1003 	return so->so_traffic_class == SO_TC_BK_SYS;
1004 }
1005 
1006 __private_extern__ int
so_tc_from_control(struct mbuf * control,int * out_netsvctype)1007 so_tc_from_control(struct mbuf *control, int *out_netsvctype)
1008 {
1009 	struct cmsghdr *cm;
1010 	int sotc = SO_TC_UNSPEC;
1011 
1012 	*out_netsvctype = _NET_SERVICE_TYPE_UNSPEC;
1013 
1014 	for (cm = M_FIRST_CMSGHDR(control);
1015 	    is_cmsg_valid(control, cm);
1016 	    cm = M_NXT_CMSGHDR(control, cm)) {
1017 		int val;
1018 
1019 		if (cm->cmsg_level != SOL_SOCKET ||
1020 		    cm->cmsg_len != CMSG_LEN(sizeof(int))) {
1021 			continue;
1022 		}
1023 		val = *(int *)(void *)CMSG_DATA(cm);
1024 		/*
1025 		 * The first valid option wins
1026 		 */
1027 		switch (cm->cmsg_type) {
1028 		case SO_TRAFFIC_CLASS:
1029 			if (SO_VALID_TC(val)) {
1030 				sotc = val;
1031 				return sotc;
1032 				/* NOT REACHED */
1033 			} else if (val < SO_TC_NET_SERVICE_OFFSET) {
1034 				break;
1035 			}
1036 			/*
1037 			 * Handle the case SO_NET_SERVICE_TYPE values are
1038 			 * passed using SO_TRAFFIC_CLASS
1039 			 */
1040 			val = val - SO_TC_NET_SERVICE_OFFSET;
1041 			OS_FALLTHROUGH;
1042 		case SO_NET_SERVICE_TYPE:
1043 			if (!IS_VALID_NET_SERVICE_TYPE(val)) {
1044 				break;
1045 			}
1046 			*out_netsvctype = val;
1047 			sotc = sotc_by_netservicetype[val];
1048 			return sotc;
1049 		/* NOT REACHED */
1050 		default:
1051 			break;
1052 		}
1053 	}
1054 
1055 	return sotc;
1056 }
1057 
1058 __private_extern__ int
so_tos_from_control(struct mbuf * control)1059 so_tos_from_control(struct mbuf *control)
1060 {
1061 	struct cmsghdr *cm;
1062 	int tos = IPTOS_UNSPEC;
1063 
1064 	for (cm = M_FIRST_CMSGHDR(control);
1065 	    is_cmsg_valid(control, cm);
1066 	    cm = M_NXT_CMSGHDR(control, cm)) {
1067 		if (cm->cmsg_len != CMSG_LEN(sizeof(int))) {
1068 			continue;
1069 		}
1070 
1071 		if ((cm->cmsg_level == IPPROTO_IP &&
1072 		    cm->cmsg_type == IP_TOS) ||
1073 		    (cm->cmsg_level == IPPROTO_IPV6 &&
1074 		    cm->cmsg_type == IPV6_TCLASS)) {
1075 			tos = *(int *)(void *)CMSG_DATA(cm) & IPTOS_MASK;
1076 			/* The first valid option wins */
1077 			break;
1078 		}
1079 	}
1080 
1081 	return tos;
1082 }
1083 
1084 __private_extern__ void
so_recv_data_stat(struct socket * so,struct mbuf * m,size_t off)1085 so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off)
1086 {
1087 	uint32_t mtc = m_get_traffic_class(m);
1088 
1089 	if (mtc >= SO_TC_STATS_MAX) {
1090 		mtc = MBUF_TC_BE;
1091 	}
1092 
1093 	so->so_tc_stats[mtc].rxpackets += 1;
1094 	so->so_tc_stats[mtc].rxbytes +=
1095 	    ((m->m_flags & M_PKTHDR) ? m->m_pkthdr.len : 0) + off;
1096 }
1097 
1098 __private_extern__ void
so_inc_recv_data_stat(struct socket * so,size_t pkts,size_t bytes,uint32_t mtc)1099 so_inc_recv_data_stat(struct socket *so, size_t pkts, size_t bytes,
1100     uint32_t mtc)
1101 {
1102 	if (mtc >= SO_TC_STATS_MAX) {
1103 		mtc = MBUF_TC_BE;
1104 	}
1105 
1106 	so->so_tc_stats[mtc].rxpackets += pkts;
1107 	so->so_tc_stats[mtc].rxbytes += bytes;
1108 }
1109 
1110 static inline int
so_throttle_best_effort(struct socket * so,struct ifnet * ifp)1111 so_throttle_best_effort(struct socket *so, struct ifnet *ifp)
1112 {
1113 	uint32_t uptime = (uint32_t)net_uptime();
1114 	return soissrcbesteffort(so) &&
1115 	       net_io_policy_throttle_best_effort == 1 &&
1116 	       ifp->if_rt_sendts > 0 &&
1117 	       (int)(uptime - ifp->if_rt_sendts) <= TCP_BG_SWITCH_TIME;
1118 }
1119 
1120 __private_extern__ void
set_tcp_stream_priority(struct socket * so)1121 set_tcp_stream_priority(struct socket *so)
1122 {
1123 	struct inpcb *inp = sotoinpcb(so);
1124 	struct tcpcb *tp = intotcpcb(inp);
1125 	struct ifnet *outifp;
1126 	u_char old_cc = tp->tcp_cc_index;
1127 	int recvbg = IS_TCP_RECV_BG(so);
1128 	bool is_local = false, fg_active = false;
1129 	uint32_t uptime;
1130 
1131 	VERIFY((SOCK_CHECK_DOM(so, PF_INET) ||
1132 	    SOCK_CHECK_DOM(so, PF_INET6)) &&
1133 	    SOCK_CHECK_TYPE(so, SOCK_STREAM) &&
1134 	    SOCK_CHECK_PROTO(so, IPPROTO_TCP));
1135 
1136 	/* Return if the socket is in a terminal state */
1137 	if (inp->inp_state == INPCB_STATE_DEAD) {
1138 		return;
1139 	}
1140 
1141 	outifp = inp->inp_last_outifp;
1142 	uptime = (uint32_t)net_uptime();
1143 
1144 	/*
1145 	 * If the socket was marked as a background socket or if the
1146 	 * traffic class is set to background with traffic class socket
1147 	 * option then make both send and recv side of the stream to be
1148 	 * background. The variable sotcdb which can be set with sysctl
1149 	 * is used to disable these settings for testing.
1150 	 */
1151 	if (outifp == NULL || (outifp->if_flags & IFF_LOOPBACK)) {
1152 		is_local = true;
1153 	}
1154 
1155 	/* Check if there has been recent foreground activity */
1156 	if (outifp != NULL) {
1157 		/*
1158 		 * If the traffic source is background, check if
1159 		 * there is recent foreground activity which should
1160 		 * continue to keep the traffic source as background.
1161 		 * Otherwise, we can switch the traffic source to
1162 		 * foreground.
1163 		 */
1164 		if (soissrcbackground(so) && outifp->if_fg_sendts > 0 &&
1165 		    (int)(uptime - outifp->if_fg_sendts) <= TCP_BG_SWITCH_TIME) {
1166 			fg_active = true;
1167 		}
1168 
1169 		/*
1170 		 * The traffic source is best-effort -- check if
1171 		 * the policy to throttle best effort is enabled
1172 		 * and there was realtime activity on this
1173 		 * interface recently. If this is true, enable
1174 		 * algorithms that respond to increased latency
1175 		 * on best-effort traffic.
1176 		 */
1177 		if (so_throttle_best_effort(so, outifp)) {
1178 			fg_active = true;
1179 		}
1180 	}
1181 
1182 	/*
1183 	 * System initiated background traffic like cloud uploads should
1184 	 * always use background delay sensitive algorithms. This will
1185 	 * make the stream more responsive to other streams on the user's
1186 	 * network and it will minimize latency induced.
1187 	 */
1188 	if (fg_active || IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
1189 		/*
1190 		 * If the interface that the connection is using is
1191 		 * loopback, do not use background congestion
1192 		 * control algorithm.
1193 		 *
1194 		 * If there has been recent foreground activity or if there
1195 		 * was an indication that a real time foreground application
1196 		 * is going to use networking (net_io_policy_throttled),
1197 		 * switch the background and best effort streams to use background
1198 		 * congestion control algorithm.
1199 		 */
1200 		if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0 || is_local) {
1201 			if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
1202 				tcp_set_foreground_cc(so);
1203 			}
1204 		} else {
1205 			if (old_cc != TCP_CC_ALGO_BACKGROUND_INDEX) {
1206 				tcp_set_background_cc(so);
1207 			}
1208 		}
1209 
1210 		/* Set receive side background flags */
1211 		if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0 || is_local) {
1212 			tcp_clear_recv_bg(so);
1213 		} else {
1214 			tcp_set_recv_bg(so);
1215 		}
1216 	} else {
1217 		/*
1218 		 * If there is no recent foreground activity, even the
1219 		 * background flows can use foreground congestion controller.
1220 		 */
1221 		tcp_clear_recv_bg(so);
1222 		if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
1223 			tcp_set_foreground_cc(so);
1224 		}
1225 	}
1226 
1227 	if (old_cc != tp->tcp_cc_index || recvbg != IS_TCP_RECV_BG(so)) {
1228 		SOTHROTTLELOG("throttle[%d]: so 0x%llx [%d,%d] TCP %s send; "
1229 		    "%s recv\n", so->last_pid,
1230 		    (uint64_t)VM_KERNEL_ADDRPERM(so),
1231 		    SOCK_DOM(so), SOCK_TYPE(so),
1232 		    (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) ?
1233 		    "background" : "foreground",
1234 		    IS_TCP_RECV_BG(so) ? "background" : "foreground");
1235 	}
1236 }
1237 
1238 /*
1239  * Set traffic class to an IPv4 or IPv6 packet
1240  * - mark the mbuf
1241  * - set the DSCP code following the WMM mapping
1242  */
1243 __private_extern__ void
set_packet_service_class(struct mbuf * m,struct socket * so,int sotc,uint32_t flags)1244 set_packet_service_class(struct mbuf *m, struct socket *so,
1245     int sotc, uint32_t flags)
1246 {
1247 	mbuf_svc_class_t msc = MBUF_SC_BE;         /* Best effort by default */
1248 	struct inpcb *inp = sotoinpcb(so); /* in6pcb and inpcb are the same */
1249 
1250 	if (!(m->m_flags & M_PKTHDR)) {
1251 		return;
1252 	}
1253 
1254 	/*
1255 	 * Here is the precedence:
1256 	 * 1) TRAFFIC_MGT_SO_BACKGROUND trumps all
1257 	 * 2) Traffic class passed via ancillary data to sendmsdg(2)
1258 	 * 3) Traffic class socket option last
1259 	 */
1260 	if (sotc != SO_TC_UNSPEC) {
1261 		VERIFY(SO_VALID_TC(sotc));
1262 		msc = so_tc2msc(sotc);
1263 		/* Assert because tc must have been valid */
1264 		VERIFY(MBUF_VALID_SC(msc));
1265 	}
1266 
1267 	/*
1268 	 * If TRAFFIC_MGT_SO_BACKGROUND is set or policy to throttle
1269 	 * best effort is set, depress the priority.
1270 	 */
1271 	if (!IS_MBUF_SC_BACKGROUND(msc) && soisthrottled(so)) {
1272 		msc = MBUF_SC_BK;
1273 	}
1274 
1275 	if (IS_MBUF_SC_BESTEFFORT(msc) && inp->inp_last_outifp != NULL &&
1276 	    so_throttle_best_effort(so, inp->inp_last_outifp)) {
1277 		msc = MBUF_SC_BK;
1278 	}
1279 
1280 	if (soissrcbackground(so)) {
1281 		m->m_pkthdr.pkt_flags |= PKTF_SO_BACKGROUND;
1282 	}
1283 
1284 	if (soissrcrealtime(so) || IS_MBUF_SC_REALTIME(msc)) {
1285 		m->m_pkthdr.pkt_flags |= PKTF_SO_REALTIME;
1286 	}
1287 	/*
1288 	 * Set the traffic class in the mbuf packet header svc field
1289 	 */
1290 	if (sotcdb & SOTCDB_NO_MTC) {
1291 		goto no_mbtc;
1292 	}
1293 
1294 	/*
1295 	 * Elevate service class if the packet is a pure TCP ACK.
1296 	 * We can do this only when the flow is not a background
1297 	 * flow and the outgoing interface supports
1298 	 * transmit-start model.
1299 	 */
1300 	if (!IS_MBUF_SC_BACKGROUND(msc) &&
1301 	    (flags & (PKT_SCF_TCP_ACK | PKT_SCF_TCP_SYN)) != 0) {
1302 		msc = MBUF_SC_CTL;
1303 	}
1304 
1305 	(void) m_set_service_class(m, msc);
1306 
1307 	/*
1308 	 * Set the privileged traffic auxiliary flag if applicable,
1309 	 * or clear it.
1310 	 */
1311 	if (!(sotcdb & SOTCDB_NO_PRIVILEGED) && soisprivilegedtraffic(so) &&
1312 	    msc != MBUF_SC_UNSPEC) {
1313 		m->m_pkthdr.pkt_flags |= PKTF_PRIO_PRIVILEGED;
1314 	} else {
1315 		m->m_pkthdr.pkt_flags &= ~PKTF_PRIO_PRIVILEGED;
1316 	}
1317 
1318 no_mbtc:
1319 	/*
1320 	 * For TCP with background traffic class switch CC algo based on sysctl
1321 	 */
1322 	if (so->so_type == SOCK_STREAM) {
1323 		set_tcp_stream_priority(so);
1324 	}
1325 
1326 	so_tc_update_stats(m, so, msc);
1327 }
1328 
1329 __private_extern__ void
so_tc_update_stats(struct mbuf * m,struct socket * so,mbuf_svc_class_t msc)1330 so_tc_update_stats(struct mbuf *m, struct socket *so, mbuf_svc_class_t msc)
1331 {
1332 	mbuf_traffic_class_t mtc;
1333 
1334 	/*
1335 	 * Assume socket and mbuf traffic class values are the same
1336 	 * Also assume the socket lock is held.  Note that the stats
1337 	 * at the socket layer are reduced down to the legacy traffic
1338 	 * classes; we could/should potentially expand so_tc_stats[].
1339 	 */
1340 	mtc = MBUF_SC2TC(msc);
1341 	VERIFY(mtc < SO_TC_STATS_MAX);
1342 	so->so_tc_stats[mtc].txpackets += 1;
1343 	so->so_tc_stats[mtc].txbytes += m->m_pkthdr.len;
1344 }
1345 
1346 __private_extern__ mbuf_svc_class_t
so_tc2msc(int tc)1347 so_tc2msc(int tc)
1348 {
1349 	mbuf_svc_class_t msc;
1350 
1351 	switch (tc) {
1352 	case SO_TC_BK_SYS:
1353 		msc = MBUF_SC_BK_SYS;
1354 		break;
1355 	case SO_TC_BK:
1356 	case _SO_TC_BK:
1357 		msc = MBUF_SC_BK;
1358 		break;
1359 	case SO_TC_BE:
1360 		msc = MBUF_SC_BE;
1361 		break;
1362 	case SO_TC_RD:
1363 		msc = MBUF_SC_RD;
1364 		break;
1365 	case SO_TC_OAM:
1366 		msc = MBUF_SC_OAM;
1367 		break;
1368 	case SO_TC_AV:
1369 		msc = MBUF_SC_AV;
1370 		break;
1371 	case SO_TC_RV:
1372 		msc = MBUF_SC_RV;
1373 		break;
1374 	case SO_TC_VI:
1375 	case _SO_TC_VI:
1376 		msc = MBUF_SC_VI;
1377 		break;
1378 	case SO_TC_NETSVC_SIG:
1379 		msc = MBUF_SC_SIG;
1380 		break;
1381 	case SO_TC_VO:
1382 	case _SO_TC_VO:
1383 		msc = MBUF_SC_VO;
1384 		break;
1385 	case SO_TC_CTL:
1386 		msc = MBUF_SC_CTL;
1387 		break;
1388 	case SO_TC_ALL:
1389 	default:
1390 		msc = MBUF_SC_UNSPEC;
1391 		break;
1392 	}
1393 
1394 	return msc;
1395 }
1396 
1397 __private_extern__ int
so_svc2tc(mbuf_svc_class_t svc)1398 so_svc2tc(mbuf_svc_class_t svc)
1399 {
1400 	switch (svc) {
1401 	case MBUF_SC_BK_SYS:
1402 		return SO_TC_BK_SYS;
1403 	case MBUF_SC_BK:
1404 		return SO_TC_BK;
1405 	case MBUF_SC_BE:
1406 		return SO_TC_BE;
1407 	case MBUF_SC_RD:
1408 		return SO_TC_RD;
1409 	case MBUF_SC_OAM:
1410 		return SO_TC_OAM;
1411 	case MBUF_SC_AV:
1412 		return SO_TC_AV;
1413 	case MBUF_SC_RV:
1414 		return SO_TC_RV;
1415 	case MBUF_SC_VI:
1416 		return SO_TC_VI;
1417 	case MBUF_SC_SIG:
1418 		return SO_TC_NETSVC_SIG;
1419 	case MBUF_SC_VO:
1420 		return SO_TC_VO;
1421 	case MBUF_SC_CTL:
1422 		return SO_TC_CTL;
1423 	case MBUF_SC_UNSPEC:
1424 	default:
1425 		return SO_TC_BE;
1426 	}
1427 }
1428 
1429 static size_t
sotc_index(int sotc)1430 sotc_index(int sotc)
1431 {
1432 	switch (sotc) {
1433 	case SO_TC_BK_SYS:
1434 		return SOTCIX_BK_SYS;
1435 	case _SO_TC_BK:
1436 	case SO_TC_BK:
1437 		return SOTCIX_BK;
1438 
1439 	case SO_TC_BE:
1440 		return SOTCIX_BE;
1441 	case SO_TC_RD:
1442 		return SOTCIX_RD;
1443 	case SO_TC_OAM:
1444 		return SOTCIX_OAM;
1445 
1446 	case SO_TC_AV:
1447 		return SOTCIX_AV;
1448 	case SO_TC_RV:
1449 		return SOTCIX_RV;
1450 	case _SO_TC_VI:
1451 	case SO_TC_VI:
1452 		return SOTCIX_VI;
1453 
1454 	case _SO_TC_VO:
1455 	case SO_TC_VO:
1456 		return SOTCIX_VO;
1457 	case SO_TC_CTL:
1458 		return SOTCIX_CTL;
1459 
1460 	default:
1461 		break;
1462 	}
1463 	/*
1464 	 * Unknown traffic class value
1465 	 */
1466 	return SIZE_T_MAX;
1467 }
1468 
1469 uint8_t
fastlane_sc_to_dscp(uint32_t svc_class)1470 fastlane_sc_to_dscp(uint32_t svc_class)
1471 {
1472 	uint8_t dscp = _DSCP_DF;
1473 
1474 	switch (svc_class) {
1475 	case MBUF_SC_BK_SYS:
1476 	case MBUF_SC_BK:
1477 		dscp = _DSCP_AF11;
1478 		break;
1479 
1480 	case MBUF_SC_BE:
1481 		dscp = _DSCP_DF;
1482 		break;
1483 	case MBUF_SC_RD:
1484 		dscp = _DSCP_AF21;
1485 		break;
1486 	case MBUF_SC_OAM:
1487 		dscp = _DSCP_CS2;
1488 		break;
1489 
1490 	case MBUF_SC_AV:
1491 		dscp = _DSCP_AF31;
1492 		break;
1493 	case MBUF_SC_RV:
1494 		dscp = _DSCP_CS4;
1495 		break;
1496 	case MBUF_SC_VI:
1497 		dscp = _DSCP_AF41;
1498 		break;
1499 	case MBUF_SC_SIG:
1500 		dscp = _DSCP_CS3;
1501 		break;
1502 
1503 	case MBUF_SC_VO:
1504 		dscp = _DSCP_EF;
1505 		break;
1506 	case MBUF_SC_CTL:
1507 		dscp = _DSCP_DF;
1508 		break;
1509 	default:
1510 		dscp = _DSCP_DF;
1511 		break;
1512 	}
1513 
1514 	return dscp;
1515 }
1516 
1517 uint8_t
rfc4594_sc_to_dscp(uint32_t svc_class)1518 rfc4594_sc_to_dscp(uint32_t svc_class)
1519 {
1520 	uint8_t dscp = _DSCP_DF;
1521 
1522 	switch (svc_class) {
1523 	case MBUF_SC_BK_SYS:            /* Low-Priority Data */
1524 	case MBUF_SC_BK:
1525 		dscp = _DSCP_CS1;
1526 		break;
1527 
1528 	case MBUF_SC_BE:                /* Standard */
1529 		dscp = _DSCP_DF;
1530 		break;
1531 	case MBUF_SC_RD:                /* Low-Latency Data */
1532 		dscp = _DSCP_AF21;
1533 		break;
1534 
1535 	/* SVC_CLASS Not Defined:  High-Throughput Data */
1536 
1537 	case MBUF_SC_OAM:               /* OAM */
1538 		dscp = _DSCP_CS2;
1539 		break;
1540 
1541 	/* SVC_CLASS Not Defined:  Broadcast Video */
1542 
1543 	case MBUF_SC_AV:                /* Multimedia Streaming */
1544 		dscp = _DSCP_AF31;
1545 		break;
1546 	case MBUF_SC_RV:                /* Real-Time Interactive */
1547 		dscp = _DSCP_CS4;
1548 		break;
1549 	case MBUF_SC_VI:                /* Multimedia Conferencing */
1550 		dscp = _DSCP_AF41;
1551 		break;
1552 	case MBUF_SC_SIG:               /* Signaling */
1553 		dscp = _DSCP_CS5;
1554 		break;
1555 
1556 	case MBUF_SC_VO:                /* Telephony */
1557 		dscp = _DSCP_EF;
1558 		break;
1559 	case MBUF_SC_CTL:               /* Network Control*/
1560 		dscp = _DSCP_CS6;
1561 		break;
1562 	default:
1563 		dscp = _DSCP_DF;
1564 		break;
1565 	}
1566 
1567 	return dscp;
1568 }
1569 
1570 mbuf_traffic_class_t
rfc4594_dscp_to_tc(uint8_t dscp)1571 rfc4594_dscp_to_tc(uint8_t dscp)
1572 {
1573 	mbuf_traffic_class_t tc = MBUF_TC_BE;
1574 
1575 	switch (dscp) {
1576 	case _DSCP_CS1:
1577 		tc = MBUF_TC_BK;
1578 		break;
1579 	case _DSCP_DF:
1580 	case _DSCP_AF21:
1581 	case _DSCP_CS2:
1582 		tc = MBUF_TC_BE;
1583 		break;
1584 	case _DSCP_AF31:
1585 	case _DSCP_CS4:
1586 	case _DSCP_AF41:
1587 	case _DSCP_CS5:
1588 		tc = MBUF_TC_VI;
1589 		break;
1590 	case _DSCP_EF:
1591 	case _DSCP_CS6:
1592 		tc = MBUF_TC_VO;
1593 		break;
1594 	default:
1595 		tc = MBUF_TC_BE;
1596 		break;
1597 	}
1598 
1599 	return tc;
1600 }
1601 
1602 /*
1603  * Pass NULL ifp for default map
1604  */
1605 static errno_t
set_netsvctype_dscp_map(struct net_qos_dscp_map * net_qos_dscp_map,const struct netsvctype_dscp_map * netsvctype_dscp_map)1606 set_netsvctype_dscp_map(struct net_qos_dscp_map *net_qos_dscp_map,
1607     const struct netsvctype_dscp_map *netsvctype_dscp_map)
1608 {
1609 	size_t i;
1610 	int netsvctype;
1611 
1612 	/*
1613 	 * Do not accept more that max number of distinct DSCPs
1614 	 */
1615 	if (net_qos_dscp_map == NULL || netsvctype_dscp_map == NULL) {
1616 		return EINVAL;
1617 	}
1618 
1619 	/*
1620 	 * Validate input parameters
1621 	 */
1622 	for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1623 		if (!IS_VALID_NET_SERVICE_TYPE(netsvctype_dscp_map[i].netsvctype)) {
1624 			return EINVAL;
1625 		}
1626 		if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
1627 			return EINVAL;
1628 		}
1629 	}
1630 
1631 	for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1632 		netsvctype = netsvctype_dscp_map[i].netsvctype;
1633 
1634 		net_qos_dscp_map->netsvctype_to_dscp[netsvctype] =
1635 		    netsvctype_dscp_map[i].dscp;
1636 	}
1637 	for (netsvctype = 0; netsvctype < _NET_SERVICE_TYPE_COUNT; netsvctype++) {
1638 		switch (netsvctype) {
1639 		case NET_SERVICE_TYPE_BE:
1640 		case NET_SERVICE_TYPE_BK:
1641 		case NET_SERVICE_TYPE_VI:
1642 		case NET_SERVICE_TYPE_VO:
1643 		case NET_SERVICE_TYPE_RV:
1644 		case NET_SERVICE_TYPE_AV:
1645 		case NET_SERVICE_TYPE_OAM:
1646 		case NET_SERVICE_TYPE_RD: {
1647 			size_t sotcix;
1648 
1649 			sotcix = sotc_index(sotc_by_netservicetype[netsvctype]);
1650 			if (sotcix != SIZE_T_MAX) {
1651 				net_qos_dscp_map->sotc_to_dscp[sotcix]  =
1652 				    netsvctype_dscp_map[netsvctype].dscp;
1653 			}
1654 			break;
1655 		}
1656 		case  NET_SERVICE_TYPE_SIG:
1657 			/* Signaling does not have its own traffic class */
1658 			break;
1659 		default:
1660 			/* We should not be here */
1661 			ASSERT(0);
1662 		}
1663 	}
1664 	if (net_qos_dscp_map == &fastlane_net_qos_dscp_map) {
1665 		/* Network control socket traffic class is always best effort for fastlane*/
1666 		net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_DF;
1667 	} else {
1668 		net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_CS6;
1669 	}
1670 
1671 	/* Background system socket traffic class DSCP same as background */
1672 	net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK_SYS] =
1673 	    net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK];
1674 
1675 	return 0;
1676 }
1677 
1678 static size_t
get_netsvctype_dscp_map(struct netsvctype_dscp_map * netsvctype_dscp_map)1679 get_netsvctype_dscp_map(struct netsvctype_dscp_map *netsvctype_dscp_map)
1680 {
1681 	struct net_qos_dscp_map *net_qos_dscp_map;
1682 	int i;
1683 
1684 	net_qos_dscp_map = &fastlane_net_qos_dscp_map;
1685 
1686 	for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1687 		netsvctype_dscp_map[i].netsvctype = i;
1688 		netsvctype_dscp_map[i].dscp = net_qos_dscp_map->netsvctype_to_dscp[i];
1689 	}
1690 
1691 	return i * sizeof(struct netsvctype_dscp_map);
1692 }
1693 
1694 void
net_qos_map_init()1695 net_qos_map_init()
1696 {
1697 	errno_t error;
1698 
1699 	error = set_netsvctype_dscp_map(&fastlane_net_qos_dscp_map,
1700 	    fastlane_netsvctype_dscp_map);
1701 	ASSERT(error == 0);
1702 
1703 	error = set_netsvctype_dscp_map(&rfc4594_net_qos_dscp_map,
1704 	    rfc4594_netsvctype_dscp_map);
1705 	ASSERT(error == 0);
1706 
1707 #if (DEBUG || DEVELOPMENT)
1708 	error = set_netsvctype_dscp_map(&custom_net_qos_dscp_map,
1709 	    rfc4594_netsvctype_dscp_map);
1710 	ASSERT(error == 0);
1711 
1712 #endif /* (DEBUG || DEVELOPMENT) */
1713 
1714 	set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
1715 }
1716 
1717 int
1718 sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS
1719 {
1720 #pragma unused(oidp, arg1, arg2)
1721 	int error = 0;
1722 
1723 	if (req->oldptr == USER_ADDR_NULL) {
1724 		req->oldidx =
1725 		    _NET_SERVICE_TYPE_COUNT * sizeof(struct netsvctype_dscp_map);
1726 	} else if (req->oldlen > 0) {
1727 		struct netsvctype_dscp_map netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {};
1728 		size_t len;
1729 
1730 		len = get_netsvctype_dscp_map(netsvctype_dscp_map);
1731 
1732 		error = SYSCTL_OUT(req, netsvctype_dscp_map,
1733 		    MIN(len, req->oldlen));
1734 		if (error != 0) {
1735 			goto done;
1736 		}
1737 	}
1738 
1739 	if (req->newptr != USER_ADDR_NULL) {
1740 		error = EPERM;
1741 	}
1742 done:
1743 	return error;
1744 }
1745 
1746 __private_extern__ errno_t
set_packet_qos(struct mbuf * m,struct ifnet * ifp,boolean_t qos_allowed,int sotc,int netsvctype,uint8_t * dscp_inout)1747 set_packet_qos(struct mbuf *m, struct ifnet *ifp, boolean_t qos_allowed,
1748     int sotc, int netsvctype, uint8_t *dscp_inout)
1749 {
1750 	if (ifp == NULL || dscp_inout == NULL) {
1751 		return EINVAL;
1752 	}
1753 
1754 	if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0 &&
1755 	    ifp->if_qosmarking_mode != IFRTYPE_QOSMARKING_MODE_NONE) {
1756 		uint8_t dscp;
1757 		const struct net_qos_dscp_map *net_qos_dscp_map = NULL;
1758 
1759 		switch (ifp->if_qosmarking_mode) {
1760 		case IFRTYPE_QOSMARKING_FASTLANE:
1761 			net_qos_dscp_map = &fastlane_net_qos_dscp_map;
1762 			break;
1763 		case IFRTYPE_QOSMARKING_RFC4594:
1764 			net_qos_dscp_map = &rfc4594_net_qos_dscp_map;
1765 			break;
1766 #if (DEBUG || DEVELOPMENT)
1767 		case IFRTYPE_QOSMARKING_CUSTOM:
1768 			net_qos_dscp_map = &custom_net_qos_dscp_map;
1769 			break;
1770 #endif /* (DEBUG || DEVELOPMENT) */
1771 		default:
1772 			panic("invalid QoS marking type");
1773 			/* NOTREACHED */
1774 		}
1775 
1776 		/*
1777 		 * When on a Fastlane network, IP_TOS/IPV6_TCLASS are no-ops
1778 		 */
1779 		dscp = _DSCP_DF;
1780 
1781 		/*
1782 		 * For DSCP use the network service type is specified, otherwise
1783 		 * use the socket traffic class
1784 		 *
1785 		 * When not whitelisted by the policy, set DSCP only for best
1786 		 * effort and background, and set the mbuf service class to
1787 		 * best effort as well so the packet will be queued and
1788 		 * scheduled at a lower priority.
1789 		 * We still want to prioritize control traffic on the interface
1790 		 * so we do not change the mbuf service class for SO_TC_CTL
1791 		 */
1792 		if (IS_VALID_NET_SERVICE_TYPE(netsvctype) &&
1793 		    netsvctype != NET_SERVICE_TYPE_BE) {
1794 			dscp = net_qos_dscp_map->netsvctype_to_dscp[netsvctype];
1795 
1796 			if (qos_allowed == FALSE &&
1797 			    netsvctype != NET_SERVICE_TYPE_BE &&
1798 			    netsvctype != NET_SERVICE_TYPE_BK) {
1799 				dscp = _DSCP_DF;
1800 				if (sotc != SO_TC_CTL) {
1801 					m_set_service_class(m, MBUF_SC_BE);
1802 				}
1803 			}
1804 		} else if (sotc != SO_TC_UNSPEC) {
1805 			size_t sotcix = sotc_index(sotc);
1806 			if (sotcix != SIZE_T_MAX) {
1807 				dscp = net_qos_dscp_map->sotc_to_dscp[sotcix];
1808 
1809 				if (qos_allowed == FALSE && sotc != SO_TC_BE &&
1810 				    sotc != SO_TC_BK && sotc != SO_TC_BK_SYS &&
1811 				    sotc != SO_TC_CTL) {
1812 					dscp = _DSCP_DF;
1813 					if (sotc != SO_TC_CTL) {
1814 						m_set_service_class(m, MBUF_SC_BE);
1815 					}
1816 				}
1817 			}
1818 		}
1819 		if (net_qos_verbose != 0) {
1820 			printf("%s qos_allowed %d sotc %u netsvctype %u dscp %u\n",
1821 			    __func__, qos_allowed, sotc, netsvctype, dscp);
1822 		}
1823 
1824 		if (*dscp_inout != dscp) {
1825 			*dscp_inout = dscp;
1826 		}
1827 	} else if (*dscp_inout != _DSCP_DF && IFNET_IS_WIFI_INFRA(ifp)) {
1828 		mbuf_svc_class_t msc = m_get_service_class(m);
1829 
1830 		/*
1831 		 * For WiFi infra, when the mbuf service class is best effort
1832 		 * and the DSCP is not default, set the service class based
1833 		 * on DSCP
1834 		 */
1835 		if (msc == MBUF_SC_BE) {
1836 			msc = wifi_dscp_to_msc_array[*dscp_inout];
1837 
1838 			if (msc != MBUF_SC_BE) {
1839 				m_set_service_class(m, msc);
1840 
1841 				if (net_qos_verbose != 0) {
1842 					printf("%s set msc %u for dscp %u\n",
1843 					    __func__, msc, *dscp_inout);
1844 				}
1845 			}
1846 		}
1847 	}
1848 
1849 	return 0;
1850 }
1851 
1852 static void
set_dscp_to_wifi_ac_map(const struct dcsp_msc_map * map,int clear)1853 set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *map, int clear)
1854 {
1855 	int i;
1856 
1857 	if (clear) {
1858 		bzero(wifi_dscp_to_msc_array, sizeof(wifi_dscp_to_msc_array));
1859 	}
1860 
1861 	for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1862 		const struct dcsp_msc_map *elem = map + i;
1863 
1864 		if (elem->dscp > _MAX_DSCP || elem->msc == MBUF_SC_UNSPEC) {
1865 			break;
1866 		}
1867 		switch (elem->msc) {
1868 		case MBUF_SC_BK_SYS:
1869 		case MBUF_SC_BK:
1870 			wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BK;
1871 			break;
1872 		default:
1873 		case MBUF_SC_BE:
1874 		case MBUF_SC_RD:
1875 		case MBUF_SC_OAM:
1876 			wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BE;
1877 			break;
1878 		case MBUF_SC_AV:
1879 		case MBUF_SC_RV:
1880 		case MBUF_SC_VI:
1881 			wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VI;
1882 			break;
1883 		case MBUF_SC_VO:
1884 		case MBUF_SC_CTL:
1885 			wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VO;
1886 			break;
1887 		}
1888 	}
1889 }
1890 
1891 static errno_t
dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map * netsvctype_dscp_map,size_t count,struct dcsp_msc_map * dcsp_msc_map)1892 dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *netsvctype_dscp_map,
1893     size_t count, struct dcsp_msc_map *dcsp_msc_map)
1894 {
1895 	errno_t error = 0;
1896 	uint32_t i;
1897 
1898 	/*
1899 	 * Validate input parameters
1900 	 */
1901 	for (i = 0; i < count; i++) {
1902 		if (!SO_VALID_TC(netsvctype_dscp_map[i].netsvctype)) {
1903 			error = EINVAL;
1904 			goto done;
1905 		}
1906 		if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
1907 			error = EINVAL;
1908 			goto done;
1909 		}
1910 	}
1911 
1912 	bzero(dcsp_msc_map, DSCP_ARRAY_SIZE * sizeof(struct dcsp_msc_map));
1913 
1914 	for (i = 0; i < count; i++) {
1915 		dcsp_msc_map[i].dscp = netsvctype_dscp_map[i].dscp;
1916 		dcsp_msc_map[i].msc = so_tc2msc(netsvctype_dscp_map[i].netsvctype);
1917 	}
1918 done:
1919 	return error;
1920 }
1921 
1922 int
1923 sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
1924 {
1925 #pragma unused(oidp, arg1, arg2)
1926 	int error = 0;
1927 	size_t len = DSCP_ARRAY_SIZE * sizeof(struct netsvctype_dscp_map);
1928 	struct netsvctype_dscp_map netsvctype_dscp_map[DSCP_ARRAY_SIZE] = {};
1929 	struct dcsp_msc_map dcsp_msc_map[DSCP_ARRAY_SIZE];
1930 	size_t count;
1931 
1932 	if (req->oldptr == USER_ADDR_NULL) {
1933 		req->oldidx = len;
1934 	} else if (req->oldlen > 0) {
1935 		uint8_t i;
1936 
1937 		for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1938 			netsvctype_dscp_map[i].dscp = i;
1939 			netsvctype_dscp_map[i].netsvctype =
1940 			    so_svc2tc(wifi_dscp_to_msc_array[i]);
1941 		}
1942 		error = SYSCTL_OUT(req, netsvctype_dscp_map,
1943 		    MIN(len, req->oldlen));
1944 		if (error != 0) {
1945 			goto done;
1946 		}
1947 	}
1948 
1949 	if (req->newptr == USER_ADDR_NULL) {
1950 		goto done;
1951 	}
1952 
1953 	error = proc_suser(current_proc());
1954 	if (error != 0) {
1955 		goto done;
1956 	}
1957 
1958 	/*
1959 	 * Check input length
1960 	 */
1961 	if (req->newlen > len) {
1962 		error = EINVAL;
1963 		goto done;
1964 	}
1965 	/*
1966 	 * Cap the number of entries to copy from input buffer
1967 	 */
1968 	if (len > req->newlen) {
1969 		len = req->newlen;
1970 	}
1971 	error = SYSCTL_IN(req, netsvctype_dscp_map, len);
1972 	if (error != 0) {
1973 		goto done;
1974 	}
1975 	count = len / sizeof(struct netsvctype_dscp_map);
1976 	bzero(dcsp_msc_map, sizeof(dcsp_msc_map));
1977 	error = dscp_msc_map_from_netsvctype_dscp_map(netsvctype_dscp_map, count,
1978 	    dcsp_msc_map);
1979 	if (error != 0) {
1980 		goto done;
1981 	}
1982 	set_dscp_to_wifi_ac_map(dcsp_msc_map, 0);
1983 done:
1984 	return error;
1985 }
1986 
1987 int
1988 sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
1989 {
1990 #pragma unused(oidp, arg1, arg2)
1991 	int error = 0;
1992 	int val = 0;
1993 
1994 	error = sysctl_handle_int(oidp, &val, 0, req);
1995 	if (error || !req->newptr) {
1996 		return error;
1997 	}
1998 	if (req->newptr == USER_ADDR_NULL) {
1999 		return 0;
2000 	}
2001 	error = proc_suser(current_proc());
2002 	if (error != 0) {
2003 		return error;
2004 	}
2005 
2006 	set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
2007 
2008 	return 0;
2009 }
2010 
2011 /*
2012  * Returns whether a large upload or download transfer should be marked as
2013  * BK service type for network activity. This is a system level
2014  * hint/suggestion to classify application traffic based on statistics
2015  * collected from the current network attachment
2016  *
2017  * Returns 1 for BK and 0 for default
2018  */
2019 
2020 int
net_qos_guideline(struct proc * p,struct net_qos_guideline_args * arg,int * retval)2021 net_qos_guideline(struct proc *p, struct net_qos_guideline_args *arg,
2022     int *retval)
2023 {
2024 #pragma unused(p)
2025 #define RETURN_USE_BK   1
2026 #define RETURN_USE_DEFAULT      0
2027 	struct net_qos_param qos_arg;
2028 	struct ifnet *ipv4_primary, *ipv6_primary;
2029 	int err = 0;
2030 
2031 	if (arg->param == USER_ADDR_NULL || retval == NULL ||
2032 	    arg->param_len != sizeof(qos_arg)) {
2033 		return EINVAL;
2034 	}
2035 	err = copyin(arg->param, (caddr_t) &qos_arg, sizeof(qos_arg));
2036 	if (err != 0) {
2037 		return err;
2038 	}
2039 
2040 	*retval = RETURN_USE_DEFAULT;
2041 	ipv4_primary = ifindex2ifnet[get_primary_ifscope(AF_INET)];
2042 	ipv6_primary = ifindex2ifnet[get_primary_ifscope(AF_INET6)];
2043 
2044 	/*
2045 	 * If either of the interfaces is in Low Internet mode, enable
2046 	 * background delay based algorithms on this transfer
2047 	 */
2048 	if (qos_arg.nq_uplink) {
2049 		if ((ipv4_primary != NULL &&
2050 		    (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_UL)) ||
2051 		    (ipv6_primary != NULL &&
2052 		    (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_UL))) {
2053 			*retval = RETURN_USE_BK;
2054 			return 0;
2055 		}
2056 	} else {
2057 		if ((ipv4_primary != NULL &&
2058 		    (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_DL)) ||
2059 		    (ipv6_primary != NULL &&
2060 		    (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_DL))) {
2061 			*retval = RETURN_USE_BK;
2062 			return 0;
2063 		}
2064 	}
2065 
2066 	/*
2067 	 * Some times IPv4 and IPv6 primary interfaces can be different.
2068 	 * In this case, if either of them is non-cellular, we should mark
2069 	 * the transfer as BK as it can potentially get used based on
2070 	 * the host name resolution
2071 	 */
2072 	if (ipv4_primary != NULL && IFNET_IS_EXPENSIVE(ipv4_primary) &&
2073 	    ipv6_primary != NULL && IFNET_IS_EXPENSIVE(ipv6_primary)) {
2074 		if (qos_arg.nq_use_expensive) {
2075 			return 0;
2076 		} else {
2077 			*retval = RETURN_USE_BK;
2078 			return 0;
2079 		}
2080 	}
2081 	if (ipv4_primary != NULL && IFNET_IS_CONSTRAINED(ipv4_primary) &&
2082 	    ipv6_primary != NULL && IFNET_IS_CONSTRAINED(ipv6_primary)) {
2083 		if (qos_arg.nq_use_constrained) {
2084 			return 0;
2085 		} else {
2086 			*retval = RETURN_USE_BK;
2087 			return 0;
2088 		}
2089 	}
2090 	if (qos_arg.nq_transfer_size >= 5 * 1024 * 1024) {
2091 		*retval = RETURN_USE_BK;
2092 		return 0;
2093 	}
2094 
2095 
2096 #undef  RETURN_USE_BK
2097 #undef  RETURN_USE_DEFAULT
2098 	return 0;
2099 }
2100 
2101 #if (DEBUG || DEVELOPMENT)
/*
 * Customizable QoS mapping table
 * By default it uses the mapping table for RFC 4594
 *
 * Notes:
 *   BK_SYS is the same as BK
 *   CTL cannot be changed and is always _DSCP_CS6
 *
 * Everything below is only built when DEBUG or DEVELOPMENT is defined.
 */

/* Container node "custom" under the _net_qos parent. */
SYSCTL_NODE(_net_qos, OID_AUTO, custom,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");

/* Container node "netsvctype_to_dscp" holding one leaf per service type. */
SYSCTL_NODE(_net_qos_custom, OID_AUTO, netsvctype_to_dscp,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");

/*
 * One integer leaf per network service type, all declared with CTLFLAG_WR
 * and all served by the same handler.  The NET_SERVICE_TYPE_* constant
 * given in each declaration is what the handler receives as arg2 and uses
 * to select the entry of the custom map to update.
 */
static int sysctl_net_qos_custom_netsvctype_to_dscp SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, be,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_BE, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, bk,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_BK, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, sig,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_SIG, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, vi,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_VI, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, vo,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_VO, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, rv,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_RV, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, av,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_AV, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, oam,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_OAM, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, rd,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_RD, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");

/*
 * "reset" leaf directly under the custom node: a write makes its handler
 * reload the custom map from rfc4594_netsvctype_dscp_map.
 */
static int sysctl_net_qos_custom_reset SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_net_qos_custom, OID_AUTO, reset,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, 0, sysctl_net_qos_custom_reset, "I", "");
2149 
2150 int
2151 sysctl_net_qos_custom_netsvctype_to_dscp SYSCTL_HANDLER_ARGS
2152 {
2153 #pragma unused(arg1)
2154 	int error = 0;
2155 
2156 	switch (arg2) {
2157 	case NET_SERVICE_TYPE_BE:
2158 	case NET_SERVICE_TYPE_BK:
2159 	case NET_SERVICE_TYPE_SIG:
2160 	case NET_SERVICE_TYPE_VI:
2161 	case NET_SERVICE_TYPE_VO:
2162 	case NET_SERVICE_TYPE_RV:
2163 	case NET_SERVICE_TYPE_AV:
2164 	case NET_SERVICE_TYPE_OAM:
2165 	case NET_SERVICE_TYPE_RD:
2166 		break;
2167 	default:
2168 		os_log(OS_LOG_DEFAULT, "%s: unexpected netsvctype %d",
2169 		    __func__, arg2);
2170 		return EINVAL;
2171 	}
2172 
2173 	int val = custom_net_qos_dscp_map.netsvctype_to_dscp[arg2];
2174 	error = sysctl_handle_int(oidp, &val, 0, req);
2175 	if (error != 0 || req->newptr == USER_ADDR_NULL) {
2176 		return error;
2177 	}
2178 	if (req->newptr == USER_ADDR_NULL) {
2179 		return 0;
2180 	}
2181 	error = proc_suser(current_proc());
2182 	if (error != 0) {
2183 		return error;
2184 	}
2185 	if (val < 0 || val > _MAX_DSCP) {
2186 		os_log(OS_LOG_DEFAULT, "%s: unexpected DSCP %d",
2187 		    __func__, val);
2188 		return EINVAL;
2189 	}
2190 
2191 	struct netsvctype_dscp_map netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {};
2192 
2193 	for (int i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
2194 		netsvctype_dscp_map[i].netsvctype = i;
2195 		netsvctype_dscp_map[i].dscp = custom_net_qos_dscp_map.netsvctype_to_dscp[i];
2196 	}
2197 	netsvctype_dscp_map[arg2].dscp = (uint8_t) val;
2198 
2199 	error = set_netsvctype_dscp_map(&custom_net_qos_dscp_map,
2200 	    netsvctype_dscp_map);
2201 
2202 	return 0;
2203 }
2204 
2205 int
2206 sysctl_net_qos_custom_reset SYSCTL_HANDLER_ARGS
2207 {
2208 #pragma unused(arg1, arg2)
2209 	int error = 0;
2210 	int val = 0;
2211 
2212 	error = sysctl_handle_int(oidp, &val, 0, req);
2213 	if (error || !req->newptr) {
2214 		return error;
2215 	}
2216 	if (req->newptr == USER_ADDR_NULL) {
2217 		return 0;
2218 	}
2219 	error = proc_suser(current_proc());
2220 	if (error != 0) {
2221 		return error;
2222 	}
2223 
2224 	error = set_netsvctype_dscp_map(&custom_net_qos_dscp_map,
2225 	    rfc4594_netsvctype_dscp_map);
2226 
2227 	return error;
2228 }
2229 
2230 uint8_t
custom_sc_to_dscp(uint32_t svc_class)2231 custom_sc_to_dscp(uint32_t svc_class)
2232 {
2233 	uint8_t dscp = _DSCP_DF;
2234 
2235 	switch (svc_class) {
2236 	case MBUF_SC_BK_SYS:
2237 		dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_BK_SYS];
2238 		break;
2239 	case MBUF_SC_BK:
2240 		dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_BK];
2241 		break;
2242 
2243 	case MBUF_SC_BE:
2244 		dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_BE];
2245 		break;
2246 	case MBUF_SC_RD:
2247 		dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_RD];
2248 		break;
2249 	case MBUF_SC_OAM:
2250 		dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_OAM];
2251 		break;
2252 
2253 	case MBUF_SC_AV:
2254 		dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_AV];
2255 		break;
2256 	case MBUF_SC_RV:
2257 		dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_RV];
2258 		break;
2259 	case MBUF_SC_VI:
2260 		dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_VI];
2261 		break;
2262 	case MBUF_SC_SIG:
2263 		dscp = custom_net_qos_dscp_map.netsvctype_to_dscp[NET_SERVICE_TYPE_SIG];
2264 		break;
2265 
2266 	case MBUF_SC_VO:
2267 		dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_VO];
2268 		break;
2269 	case MBUF_SC_CTL:
2270 		dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_CTL];
2271 		break;
2272 	default:
2273 		break;
2274 	}
2275 	return dscp;
2276 }
2277 #endif /* (DEBUG || DEVELOPMENT) */
2278