1 /*
2 * Copyright (c) 2009-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/types.h>
32 #include <sys/filedesc.h>
33 #include <sys/file_internal.h>
34 #include <sys/proc.h>
35 #include <sys/socket.h>
36 #include <sys/socketvar.h>
37 #include <sys/errno.h>
38 #include <sys/protosw.h>
39 #include <sys/domain.h>
40 #include <sys/mbuf.h>
41 #include <sys/queue.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysproto.h>
44
45 #include <net/if.h>
46 #include <net/if_var.h>
47 #include <net/route.h>
48
49 #include <netinet/in.h>
50 #include <netinet/in_var.h>
51 #include <netinet/in_pcb.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip_var.h>
54 #include <netinet/ip6.h>
55 #include <netinet6/ip6_var.h>
56 #include <netinet/udp.h>
57 #include <netinet/udp_var.h>
58 #include <netinet/tcp.h>
59 #include <netinet/tcp_var.h>
60 #include <netinet/tcp_cc.h>
61 #include <netinet/in_tclass.h>
62
63 #include <os/log.h>
64
65 static_assert(_SO_TC_MAX == SO_TC_STATS_MAX);
66
/*
 * DSCP lookup tables for one QoS marking scheme.
 * One table is sized by SO_TC_MAX (socket traffic classes) and the other
 * by _NET_SERVICE_TYPE_COUNT (network service types); each element holds
 * a DSCP value.
 * NOTE(review): the exact index used for sotc_to_dscp is not visible in
 * this part of the file -- confirm against the code that fills it.
 */
struct net_qos_dscp_map {
	uint8_t        sotc_to_dscp[SO_TC_MAX];
	uint8_t        netsvctype_to_dscp[_NET_SERVICE_TYPE_COUNT];
};
71
/*
 * Associates one DSCP value with the mbuf service class to use for it.
 * (The tag is spelled "dcsp"; it is kept as is because the rest of the
 * file refers to it by that name.)
 */
struct dcsp_msc_map {
	uint8_t                 dscp;   /* DSCP code point */
	mbuf_svc_class_t        msc;    /* mbuf service class for that code point */
};
76 static inline int so_throttle_best_effort(struct socket *, struct ifnet *);
77 static void set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *, int);
78 static errno_t dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *, size_t,
79 struct dcsp_msc_map *);
80
81 static LCK_GRP_DECLARE(tclass_lck_grp, "tclass");
82 static LCK_MTX_DECLARE(tclass_lock, &tclass_lck_grp);
83
84 SYSCTL_NODE(_net, OID_AUTO, qos,
85 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "QoS");
86
87 static int sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS;
88 SYSCTL_PROC(_net_qos, OID_AUTO, default_netsvctype_to_dscp_map,
89 CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
90 0, 0, sysctl_default_netsvctype_to_dscp_map, "S", "");
91
92 static int sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
93 SYSCTL_PROC(_net_qos, OID_AUTO, dscp_to_wifi_ac_map,
94 CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
95 0, 0, sysctl_dscp_to_wifi_ac_map, "S", "");
96
97 static int sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
98 SYSCTL_PROC(_net_qos, OID_AUTO, reset_dscp_to_wifi_ac_map,
99 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
100 0, 0, sysctl_reset_dscp_to_wifi_ac_map, "I", "");
101
102 int net_qos_verbose = 0;
103 SYSCTL_INT(_net_qos, OID_AUTO, verbose,
104 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_verbose, 0, "");
105
106 /*
107 * Fastlane QoS policy:
 * By default, allow all apps to get traffic class to DSCP mapping.
109 */
110 SYSCTL_NODE(_net_qos, OID_AUTO, policy,
111 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");
112
113 int net_qos_policy_restricted = 0;
114 SYSCTL_INT(_net_qos_policy, OID_AUTO, restricted,
115 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restricted, 0, "");
116
117 int net_qos_policy_restrict_avapps = 0;
118 SYSCTL_INT(_net_qos_policy, OID_AUTO, restrict_avapps,
119 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restrict_avapps, 0, "");
120
121 int net_qos_policy_wifi_enabled = 0;
122 SYSCTL_INT(_net_qos_policy, OID_AUTO, wifi_enabled,
123 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_wifi_enabled, 0, "");
124
125 int net_qos_policy_capable_enabled = 0;
126 SYSCTL_INT(_net_qos_policy, OID_AUTO, capable_enabled,
127 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_capable_enabled, 0, "");
128
129 /*
130 * Socket traffic class from network service type
131 */
132 const int sotc_by_netservicetype[_NET_SERVICE_TYPE_COUNT] = {
133 SO_TC_BE, /* NET_SERVICE_TYPE_BE */
134 SO_TC_BK, /* NET_SERVICE_TYPE_BK */
135 SO_TC_VI, /* NET_SERVICE_TYPE_SIG */
136 SO_TC_VI, /* NET_SERVICE_TYPE_VI */
137 SO_TC_VO, /* NET_SERVICE_TYPE_VO */
138 SO_TC_RV, /* NET_SERVICE_TYPE_RV */
139 SO_TC_AV, /* NET_SERVICE_TYPE_AV */
140 SO_TC_OAM, /* NET_SERVICE_TYPE_OAM */
141 SO_TC_RD /* NET_SERVICE_TYPE_RD */
142 };
143
/*
 * DSCP mappings for QoS Fastlane as based on network service types.
 * One entry per network service type. Compared with the RFC 4594 table
 * defined in this file, only the BK (AF11) and SIG (CS3) entries differ.
 */
static const
struct netsvctype_dscp_map fastlane_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
	{ .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
	{ .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_AF11 },
	{ .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS3 },
	{ .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
	{ .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
	{ .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
	{ .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
	{ .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
	{ .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
};
159
/*
 * DSCP mappings for QoS RFC4594 as based on network service types.
 * One entry per network service type. Compared with the Fastlane table
 * defined in this file, only the BK (CS1) and SIG (CS5) entries differ.
 */
static const
struct netsvctype_dscp_map rfc4594_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
	{ .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
	{ .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_CS1 },
	{ .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS5 },
	{ .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
	{ .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
	{ .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
	{ .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
	{ .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
	{ .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
};
175
176 static struct net_qos_dscp_map fastlane_net_qos_dscp_map;
177 static struct net_qos_dscp_map rfc4594_net_qos_dscp_map;
178 #if (DEBUG || DEVELOPMENT)
179 static struct net_qos_dscp_map custom_net_qos_dscp_map;
180 #endif /* (DEBUG || DEVELOPMENT) */
181
182 /*
183 * The size is one more than the max because DSCP start at zero
184 */
185 #define DSCP_ARRAY_SIZE (_MAX_DSCP + 1)
186
/*
 * The DSCP to UP mapping (via mbuf service class) for WiFi that follows is
 * the mapping that is implemented at the 802.11 driver level when the mbuf
 * service class is MBUF_SC_BE.
 *
 * This clashes with the recommended mapping documented by the IETF document
 * draft-szigeti-tsvwg-ieee-802-11e-01.txt but we keep the mapping to maintain
 * binary compatibility. To select L2 QoS marking, applications should use the
 * network service type socket option instead of IP_TOS or IPV6_TCLASS.
 */
/*
 * One entry for every DSCP value from 0 to 63, in ascending order, grouped
 * by blocks of eight code points, followed by a terminating entry whose
 * DSCP (255) is not a valid code point.
 */
static const struct dcsp_msc_map default_dscp_to_wifi_ac_map[] = {
	{ .dscp = _DSCP_DF, .msc = MBUF_SC_BE }, /* RFC 2474 Standard */
	{ .dscp = 1, .msc = MBUF_SC_BE }, /* */
	{ .dscp = 2, .msc = MBUF_SC_BE }, /* */
	{ .dscp = 3, .msc = MBUF_SC_BE }, /* */
	{ .dscp = 4, .msc = MBUF_SC_BE }, /* */
	{ .dscp = 5, .msc = MBUF_SC_BE }, /* */
	{ .dscp = 6, .msc = MBUF_SC_BE }, /* */
	{ .dscp = 7, .msc = MBUF_SC_BE }, /* */

	{ .dscp = _DSCP_CS1, .msc = MBUF_SC_BK }, /* RFC 3662 Low-Priority Data */
	{ .dscp = 9, .msc = MBUF_SC_BK }, /* */
	{ .dscp = _DSCP_AF11, .msc = MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */
	{ .dscp = 11, .msc = MBUF_SC_BK }, /* */
	{ .dscp = _DSCP_AF12, .msc = MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */
	{ .dscp = 13, .msc = MBUF_SC_BK }, /* */
	{ .dscp = _DSCP_AF13, .msc = MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */
	{ .dscp = 15, .msc = MBUF_SC_BK }, /* */

	{ .dscp = _DSCP_CS2, .msc = MBUF_SC_BK }, /* RFC 4594 OAM */
	{ .dscp = 17, .msc = MBUF_SC_BK }, /* */
	{ .dscp = _DSCP_AF21, .msc = MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */
	{ .dscp = 19, .msc = MBUF_SC_BK }, /* */
	{ .dscp = _DSCP_AF22, .msc = MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */
	{ .dscp = 21, .msc = MBUF_SC_BK }, /* */
	{ .dscp = _DSCP_AF23, .msc = MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */
	{ .dscp = 23, .msc = MBUF_SC_BK }, /* */

	{ .dscp = _DSCP_CS3, .msc = MBUF_SC_BE }, /* RFC 2474 Broadcast Video */
	{ .dscp = 25, .msc = MBUF_SC_BE }, /* */
	{ .dscp = _DSCP_AF31, .msc = MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */
	{ .dscp = 27, .msc = MBUF_SC_BE }, /* */
	{ .dscp = _DSCP_AF32, .msc = MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */
	{ .dscp = 29, .msc = MBUF_SC_BE }, /* */
	{ .dscp = _DSCP_AF33, .msc = MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */
	{ .dscp = 31, .msc = MBUF_SC_BE }, /* */

	{ .dscp = _DSCP_CS4, .msc = MBUF_SC_VI }, /* RFC 2474 Real-Time Interactive */
	{ .dscp = 33, .msc = MBUF_SC_VI }, /* */
	{ .dscp = _DSCP_AF41, .msc = MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */
	{ .dscp = 35, .msc = MBUF_SC_VI }, /* */
	{ .dscp = _DSCP_AF42, .msc = MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */
	{ .dscp = 37, .msc = MBUF_SC_VI }, /* */
	{ .dscp = _DSCP_AF43, .msc = MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */
	{ .dscp = 39, .msc = MBUF_SC_VI }, /* */

	{ .dscp = _DSCP_CS5, .msc = MBUF_SC_VI }, /* RFC 2474 Signaling */
	{ .dscp = 41, .msc = MBUF_SC_VI }, /* */
	{ .dscp = 42, .msc = MBUF_SC_VI }, /* */
	{ .dscp = 43, .msc = MBUF_SC_VI }, /* */
	{ .dscp = _DSCP_VA, .msc = MBUF_SC_VI }, /* RFC 5865 VOICE-ADMIT */
	{ .dscp = 45, .msc = MBUF_SC_VI }, /* */
	{ .dscp = _DSCP_EF, .msc = MBUF_SC_VI }, /* RFC 3246 Telephony */
	{ .dscp = 47, .msc = MBUF_SC_VI }, /* */

	{ .dscp = _DSCP_CS6, .msc = MBUF_SC_VO }, /* Wi-Fi WMM Certification: Chariot */
	{ .dscp = 49, .msc = MBUF_SC_VO }, /* */
	{ .dscp = 50, .msc = MBUF_SC_VO }, /* */
	{ .dscp = 51, .msc = MBUF_SC_VO }, /* */
	{ .dscp = 52, .msc = MBUF_SC_VO }, /* Wi-Fi WMM Certification: Sigma */
	{ .dscp = 53, .msc = MBUF_SC_VO }, /* */
	{ .dscp = 54, .msc = MBUF_SC_VO }, /* */
	{ .dscp = 55, .msc = MBUF_SC_VO }, /* */

	{ .dscp = _DSCP_CS7, .msc = MBUF_SC_VO }, /* Wi-Fi WMM Certification: Chariot */
	{ .dscp = 57, .msc = MBUF_SC_VO }, /* */
	{ .dscp = 58, .msc = MBUF_SC_VO }, /* */
	{ .dscp = 59, .msc = MBUF_SC_VO }, /* */
	{ .dscp = 60, .msc = MBUF_SC_VO }, /* */
	{ .dscp = 61, .msc = MBUF_SC_VO }, /* */
	{ .dscp = 62, .msc = MBUF_SC_VO }, /* */
	{ .dscp = 63, .msc = MBUF_SC_VO }, /* */

	{ .dscp = 255, .msc = MBUF_SC_UNSPEC } /* invalid DSCP to mark last entry */
};
272
273 mbuf_svc_class_t wifi_dscp_to_msc_array[DSCP_ARRAY_SIZE];
274
275 /*
276 * If there is no foreground activity on the interface for bg_switch_time
277 * seconds, the background connections can switch to foreground TCP
278 * congestion control.
279 */
280 #define TCP_BG_SWITCH_TIME 2 /* seconds */
281
282 #if (DEVELOPMENT || DEBUG)
283
284 static int tfp_count = 0;
285
286 static TAILQ_HEAD(, tclass_for_proc) tfp_head =
287 TAILQ_HEAD_INITIALIZER(tfp_head);
288
/*
 * One traffic class override, kept on the tfp_head list.
 * An entry is keyed either by pid or, when tfp_pid is -1, by process name.
 */
struct tclass_for_proc {
	TAILQ_ENTRY(tclass_for_proc)    tfp_link;       /* linkage on tfp_head */
	int             tfp_class;      /* traffic class; SO_TC_UNSPEC leaves sockets untouched */
	pid_t           tfp_pid;        /* target pid, or -1 for a name based entry */
	char            tfp_pname[(2 * MAXCOMLEN) + 1]; /* process name, set only for name based entries */
	uint32_t        tfp_qos_mode;   /* QOS_MODE_MARKING_POLICY_* value */
};
296
297 static int get_pid_tclass(struct so_tcdbg *);
298 static int get_pname_tclass(struct so_tcdbg *);
299 static int set_pid_tclass(struct so_tcdbg *);
300 static int set_pname_tclass(struct so_tcdbg *);
301 static int flush_pid_tclass(struct so_tcdbg *);
302 static int purge_tclass_for_proc(void);
303 static int flush_tclass_for_proc(void);
304 static void set_tclass_for_curr_proc(struct socket *);
305
306 /*
307 * Must be called with tclass_lock held
308 */
309 static struct tclass_for_proc *
find_tfp_by_pid(pid_t pid)310 find_tfp_by_pid(pid_t pid)
311 {
312 struct tclass_for_proc *tfp;
313
314 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
315 if (tfp->tfp_pid == pid) {
316 break;
317 }
318 }
319 return tfp;
320 }
321
322 /*
323 * Must be called with tclass_lock held
324 */
325 static struct tclass_for_proc *
find_tfp_by_pname(const char * pname)326 find_tfp_by_pname(const char *pname)
327 {
328 struct tclass_for_proc *tfp;
329
330 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
331 if (strncmp(pname, tfp->tfp_pname,
332 sizeof(tfp->tfp_pname)) == 0) {
333 break;
334 }
335 }
336 return tfp;
337 }
338
339 __private_extern__ void
set_tclass_for_curr_proc(struct socket * so)340 set_tclass_for_curr_proc(struct socket *so)
341 {
342 struct tclass_for_proc *tfp = NULL;
343 proc_t p = current_proc(); /* Not ref counted */
344 pid_t pid = proc_pid(p);
345 char *pname = proc_best_name(p);
346
347 lck_mtx_lock(&tclass_lock);
348
349 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
350 if ((tfp->tfp_pid == pid) || (tfp->tfp_pid == -1 &&
351 strncmp(pname, tfp->tfp_pname,
352 sizeof(tfp->tfp_pname)) == 0)) {
353 if (tfp->tfp_class != SO_TC_UNSPEC) {
354 so->so_traffic_class = (uint16_t)tfp->tfp_class;
355 }
356
357 if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
358 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
359 } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
360 so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
361 }
362 break;
363 }
364 }
365
366 lck_mtx_unlock(&tclass_lock);
367 }
368
369 /*
370 * Purge entries with PIDs of exited processes
371 */
372 int
purge_tclass_for_proc(void)373 purge_tclass_for_proc(void)
374 {
375 int error = 0;
376 struct tclass_for_proc *tfp, *tvar;
377
378 lck_mtx_lock(&tclass_lock);
379
380 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
381 proc_t p;
382
383 if (tfp->tfp_pid == -1) {
384 continue;
385 }
386 if ((p = proc_find(tfp->tfp_pid)) == NULL) {
387 tfp_count--;
388 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
389
390 kfree_type(struct tclass_for_proc, tfp);
391 } else {
392 proc_rele(p);
393 }
394 }
395
396 lck_mtx_unlock(&tclass_lock);
397
398 return error;
399 }
400
401 /*
402 * Remove one entry
403 * Must be called with tclass_lock held
404 */
405 static void
free_tclass_for_proc(struct tclass_for_proc * tfp)406 free_tclass_for_proc(struct tclass_for_proc *tfp)
407 {
408 if (tfp == NULL) {
409 return;
410 }
411 tfp_count--;
412 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
413 kfree_type(struct tclass_for_proc, tfp);
414 }
415
416 /*
417 * Remove all entries
418 */
419 int
flush_tclass_for_proc(void)420 flush_tclass_for_proc(void)
421 {
422 int error = 0;
423 struct tclass_for_proc *tfp, *tvar;
424
425 lck_mtx_lock(&tclass_lock);
426
427 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
428 free_tclass_for_proc(tfp);
429 }
430
431 lck_mtx_unlock(&tclass_lock);
432
433 return error;
434 }
435
436 /*
437 * Must be called with tclass_lock held
438 */
439 static struct tclass_for_proc *
alloc_tclass_for_proc(pid_t pid,const char * pname)440 alloc_tclass_for_proc(pid_t pid, const char *pname)
441 {
442 struct tclass_for_proc *tfp;
443
444 if (pid == -1 && pname == NULL) {
445 return NULL;
446 }
447
448 tfp = kalloc_type(struct tclass_for_proc, Z_NOWAIT | Z_ZERO);
449 if (tfp == NULL) {
450 return NULL;
451 }
452
453 tfp->tfp_pid = pid;
454 /*
455 * Add per pid entries before per proc name so we can find
456 * a specific instance of a process before the general name base entry.
457 */
458 if (pid != -1) {
459 TAILQ_INSERT_HEAD(&tfp_head, tfp, tfp_link);
460 } else {
461 strlcpy(tfp->tfp_pname, pname, sizeof(tfp->tfp_pname));
462 TAILQ_INSERT_TAIL(&tfp_head, tfp, tfp_link);
463 }
464
465 tfp_count++;
466
467 return tfp;
468 }
469
470 /*
471 * SO_TC_UNSPEC for tclass means to remove the entry
472 */
473 int
set_pid_tclass(struct so_tcdbg * so_tcdbg)474 set_pid_tclass(struct so_tcdbg *so_tcdbg)
475 {
476 int error = EINVAL;
477 proc_t p = NULL;
478 struct tclass_for_proc *tfp;
479 pid_t pid = so_tcdbg->so_tcdbg_pid;
480 int tclass = so_tcdbg->so_tcdbg_tclass;
481 int netsvctype = so_tcdbg->so_tcdbg_netsvctype;
482 uint8_t ecn_val = so_tcdbg->so_tcdbg_ecn_val;
483
484 p = proc_find(pid);
485 if (p == NULL) {
486 printf("%s proc_find(%d) failed\n", __func__, pid);
487 goto done;
488 }
489
490 /* Need a tfp */
491 lck_mtx_lock(&tclass_lock);
492
493 tfp = find_tfp_by_pid(pid);
494 if (tfp == NULL) {
495 tfp = alloc_tclass_for_proc(pid, NULL);
496 if (tfp == NULL) {
497 lck_mtx_unlock(&tclass_lock);
498 error = ENOBUFS;
499 goto done;
500 }
501 }
502 tfp->tfp_class = tclass;
503 tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
504
505 lck_mtx_unlock(&tclass_lock);
506
507 if (tfp != NULL) {
508 struct fileproc *fp;
509 proc_fdlock(p);
510 fdt_foreach(fp, p) {
511 struct socket *so;
512
513 if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
514 continue;
515 }
516
517 so = (struct socket *)fp_get_data(fp);
518 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
519 continue;
520 }
521
522 socket_lock(so, 1);
523 if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
524 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
525 } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
526 so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
527 }
528
529 struct inpcb *inp = so ? sotoinpcb(so) : NULL;
530 struct tcpcb *tp = inp ? intotcpcb(inp) : NULL;
531
532 if (tp != NULL) {
533 if (ecn_val == IPTOS_ECN_ECT1 || ecn_val == IPTOS_ECN_ECT0) {
534 tp->ecn_flags |= (ecn_val == IPTOS_ECN_ECT1) ?
535 TE_FORCE_ECT1 : TE_FORCE_ECT0;
536 } else {
537 tp->ecn_flags &= ~(TE_FORCE_ECT1 | TE_FORCE_ECT0);
538 }
539 }
540 socket_unlock(so, 1);
541
542 if (netsvctype != _NET_SERVICE_TYPE_UNSPEC) {
543 error = sock_setsockopt(so, SOL_SOCKET,
544 SO_NET_SERVICE_TYPE, &netsvctype, sizeof(int));
545 }
546 if (tclass != SO_TC_UNSPEC) {
547 error = sock_setsockopt(so, SOL_SOCKET,
548 SO_TRAFFIC_CLASS, &tclass, sizeof(int));
549 }
550 }
551
552 proc_fdunlock(p);
553 }
554
555 error = 0;
556 done:
557 if (p != NULL) {
558 proc_rele(p);
559 }
560
561 return error;
562 }
563
564 int
set_pname_tclass(struct so_tcdbg * so_tcdbg)565 set_pname_tclass(struct so_tcdbg *so_tcdbg)
566 {
567 int error = EINVAL;
568 struct tclass_for_proc *tfp;
569
570 lck_mtx_lock(&tclass_lock);
571
572 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
573 if (tfp == NULL) {
574 tfp = alloc_tclass_for_proc(-1, so_tcdbg->so_tcdbg_pname);
575 if (tfp == NULL) {
576 lck_mtx_unlock(&tclass_lock);
577 error = ENOBUFS;
578 goto done;
579 }
580 }
581 tfp->tfp_class = so_tcdbg->so_tcdbg_tclass;
582 tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
583
584 lck_mtx_unlock(&tclass_lock);
585
586 error = 0;
587 done:
588
589 return error;
590 }
591
592 static int
flush_pid_tclass(struct so_tcdbg * so_tcdbg)593 flush_pid_tclass(struct so_tcdbg *so_tcdbg)
594 {
595 pid_t pid = so_tcdbg->so_tcdbg_pid;
596 int tclass = so_tcdbg->so_tcdbg_tclass;
597 struct fileproc *fp;
598 proc_t p;
599 int error;
600
601 p = proc_find(pid);
602 if (p == PROC_NULL) {
603 printf("%s proc_find(%d) failed\n", __func__, pid);
604 return EINVAL;
605 }
606
607 proc_fdlock(p);
608
609 fdt_foreach(fp, p) {
610 struct socket *so;
611
612 if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
613 continue;
614 }
615
616 so = (struct socket *)fp_get_data(fp);
617 error = sock_setsockopt(so, SOL_SOCKET, SO_FLUSH, &tclass,
618 sizeof(tclass));
619 if (error != 0) {
620 printf("%s: setsockopt(SO_FLUSH) (so=0x%llx, fd=%d, "
621 "tclass=%d) failed %d\n", __func__,
622 (uint64_t)VM_KERNEL_ADDRPERM(so), fdt_foreach_fd(), tclass,
623 error);
624 }
625 }
626
627 proc_fdunlock(p);
628
629 proc_rele(p);
630 return 0;
631 }
632
633 int
get_pid_tclass(struct so_tcdbg * so_tcdbg)634 get_pid_tclass(struct so_tcdbg *so_tcdbg)
635 {
636 int error = EINVAL;
637 proc_t p = NULL;
638 struct tclass_for_proc *tfp;
639 pid_t pid = so_tcdbg->so_tcdbg_pid;
640
641 so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
642
643 p = proc_find(pid);
644 if (p == NULL) {
645 printf("%s proc_find(%d) failed\n", __func__, pid);
646 goto done;
647 }
648
649 /* Need a tfp */
650 lck_mtx_lock(&tclass_lock);
651
652 tfp = find_tfp_by_pid(pid);
653 if (tfp != NULL) {
654 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
655 so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
656 error = 0;
657 }
658 lck_mtx_unlock(&tclass_lock);
659 done:
660 if (p != NULL) {
661 proc_rele(p);
662 }
663
664 return error;
665 }
666
667 int
get_pname_tclass(struct so_tcdbg * so_tcdbg)668 get_pname_tclass(struct so_tcdbg *so_tcdbg)
669 {
670 int error = EINVAL;
671 struct tclass_for_proc *tfp;
672
673 so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
674
675 /* Need a tfp */
676 lck_mtx_lock(&tclass_lock);
677
678 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
679 if (tfp != NULL) {
680 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
681 so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
682 error = 0;
683 }
684 lck_mtx_unlock(&tclass_lock);
685
686 return error;
687 }
688
689 static int
delete_tclass_for_pid_pname(struct so_tcdbg * so_tcdbg)690 delete_tclass_for_pid_pname(struct so_tcdbg *so_tcdbg)
691 {
692 int error = EINVAL;
693 pid_t pid = so_tcdbg->so_tcdbg_pid;
694 struct tclass_for_proc *tfp = NULL;
695
696 lck_mtx_lock(&tclass_lock);
697
698 if (pid != -1) {
699 tfp = find_tfp_by_pid(pid);
700 } else {
701 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
702 }
703
704 if (tfp != NULL) {
705 free_tclass_for_proc(tfp);
706 error = 0;
707 }
708
709 lck_mtx_unlock(&tclass_lock);
710
711 return error;
712 }
713
714 /*
715 * Setting options requires privileges
716 */
717 __private_extern__ int
so_set_tcdbg(struct socket * so,struct so_tcdbg * so_tcdbg)718 so_set_tcdbg(struct socket *so, struct so_tcdbg *so_tcdbg)
719 {
720 int error = 0;
721
722 if ((so->so_state & SS_PRIV) == 0) {
723 return EPERM;
724 }
725
726 socket_unlock(so, 0);
727
728 switch (so_tcdbg->so_tcdbg_cmd) {
729 case SO_TCDBG_PID:
730 error = set_pid_tclass(so_tcdbg);
731 break;
732
733 case SO_TCDBG_PNAME:
734 error = set_pname_tclass(so_tcdbg);
735 break;
736
737 case SO_TCDBG_PURGE:
738 error = purge_tclass_for_proc();
739 break;
740
741 case SO_TCDBG_FLUSH:
742 error = flush_tclass_for_proc();
743 break;
744
745 case SO_TCDBG_DELETE:
746 error = delete_tclass_for_pid_pname(so_tcdbg);
747 break;
748
749 case SO_TCDBG_TCFLUSH_PID:
750 error = flush_pid_tclass(so_tcdbg);
751 break;
752
753 default:
754 error = EINVAL;
755 break;
756 }
757
758 socket_lock(so, 0);
759
760 return error;
761 }
762
763 /*
764 * Not required to be privileged to get
765 */
766 __private_extern__ int
sogetopt_tcdbg(struct socket * so,struct sockopt * sopt)767 sogetopt_tcdbg(struct socket *so, struct sockopt *sopt)
768 {
769 int error = 0;
770 struct so_tcdbg so_tcdbg;
771 void *buf = NULL;
772 size_t len = sopt->sopt_valsize;
773
774 error = sooptcopyin(sopt, &so_tcdbg, sizeof(struct so_tcdbg),
775 sizeof(struct so_tcdbg));
776 if (error != 0) {
777 return error;
778 }
779
780 sopt->sopt_valsize = len;
781
782 socket_unlock(so, 0);
783
784 switch (so_tcdbg.so_tcdbg_cmd) {
785 case SO_TCDBG_PID:
786 error = get_pid_tclass(&so_tcdbg);
787 break;
788
789 case SO_TCDBG_PNAME:
790 error = get_pname_tclass(&so_tcdbg);
791 break;
792
793 case SO_TCDBG_COUNT:
794 lck_mtx_lock(&tclass_lock);
795 so_tcdbg.so_tcdbg_count = tfp_count;
796 lck_mtx_unlock(&tclass_lock);
797 break;
798
799 case SO_TCDBG_LIST: {
800 struct tclass_for_proc *tfp;
801 int n, alloc_count;
802 struct so_tcdbg *ptr;
803
804 lck_mtx_lock(&tclass_lock);
805 if ((alloc_count = tfp_count) == 0) {
806 lck_mtx_unlock(&tclass_lock);
807 error = EINVAL;
808 break;
809 }
810 len = alloc_count * sizeof(struct so_tcdbg);
811 lck_mtx_unlock(&tclass_lock);
812
813 buf = kalloc_data(len, Z_WAITOK | Z_ZERO);
814 if (buf == NULL) {
815 error = ENOBUFS;
816 break;
817 }
818
819 lck_mtx_lock(&tclass_lock);
820 n = 0;
821 ptr = (struct so_tcdbg *)buf;
822 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
823 if (++n > alloc_count) {
824 break;
825 }
826 if (tfp->tfp_pid != -1) {
827 ptr->so_tcdbg_cmd = SO_TCDBG_PID;
828 ptr->so_tcdbg_pid = tfp->tfp_pid;
829 } else {
830 ptr->so_tcdbg_cmd = SO_TCDBG_PNAME;
831 ptr->so_tcdbg_pid = -1;
832 strlcpy(ptr->so_tcdbg_pname,
833 tfp->tfp_pname,
834 sizeof(ptr->so_tcdbg_pname));
835 }
836 ptr->so_tcdbg_tclass = tfp->tfp_class;
837 ptr->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
838 ptr++;
839 }
840
841 lck_mtx_unlock(&tclass_lock);
842 }
843 break;
844
845 default:
846 error = EINVAL;
847 break;
848 }
849
850 socket_lock(so, 0);
851
852 if (error == 0) {
853 if (buf == NULL) {
854 error = sooptcopyout(sopt, &so_tcdbg,
855 sizeof(struct so_tcdbg));
856 } else {
857 error = sooptcopyout(sopt, buf, len);
858 kfree_data(buf, len);
859 }
860 }
861 return error;
862 }
863
864 #endif /* (DEVELOPMENT || DEBUG) */
865
866 int
so_get_netsvc_marking_level(struct socket * so)867 so_get_netsvc_marking_level(struct socket *so)
868 {
869 int marking_level = NETSVC_MRKNG_UNKNOWN;
870 struct ifnet *ifp = NULL;
871
872 switch (SOCK_DOM(so)) {
873 case PF_INET: {
874 struct inpcb *inp = sotoinpcb(so);
875
876 if (inp != NULL) {
877 ifp = inp->inp_last_outifp;
878 }
879 break;
880 }
881 case PF_INET6: {
882 struct in6pcb *in6p = sotoin6pcb(so);
883
884 if (in6p != NULL) {
885 ifp = in6p->in6p_last_outifp;
886 }
887 break;
888 }
889 default:
890 break;
891 }
892 if (ifp != NULL) {
893 if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0) {
894 if ((so->so_flags1 & SOF1_QOSMARKING_ALLOWED)) {
895 marking_level = NETSVC_MRKNG_LVL_L3L2_ALL;
896 } else {
897 marking_level = NETSVC_MRKNG_LVL_L3L2_BK;
898 }
899 } else {
900 marking_level = NETSVC_MRKNG_LVL_L2;
901 }
902 }
903 return marking_level;
904 }
905
906 __private_extern__ int
so_set_traffic_class(struct socket * so,int optval)907 so_set_traffic_class(struct socket *so, int optval)
908 {
909 int error = 0;
910
911 if (optval < SO_TC_BE || optval > SO_TC_CTL) {
912 error = EINVAL;
913 } else {
914 switch (optval) {
915 case _SO_TC_BK:
916 optval = SO_TC_BK;
917 break;
918 case _SO_TC_VI:
919 optval = SO_TC_VI;
920 break;
921 case _SO_TC_VO:
922 optval = SO_TC_VO;
923 break;
924 default:
925 if (!SO_VALID_TC(optval)) {
926 error = EINVAL;
927 }
928 break;
929 }
930
931 if (error == 0) {
932 int oldval = so->so_traffic_class;
933
934 VERIFY(SO_VALID_TC(optval));
935 so->so_traffic_class = (uint16_t)optval;
936
937 if ((SOCK_DOM(so) == PF_INET ||
938 SOCK_DOM(so) == PF_INET6) &&
939 SOCK_TYPE(so) == SOCK_STREAM) {
940 set_tcp_stream_priority(so);
941 }
942
943 if ((SOCK_DOM(so) == PF_INET ||
944 SOCK_DOM(so) == PF_INET6) &&
945 optval != oldval && (optval == SO_TC_BK_SYS ||
946 oldval == SO_TC_BK_SYS)) {
947 /*
948 * If the app switches from BK_SYS to something
949 * else, resume the socket if it was suspended.
950 */
951 if (oldval == SO_TC_BK_SYS) {
952 inp_reset_fc_state(so->so_pcb);
953 }
954
955 SOTHROTTLELOG("throttle[%d]: so 0x%llx "
956 "[%d,%d] opportunistic %s\n", so->last_pid,
957 (uint64_t)VM_KERNEL_ADDRPERM(so),
958 SOCK_DOM(so), SOCK_TYPE(so),
959 (optval == SO_TC_BK_SYS) ? "ON" : "OFF");
960 }
961 }
962 }
963 return error;
964 }
965
966 __private_extern__ int
so_set_net_service_type(struct socket * so,int netsvctype)967 so_set_net_service_type(struct socket *so, int netsvctype)
968 {
969 int sotc;
970 int error;
971
972 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype)) {
973 return EINVAL;
974 }
975
976 sotc = sotc_by_netservicetype[netsvctype];
977 error = so_set_traffic_class(so, sotc);
978 if (error != 0) {
979 return error;
980 }
981 so->so_netsvctype = (int8_t)netsvctype;
982 so->so_flags1 |= SOF1_TC_NET_SERV_TYPE;
983
984 return 0;
985 }
986
987 __private_extern__ void
so_set_default_traffic_class(struct socket * so)988 so_set_default_traffic_class(struct socket *so)
989 {
990 so->so_traffic_class = SO_TC_BE;
991
992 if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6)) {
993 if (net_qos_policy_restricted == 0) {
994 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
995 }
996 #if (DEVELOPMENT || DEBUG)
997 if (tfp_count > 0) {
998 set_tclass_for_curr_proc(so);
999 }
1000 #endif /* (DEVELOPMENT || DEBUG) */
1001 }
1002 }
1003
1004 __private_extern__ int
so_set_opportunistic(struct socket * so,int optval)1005 so_set_opportunistic(struct socket *so, int optval)
1006 {
1007 return so_set_traffic_class(so, (optval == 0) ?
1008 SO_TC_BE : SO_TC_BK_SYS);
1009 }
1010
1011 __private_extern__ int
so_get_opportunistic(struct socket * so)1012 so_get_opportunistic(struct socket *so)
1013 {
1014 return so->so_traffic_class == SO_TC_BK_SYS;
1015 }
1016
1017 __private_extern__ int
so_tc_from_control(struct mbuf * control,int * out_netsvctype)1018 so_tc_from_control(struct mbuf *control, int *out_netsvctype)
1019 {
1020 struct cmsghdr *cm;
1021 int sotc = SO_TC_UNSPEC;
1022
1023 *out_netsvctype = _NET_SERVICE_TYPE_UNSPEC;
1024
1025 for (cm = M_FIRST_CMSGHDR(control);
1026 is_cmsg_valid(control, cm);
1027 cm = M_NXT_CMSGHDR(control, cm)) {
1028 int val;
1029
1030 if (cm->cmsg_level != SOL_SOCKET ||
1031 cm->cmsg_len != CMSG_LEN(sizeof(int))) {
1032 continue;
1033 }
1034 val = *(int *)(void *)CMSG_DATA(cm);
1035 /*
1036 * The first valid option wins
1037 */
1038 switch (cm->cmsg_type) {
1039 case SO_TRAFFIC_CLASS:
1040 if (SO_VALID_TC(val)) {
1041 sotc = val;
1042 return sotc;
1043 /* NOT REACHED */
1044 } else if (val < SO_TC_NET_SERVICE_OFFSET) {
1045 break;
1046 }
1047 /*
1048 * Handle the case SO_NET_SERVICE_TYPE values are
1049 * passed using SO_TRAFFIC_CLASS
1050 */
1051 val = val - SO_TC_NET_SERVICE_OFFSET;
1052 OS_FALLTHROUGH;
1053 case SO_NET_SERVICE_TYPE:
1054 if (!IS_VALID_NET_SERVICE_TYPE(val)) {
1055 break;
1056 }
1057 *out_netsvctype = val;
1058 sotc = sotc_by_netservicetype[val];
1059 return sotc;
1060 /* NOT REACHED */
1061 default:
1062 break;
1063 }
1064 }
1065
1066 return sotc;
1067 }
1068
1069 __private_extern__ int
so_tos_from_control(struct mbuf * control)1070 so_tos_from_control(struct mbuf *control)
1071 {
1072 struct cmsghdr *cm;
1073 int tos = IPTOS_UNSPEC;
1074
1075 for (cm = M_FIRST_CMSGHDR(control);
1076 is_cmsg_valid(control, cm);
1077 cm = M_NXT_CMSGHDR(control, cm)) {
1078 if (cm->cmsg_len != CMSG_LEN(sizeof(int))) {
1079 continue;
1080 }
1081
1082 if ((cm->cmsg_level == IPPROTO_IP &&
1083 cm->cmsg_type == IP_TOS) ||
1084 (cm->cmsg_level == IPPROTO_IPV6 &&
1085 cm->cmsg_type == IPV6_TCLASS)) {
1086 tos = *(int *)(void *)CMSG_DATA(cm) & IPTOS_MASK;
1087 /* The first valid option wins */
1088 break;
1089 }
1090 }
1091
1092 return tos;
1093 }
1094
1095 __private_extern__ void
so_recv_data_stat(struct socket * so,struct mbuf * m,size_t off)1096 so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off)
1097 {
1098 uint32_t mtc = m_get_traffic_class(m);
1099
1100 if (mtc >= SO_TC_STATS_MAX) {
1101 mtc = MBUF_TC_BE;
1102 }
1103
1104 so->so_tc_stats[mtc].rxpackets += 1;
1105 so->so_tc_stats[mtc].rxbytes +=
1106 ((m->m_flags & M_PKTHDR) ? m->m_pkthdr.len : 0) + off;
1107 }
1108
1109 __private_extern__ void
so_inc_recv_data_stat(struct socket * so,size_t pkts,size_t bytes,uint32_t mtc)1110 so_inc_recv_data_stat(struct socket *so, size_t pkts, size_t bytes,
1111 uint32_t mtc)
1112 {
1113 if (mtc >= SO_TC_STATS_MAX) {
1114 mtc = MBUF_TC_BE;
1115 }
1116
1117 so->so_tc_stats[mtc].rxpackets += pkts;
1118 so->so_tc_stats[mtc].rxbytes += bytes;
1119 }
1120
1121 static inline int
so_throttle_best_effort(struct socket * so,struct ifnet * ifp)1122 so_throttle_best_effort(struct socket *so, struct ifnet *ifp)
1123 {
1124 uint32_t uptime = (uint32_t)net_uptime();
1125 return soissrcbesteffort(so) &&
1126 net_io_policy_throttle_best_effort == 1 &&
1127 ifp->if_rt_sendts > 0 &&
1128 (int)(uptime - ifp->if_rt_sendts) <= TCP_BG_SWITCH_TIME;
1129 }
1130
1131 __private_extern__ void
set_tcp_stream_priority(struct socket * so)1132 set_tcp_stream_priority(struct socket *so)
1133 {
1134 struct inpcb *inp = sotoinpcb(so);
1135 struct tcpcb *tp = intotcpcb(inp);
1136 struct ifnet *outifp;
1137 u_char old_cc = tp->tcp_cc_index;
1138 int recvbg = IS_TCP_RECV_BG(so);
1139 bool is_local = false, fg_active = false;
1140 uint32_t uptime;
1141
1142 VERIFY((SOCK_CHECK_DOM(so, PF_INET) ||
1143 SOCK_CHECK_DOM(so, PF_INET6)) &&
1144 SOCK_CHECK_TYPE(so, SOCK_STREAM) &&
1145 SOCK_CHECK_PROTO(so, IPPROTO_TCP));
1146
1147 /* Return if the socket is in a terminal state */
1148 if (inp->inp_state == INPCB_STATE_DEAD) {
1149 return;
1150 }
1151
1152 outifp = inp->inp_last_outifp;
1153 uptime = (uint32_t)net_uptime();
1154
1155 /*
1156 * If the socket was marked as a background socket or if the
1157 * traffic class is set to background with traffic class socket
1158 * option then make both send and recv side of the stream to be
1159 * background. The variable sotcdb which can be set with sysctl
1160 * is used to disable these settings for testing.
1161 */
1162 if (outifp == NULL || (outifp->if_flags & IFF_LOOPBACK)) {
1163 is_local = true;
1164 }
1165
1166 /* Check if there has been recent foreground activity */
1167 if (outifp != NULL) {
1168 /*
1169 * If the traffic source is background, check if
1170 * there is recent foreground activity which should
1171 * continue to keep the traffic source as background.
1172 * Otherwise, we can switch the traffic source to
1173 * foreground.
1174 */
1175 if (soissrcbackground(so) && outifp->if_fg_sendts > 0 &&
1176 (int)(uptime - outifp->if_fg_sendts) <= TCP_BG_SWITCH_TIME) {
1177 fg_active = true;
1178 }
1179
1180 /*
1181 * The traffic source is best-effort -- check if
1182 * the policy to throttle best effort is enabled
1183 * and there was realtime activity on this
1184 * interface recently. If this is true, enable
1185 * algorithms that respond to increased latency
1186 * on best-effort traffic.
1187 */
1188 if (so_throttle_best_effort(so, outifp)) {
1189 fg_active = true;
1190 }
1191 }
1192
1193 /*
1194 * System initiated background traffic like cloud uploads should
1195 * always use background delay sensitive algorithms. This will
1196 * make the stream more responsive to other streams on the user's
1197 * network and it will minimize latency induced.
1198 */
1199 if (fg_active || IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
1200 /*
1201 * If the interface that the connection is using is
1202 * loopback, do not use background congestion
1203 * control algorithm.
1204 *
1205 * If there has been recent foreground activity or if there
1206 * was an indication that a real time foreground application
1207 * is going to use networking (net_io_policy_throttled),
1208 * switch the background and best effort streams to use background
1209 * congestion control algorithm.
1210 */
1211 if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0 || is_local) {
1212 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
1213 tcp_set_foreground_cc(so);
1214 }
1215 } else {
1216 if (old_cc != TCP_CC_ALGO_BACKGROUND_INDEX) {
1217 tcp_set_background_cc(so);
1218 }
1219 }
1220
1221 /* Set receive side background flags */
1222 if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0 || is_local) {
1223 tcp_clear_recv_bg(so);
1224 } else {
1225 tcp_set_recv_bg(so);
1226 }
1227 } else {
1228 /*
1229 * If there is no recent foreground activity, even the
1230 * background flows can use foreground congestion controller.
1231 */
1232 tcp_clear_recv_bg(so);
1233 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
1234 tcp_set_foreground_cc(so);
1235 }
1236 }
1237
1238 if (old_cc != tp->tcp_cc_index || recvbg != IS_TCP_RECV_BG(so)) {
1239 SOTHROTTLELOG("throttle[%d]: so 0x%llx [%d,%d] TCP %s send; "
1240 "%s recv\n", so->last_pid,
1241 (uint64_t)VM_KERNEL_ADDRPERM(so),
1242 SOCK_DOM(so), SOCK_TYPE(so),
1243 (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) ?
1244 "background" : "foreground",
1245 IS_TCP_RECV_BG(so) ? "background" : "foreground");
1246 }
1247 }
1248
1249 /*
1250 * Set traffic class to an IPv4 or IPv6 packet
1251 * - mark the mbuf
1252 * - set the DSCP code following the WMM mapping
1253 */
1254 __private_extern__ void
set_packet_service_class(struct mbuf * m,struct socket * so,int sotc,uint32_t flags)1255 set_packet_service_class(struct mbuf *m, struct socket *so,
1256 int sotc, uint32_t flags)
1257 {
1258 mbuf_svc_class_t msc = MBUF_SC_BE; /* Best effort by default */
1259 struct inpcb *inp = sotoinpcb(so); /* in6pcb and inpcb are the same */
1260
1261 if (!(m->m_flags & M_PKTHDR)) {
1262 return;
1263 }
1264
1265 /*
1266 * Here is the precedence:
1267 * 1) TRAFFIC_MGT_SO_BACKGROUND trumps all
1268 * 2) Traffic class passed via ancillary data to sendmsdg(2)
1269 * 3) Traffic class socket option last
1270 */
1271 if (sotc != SO_TC_UNSPEC) {
1272 VERIFY(SO_VALID_TC(sotc));
1273 msc = so_tc2msc(sotc);
1274 /* Assert because tc must have been valid */
1275 VERIFY(MBUF_VALID_SC(msc));
1276 }
1277
1278 /*
1279 * If TRAFFIC_MGT_SO_BACKGROUND is set or policy to throttle
1280 * best effort is set, depress the priority.
1281 */
1282 if (!IS_MBUF_SC_BACKGROUND(msc) && soisthrottled(so)) {
1283 msc = MBUF_SC_BK;
1284 }
1285
1286 if (IS_MBUF_SC_BESTEFFORT(msc) && inp->inp_last_outifp != NULL &&
1287 so_throttle_best_effort(so, inp->inp_last_outifp)) {
1288 msc = MBUF_SC_BK;
1289 }
1290
1291 if (soissrcbackground(so)) {
1292 m->m_pkthdr.pkt_flags |= PKTF_SO_BACKGROUND;
1293 }
1294
1295 if (soissrcrealtime(so) || IS_MBUF_SC_REALTIME(msc)) {
1296 m->m_pkthdr.pkt_flags |= PKTF_SO_REALTIME;
1297 }
1298 /*
1299 * Set the traffic class in the mbuf packet header svc field
1300 */
1301 if (sotcdb & SOTCDB_NO_MTC) {
1302 goto no_mbtc;
1303 }
1304
1305 /*
1306 * Elevate service class if the packet is a pure TCP ACK.
1307 * We can do this only when the flow is not a background
1308 * flow and the outgoing interface supports
1309 * transmit-start model.
1310 */
1311 if (!IS_MBUF_SC_BACKGROUND(msc) &&
1312 (flags & (PKT_SCF_TCP_ACK | PKT_SCF_TCP_SYN)) != 0) {
1313 msc = MBUF_SC_CTL;
1314 }
1315
1316 (void) m_set_service_class(m, msc);
1317
1318 /*
1319 * Set the privileged traffic auxiliary flag if applicable,
1320 * or clear it.
1321 */
1322 if (!(sotcdb & SOTCDB_NO_PRIVILEGED) && soisprivilegedtraffic(so) &&
1323 msc != MBUF_SC_UNSPEC) {
1324 m->m_pkthdr.pkt_flags |= PKTF_PRIO_PRIVILEGED;
1325 } else {
1326 m->m_pkthdr.pkt_flags &= ~PKTF_PRIO_PRIVILEGED;
1327 }
1328
1329 no_mbtc:
1330 /*
1331 * For TCP with background traffic class switch CC algo based on sysctl
1332 */
1333 if (so->so_type == SOCK_STREAM) {
1334 set_tcp_stream_priority(so);
1335 }
1336
1337 so_tc_update_stats(m, so, msc);
1338 }
1339
1340 __private_extern__ void
so_tc_update_stats(struct mbuf * m,struct socket * so,mbuf_svc_class_t msc)1341 so_tc_update_stats(struct mbuf *m, struct socket *so, mbuf_svc_class_t msc)
1342 {
1343 mbuf_traffic_class_t mtc;
1344
1345 /*
1346 * Assume socket and mbuf traffic class values are the same
1347 * Also assume the socket lock is held. Note that the stats
1348 * at the socket layer are reduced down to the legacy traffic
1349 * classes; we could/should potentially expand so_tc_stats[].
1350 */
1351 mtc = MBUF_SC2TC(msc);
1352 VERIFY(mtc < SO_TC_STATS_MAX);
1353 so->so_tc_stats[mtc].txpackets += 1;
1354 so->so_tc_stats[mtc].txbytes += m->m_pkthdr.len;
1355 }
1356
1357 __private_extern__ mbuf_svc_class_t
so_tc2msc(int tc)1358 so_tc2msc(int tc)
1359 {
1360 mbuf_svc_class_t msc;
1361
1362 switch (tc) {
1363 case SO_TC_BK_SYS:
1364 msc = MBUF_SC_BK_SYS;
1365 break;
1366 case SO_TC_BK:
1367 case _SO_TC_BK:
1368 msc = MBUF_SC_BK;
1369 break;
1370 case SO_TC_BE:
1371 msc = MBUF_SC_BE;
1372 break;
1373 case SO_TC_RD:
1374 msc = MBUF_SC_RD;
1375 break;
1376 case SO_TC_OAM:
1377 msc = MBUF_SC_OAM;
1378 break;
1379 case SO_TC_AV:
1380 msc = MBUF_SC_AV;
1381 break;
1382 case SO_TC_RV:
1383 msc = MBUF_SC_RV;
1384 break;
1385 case SO_TC_VI:
1386 case _SO_TC_VI:
1387 msc = MBUF_SC_VI;
1388 break;
1389 case SO_TC_NETSVC_SIG:
1390 msc = MBUF_SC_SIG;
1391 break;
1392 case SO_TC_VO:
1393 case _SO_TC_VO:
1394 msc = MBUF_SC_VO;
1395 break;
1396 case SO_TC_CTL:
1397 msc = MBUF_SC_CTL;
1398 break;
1399 case SO_TC_ALL:
1400 default:
1401 msc = MBUF_SC_UNSPEC;
1402 break;
1403 }
1404
1405 return msc;
1406 }
1407
1408 __private_extern__ int
so_svc2tc(mbuf_svc_class_t svc)1409 so_svc2tc(mbuf_svc_class_t svc)
1410 {
1411 switch (svc) {
1412 case MBUF_SC_BK_SYS:
1413 return SO_TC_BK_SYS;
1414 case MBUF_SC_BK:
1415 return SO_TC_BK;
1416 case MBUF_SC_BE:
1417 return SO_TC_BE;
1418 case MBUF_SC_RD:
1419 return SO_TC_RD;
1420 case MBUF_SC_OAM:
1421 return SO_TC_OAM;
1422 case MBUF_SC_AV:
1423 return SO_TC_AV;
1424 case MBUF_SC_RV:
1425 return SO_TC_RV;
1426 case MBUF_SC_VI:
1427 return SO_TC_VI;
1428 case MBUF_SC_SIG:
1429 return SO_TC_NETSVC_SIG;
1430 case MBUF_SC_VO:
1431 return SO_TC_VO;
1432 case MBUF_SC_CTL:
1433 return SO_TC_CTL;
1434 case MBUF_SC_UNSPEC:
1435 default:
1436 return SO_TC_BE;
1437 }
1438 }
1439
1440 static size_t
sotc_index(int sotc)1441 sotc_index(int sotc)
1442 {
1443 switch (sotc) {
1444 case SO_TC_BK_SYS:
1445 return SOTCIX_BK_SYS;
1446 case _SO_TC_BK:
1447 case SO_TC_BK:
1448 return SOTCIX_BK;
1449
1450 case SO_TC_BE:
1451 return SOTCIX_BE;
1452 case SO_TC_RD:
1453 return SOTCIX_RD;
1454 case SO_TC_OAM:
1455 return SOTCIX_OAM;
1456
1457 case SO_TC_AV:
1458 return SOTCIX_AV;
1459 case SO_TC_RV:
1460 return SOTCIX_RV;
1461 case _SO_TC_VI:
1462 case SO_TC_VI:
1463 return SOTCIX_VI;
1464
1465 case _SO_TC_VO:
1466 case SO_TC_VO:
1467 return SOTCIX_VO;
1468 case SO_TC_CTL:
1469 return SOTCIX_CTL;
1470
1471 default:
1472 break;
1473 }
1474 /*
1475 * Unknown traffic class value
1476 */
1477 return SIZE_T_MAX;
1478 }
1479
1480 uint8_t
fastlane_sc_to_dscp(uint32_t svc_class)1481 fastlane_sc_to_dscp(uint32_t svc_class)
1482 {
1483 uint8_t dscp = _DSCP_DF;
1484
1485 switch (svc_class) {
1486 case MBUF_SC_BK_SYS:
1487 case MBUF_SC_BK:
1488 dscp = _DSCP_AF11;
1489 break;
1490
1491 case MBUF_SC_BE:
1492 dscp = _DSCP_DF;
1493 break;
1494 case MBUF_SC_RD:
1495 dscp = _DSCP_AF21;
1496 break;
1497 case MBUF_SC_OAM:
1498 dscp = _DSCP_CS2;
1499 break;
1500
1501 case MBUF_SC_AV:
1502 dscp = _DSCP_AF31;
1503 break;
1504 case MBUF_SC_RV:
1505 dscp = _DSCP_CS4;
1506 break;
1507 case MBUF_SC_VI:
1508 dscp = _DSCP_AF41;
1509 break;
1510 case MBUF_SC_SIG:
1511 dscp = _DSCP_CS3;
1512 break;
1513
1514 case MBUF_SC_VO:
1515 dscp = _DSCP_EF;
1516 break;
1517 case MBUF_SC_CTL:
1518 dscp = _DSCP_DF;
1519 break;
1520 default:
1521 dscp = _DSCP_DF;
1522 break;
1523 }
1524
1525 return dscp;
1526 }
1527
1528 uint8_t
rfc4594_sc_to_dscp(uint32_t svc_class)1529 rfc4594_sc_to_dscp(uint32_t svc_class)
1530 {
1531 uint8_t dscp = _DSCP_DF;
1532
1533 switch (svc_class) {
1534 case MBUF_SC_BK_SYS: /* Low-Priority Data */
1535 case MBUF_SC_BK:
1536 dscp = _DSCP_CS1;
1537 break;
1538
1539 case MBUF_SC_BE: /* Standard */
1540 dscp = _DSCP_DF;
1541 break;
1542 case MBUF_SC_RD: /* Low-Latency Data */
1543 dscp = _DSCP_AF21;
1544 break;
1545
1546 /* SVC_CLASS Not Defined: High-Throughput Data */
1547
1548 case MBUF_SC_OAM: /* OAM */
1549 dscp = _DSCP_CS2;
1550 break;
1551
1552 /* SVC_CLASS Not Defined: Broadcast Video */
1553
1554 case MBUF_SC_AV: /* Multimedia Streaming */
1555 dscp = _DSCP_AF31;
1556 break;
1557 case MBUF_SC_RV: /* Real-Time Interactive */
1558 dscp = _DSCP_CS4;
1559 break;
1560 case MBUF_SC_VI: /* Multimedia Conferencing */
1561 dscp = _DSCP_AF41;
1562 break;
1563 case MBUF_SC_SIG: /* Signaling */
1564 dscp = _DSCP_CS5;
1565 break;
1566
1567 case MBUF_SC_VO: /* Telephony */
1568 dscp = _DSCP_EF;
1569 break;
1570 case MBUF_SC_CTL: /* Network Control*/
1571 dscp = _DSCP_CS6;
1572 break;
1573 default:
1574 dscp = _DSCP_DF;
1575 break;
1576 }
1577
1578 return dscp;
1579 }
1580
1581 mbuf_traffic_class_t
rfc4594_dscp_to_tc(uint8_t dscp)1582 rfc4594_dscp_to_tc(uint8_t dscp)
1583 {
1584 mbuf_traffic_class_t tc = MBUF_TC_BE;
1585
1586 switch (dscp) {
1587 case _DSCP_CS1:
1588 tc = MBUF_TC_BK;
1589 break;
1590 case _DSCP_DF:
1591 case _DSCP_AF21:
1592 case _DSCP_CS2:
1593 tc = MBUF_TC_BE;
1594 break;
1595 case _DSCP_AF31:
1596 case _DSCP_CS4:
1597 case _DSCP_AF41:
1598 case _DSCP_CS5:
1599 tc = MBUF_TC_VI;
1600 break;
1601 case _DSCP_EF:
1602 case _DSCP_CS6:
1603 tc = MBUF_TC_VO;
1604 break;
1605 default:
1606 tc = MBUF_TC_BE;
1607 break;
1608 }
1609
1610 return tc;
1611 }
1612
1613 /*
1614 * Pass NULL ifp for default map
1615 */
1616 static errno_t
set_netsvctype_dscp_map(struct net_qos_dscp_map * net_qos_dscp_map,const struct netsvctype_dscp_map * netsvctype_dscp_map)1617 set_netsvctype_dscp_map(struct net_qos_dscp_map *net_qos_dscp_map,
1618 const struct netsvctype_dscp_map *netsvctype_dscp_map)
1619 {
1620 size_t i;
1621 int netsvctype;
1622
1623 /*
1624 * Do not accept more that max number of distinct DSCPs
1625 */
1626 if (net_qos_dscp_map == NULL || netsvctype_dscp_map == NULL) {
1627 return EINVAL;
1628 }
1629
1630 /*
1631 * Validate input parameters
1632 */
1633 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1634 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype_dscp_map[i].netsvctype)) {
1635 return EINVAL;
1636 }
1637 if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
1638 return EINVAL;
1639 }
1640 }
1641
1642 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1643 netsvctype = netsvctype_dscp_map[i].netsvctype;
1644
1645 net_qos_dscp_map->netsvctype_to_dscp[netsvctype] =
1646 netsvctype_dscp_map[i].dscp;
1647 }
1648 for (netsvctype = 0; netsvctype < _NET_SERVICE_TYPE_COUNT; netsvctype++) {
1649 switch (netsvctype) {
1650 case NET_SERVICE_TYPE_BE:
1651 case NET_SERVICE_TYPE_BK:
1652 case NET_SERVICE_TYPE_VI:
1653 case NET_SERVICE_TYPE_VO:
1654 case NET_SERVICE_TYPE_RV:
1655 case NET_SERVICE_TYPE_AV:
1656 case NET_SERVICE_TYPE_OAM:
1657 case NET_SERVICE_TYPE_RD: {
1658 size_t sotcix;
1659
1660 sotcix = sotc_index(sotc_by_netservicetype[netsvctype]);
1661 if (sotcix != SIZE_T_MAX) {
1662 net_qos_dscp_map->sotc_to_dscp[sotcix] =
1663 netsvctype_dscp_map[netsvctype].dscp;
1664 }
1665 break;
1666 }
1667 case NET_SERVICE_TYPE_SIG:
1668 /* Signaling does not have its own traffic class */
1669 break;
1670 default:
1671 /* We should not be here */
1672 ASSERT(0);
1673 }
1674 }
1675 if (net_qos_dscp_map == &fastlane_net_qos_dscp_map) {
1676 /* Network control socket traffic class is always best effort for fastlane*/
1677 net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_DF;
1678 } else {
1679 net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_CS6;
1680 }
1681
1682 /* Background system socket traffic class DSCP same as background */
1683 net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK_SYS] =
1684 net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK];
1685
1686 return 0;
1687 }
1688
1689 static size_t
get_netsvctype_dscp_map(struct netsvctype_dscp_map * netsvctype_dscp_map)1690 get_netsvctype_dscp_map(struct netsvctype_dscp_map *netsvctype_dscp_map)
1691 {
1692 struct net_qos_dscp_map *net_qos_dscp_map;
1693 int i;
1694
1695 net_qos_dscp_map = &fastlane_net_qos_dscp_map;
1696
1697 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1698 netsvctype_dscp_map[i].netsvctype = i;
1699 netsvctype_dscp_map[i].dscp = net_qos_dscp_map->netsvctype_to_dscp[i];
1700 }
1701
1702 return i * sizeof(struct netsvctype_dscp_map);
1703 }
1704
1705 void
net_qos_map_init()1706 net_qos_map_init()
1707 {
1708 errno_t error;
1709
1710 error = set_netsvctype_dscp_map(&fastlane_net_qos_dscp_map,
1711 fastlane_netsvctype_dscp_map);
1712 ASSERT(error == 0);
1713
1714 error = set_netsvctype_dscp_map(&rfc4594_net_qos_dscp_map,
1715 rfc4594_netsvctype_dscp_map);
1716 ASSERT(error == 0);
1717
1718 #if (DEBUG || DEVELOPMENT)
1719 error = set_netsvctype_dscp_map(&custom_net_qos_dscp_map,
1720 rfc4594_netsvctype_dscp_map);
1721 ASSERT(error == 0);
1722
1723 #endif /* (DEBUG || DEVELOPMENT) */
1724
1725 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
1726 }
1727
1728 int
1729 sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS
1730 {
1731 #pragma unused(oidp, arg1, arg2)
1732 int error = 0;
1733
1734 if (req->oldptr == USER_ADDR_NULL) {
1735 req->oldidx =
1736 _NET_SERVICE_TYPE_COUNT * sizeof(struct netsvctype_dscp_map);
1737 } else if (req->oldlen > 0) {
1738 struct netsvctype_dscp_map netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {};
1739 size_t len;
1740
1741 len = get_netsvctype_dscp_map(netsvctype_dscp_map);
1742
1743 error = SYSCTL_OUT(req, netsvctype_dscp_map,
1744 MIN(len, req->oldlen));
1745 if (error != 0) {
1746 goto done;
1747 }
1748 }
1749
1750 if (req->newptr != USER_ADDR_NULL) {
1751 error = EPERM;
1752 }
1753 done:
1754 return error;
1755 }
1756
1757 __private_extern__ errno_t
set_packet_qos(struct mbuf * m,struct ifnet * ifp,boolean_t qos_allowed,int sotc,int netsvctype,uint8_t * dscp_inout)1758 set_packet_qos(struct mbuf *m, struct ifnet *ifp, boolean_t qos_allowed,
1759 int sotc, int netsvctype, uint8_t *dscp_inout)
1760 {
1761 if (ifp == NULL || dscp_inout == NULL) {
1762 return EINVAL;
1763 }
1764
1765 if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0 &&
1766 ifp->if_qosmarking_mode != IFRTYPE_QOSMARKING_MODE_NONE) {
1767 uint8_t dscp;
1768 const struct net_qos_dscp_map *net_qos_dscp_map = NULL;
1769
1770 switch (ifp->if_qosmarking_mode) {
1771 case IFRTYPE_QOSMARKING_FASTLANE:
1772 net_qos_dscp_map = &fastlane_net_qos_dscp_map;
1773 break;
1774 case IFRTYPE_QOSMARKING_RFC4594:
1775 net_qos_dscp_map = &rfc4594_net_qos_dscp_map;
1776 break;
1777 #if (DEBUG || DEVELOPMENT)
1778 case IFRTYPE_QOSMARKING_CUSTOM:
1779 net_qos_dscp_map = &custom_net_qos_dscp_map;
1780 break;
1781 #endif /* (DEBUG || DEVELOPMENT) */
1782 default:
1783 panic("invalid QoS marking type");
1784 /* NOTREACHED */
1785 }
1786
1787 /*
1788 * When on a Fastlane network, IP_TOS/IPV6_TCLASS are no-ops
1789 */
1790 dscp = _DSCP_DF;
1791
1792 /*
1793 * For DSCP use the network service type is specified, otherwise
1794 * use the socket traffic class
1795 *
1796 * When not whitelisted by the policy, set DSCP only for best
1797 * effort and background, and set the mbuf service class to
1798 * best effort as well so the packet will be queued and
1799 * scheduled at a lower priority.
1800 * We still want to prioritize control traffic on the interface
1801 * so we do not change the mbuf service class for SO_TC_CTL
1802 */
1803 if (IS_VALID_NET_SERVICE_TYPE(netsvctype) &&
1804 netsvctype != NET_SERVICE_TYPE_BE) {
1805 dscp = net_qos_dscp_map->netsvctype_to_dscp[netsvctype];
1806
1807 if (qos_allowed == FALSE &&
1808 netsvctype != NET_SERVICE_TYPE_BE &&
1809 netsvctype != NET_SERVICE_TYPE_BK) {
1810 dscp = _DSCP_DF;
1811 if (sotc != SO_TC_CTL) {
1812 m_set_service_class(m, MBUF_SC_BE);
1813 }
1814 }
1815 } else if (sotc != SO_TC_UNSPEC) {
1816 size_t sotcix = sotc_index(sotc);
1817 if (sotcix != SIZE_T_MAX) {
1818 dscp = net_qos_dscp_map->sotc_to_dscp[sotcix];
1819
1820 if (qos_allowed == FALSE && sotc != SO_TC_BE &&
1821 sotc != SO_TC_BK && sotc != SO_TC_BK_SYS &&
1822 sotc != SO_TC_CTL) {
1823 dscp = _DSCP_DF;
1824 if (sotc != SO_TC_CTL) {
1825 m_set_service_class(m, MBUF_SC_BE);
1826 }
1827 }
1828 }
1829 }
1830 if (net_qos_verbose != 0) {
1831 printf("%s qos_allowed %d sotc %u netsvctype %u dscp %u\n",
1832 __func__, qos_allowed, sotc, netsvctype, dscp);
1833 }
1834
1835 if (*dscp_inout != dscp) {
1836 *dscp_inout = dscp;
1837 }
1838 } else if (*dscp_inout != _DSCP_DF && IFNET_IS_WIFI_INFRA(ifp)) {
1839 mbuf_svc_class_t msc = m_get_service_class(m);
1840
1841 /*
1842 * For WiFi infra, when the mbuf service class is best effort
1843 * and the DSCP is not default, set the service class based
1844 * on DSCP
1845 */
1846 if (msc == MBUF_SC_BE) {
1847 msc = wifi_dscp_to_msc_array[*dscp_inout];
1848
1849 if (msc != MBUF_SC_BE) {
1850 m_set_service_class(m, msc);
1851
1852 if (net_qos_verbose != 0) {
1853 printf("%s set msc %u for dscp %u\n",
1854 __func__, msc, *dscp_inout);
1855 }
1856 }
1857 }
1858 }
1859
1860 return 0;
1861 }
1862
1863 static void
set_dscp_to_wifi_ac_map(const struct dcsp_msc_map * map,int clear)1864 set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *map, int clear)
1865 {
1866 int i;
1867
1868 if (clear) {
1869 bzero(wifi_dscp_to_msc_array, sizeof(wifi_dscp_to_msc_array));
1870 }
1871
1872 for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1873 const struct dcsp_msc_map *elem = map + i;
1874
1875 if (elem->dscp > _MAX_DSCP || elem->msc == MBUF_SC_UNSPEC) {
1876 break;
1877 }
1878 switch (elem->msc) {
1879 case MBUF_SC_BK_SYS:
1880 case MBUF_SC_BK:
1881 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BK;
1882 break;
1883 default:
1884 case MBUF_SC_BE:
1885 case MBUF_SC_RD:
1886 case MBUF_SC_OAM:
1887 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BE;
1888 break;
1889 case MBUF_SC_AV:
1890 case MBUF_SC_RV:
1891 case MBUF_SC_VI:
1892 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VI;
1893 break;
1894 case MBUF_SC_VO:
1895 case MBUF_SC_CTL:
1896 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VO;
1897 break;
1898 }
1899 }
1900 }
1901
1902 static errno_t
dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map * netsvctype_dscp_map,size_t count,struct dcsp_msc_map * dcsp_msc_map)1903 dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *netsvctype_dscp_map,
1904 size_t count, struct dcsp_msc_map *dcsp_msc_map)
1905 {
1906 errno_t error = 0;
1907 uint32_t i;
1908
1909 /*
1910 * Validate input parameters
1911 */
1912 for (i = 0; i < count; i++) {
1913 if (!SO_VALID_TC(netsvctype_dscp_map[i].netsvctype)) {
1914 error = EINVAL;
1915 goto done;
1916 }
1917 if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
1918 error = EINVAL;
1919 goto done;
1920 }
1921 }
1922
1923 bzero(dcsp_msc_map, DSCP_ARRAY_SIZE * sizeof(struct dcsp_msc_map));
1924
1925 for (i = 0; i < count; i++) {
1926 dcsp_msc_map[i].dscp = netsvctype_dscp_map[i].dscp;
1927 dcsp_msc_map[i].msc = so_tc2msc(netsvctype_dscp_map[i].netsvctype);
1928 }
1929 done:
1930 return error;
1931 }
1932
1933 int
1934 sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
1935 {
1936 #pragma unused(oidp, arg1, arg2)
1937 int error = 0;
1938 size_t len = DSCP_ARRAY_SIZE * sizeof(struct netsvctype_dscp_map);
1939 struct netsvctype_dscp_map netsvctype_dscp_map[DSCP_ARRAY_SIZE] = {};
1940 struct dcsp_msc_map dcsp_msc_map[DSCP_ARRAY_SIZE];
1941 size_t count;
1942
1943 if (req->oldptr == USER_ADDR_NULL) {
1944 req->oldidx = len;
1945 } else if (req->oldlen > 0) {
1946 uint8_t i;
1947
1948 for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1949 netsvctype_dscp_map[i].dscp = i;
1950 netsvctype_dscp_map[i].netsvctype =
1951 so_svc2tc(wifi_dscp_to_msc_array[i]);
1952 }
1953 error = SYSCTL_OUT(req, netsvctype_dscp_map,
1954 MIN(len, req->oldlen));
1955 if (error != 0) {
1956 goto done;
1957 }
1958 }
1959
1960 if (req->newptr == USER_ADDR_NULL) {
1961 goto done;
1962 }
1963
1964 error = proc_suser(current_proc());
1965 if (error != 0) {
1966 goto done;
1967 }
1968
1969 /*
1970 * Check input length
1971 */
1972 if (req->newlen > len) {
1973 error = EINVAL;
1974 goto done;
1975 }
1976 /*
1977 * Cap the number of entries to copy from input buffer
1978 */
1979 if (len > req->newlen) {
1980 len = req->newlen;
1981 }
1982 error = SYSCTL_IN(req, netsvctype_dscp_map, len);
1983 if (error != 0) {
1984 goto done;
1985 }
1986 count = len / sizeof(struct netsvctype_dscp_map);
1987 bzero(dcsp_msc_map, sizeof(dcsp_msc_map));
1988 error = dscp_msc_map_from_netsvctype_dscp_map(netsvctype_dscp_map, count,
1989 dcsp_msc_map);
1990 if (error != 0) {
1991 goto done;
1992 }
1993 set_dscp_to_wifi_ac_map(dcsp_msc_map, 0);
1994 done:
1995 return error;
1996 }
1997
1998 int
1999 sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
2000 {
2001 #pragma unused(oidp, arg1, arg2)
2002 int error = 0;
2003 int val = 0;
2004
2005 error = sysctl_handle_int(oidp, &val, 0, req);
2006 if (error || !req->newptr) {
2007 return error;
2008 }
2009 if (req->newptr == USER_ADDR_NULL) {
2010 return 0;
2011 }
2012 error = proc_suser(current_proc());
2013 if (error != 0) {
2014 return error;
2015 }
2016
2017 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
2018
2019 return 0;
2020 }
2021
2022 /*
2023 * Returns whether a large upload or download transfer should be marked as
2024 * BK service type for network activity. This is a system level
2025 * hint/suggestion to classify application traffic based on statistics
2026 * collected from the current network attachment
2027 *
2028 * Returns 1 for BK and 0 for default
2029 */
2030
2031 int
net_qos_guideline(struct proc * p,struct net_qos_guideline_args * arg,int * retval)2032 net_qos_guideline(struct proc *p, struct net_qos_guideline_args *arg,
2033 int *retval)
2034 {
2035 #pragma unused(p)
2036 #define RETURN_USE_BK 1
2037 #define RETURN_USE_DEFAULT 0
2038 struct net_qos_param qos_arg;
2039 struct ifnet *ipv4_primary, *ipv6_primary;
2040 int err = 0;
2041
2042 if (arg->param == USER_ADDR_NULL || retval == NULL ||
2043 arg->param_len != sizeof(qos_arg)) {
2044 return EINVAL;
2045 }
2046 err = copyin(arg->param, (caddr_t) &qos_arg, sizeof(qos_arg));
2047 if (err != 0) {
2048 return err;
2049 }
2050
2051 *retval = RETURN_USE_DEFAULT;
2052 ipv4_primary = ifindex2ifnet[get_primary_ifscope(AF_INET)];
2053 ipv6_primary = ifindex2ifnet[get_primary_ifscope(AF_INET6)];
2054
2055 /*
2056 * If either of the interfaces is in Low Internet mode, enable
2057 * background delay based algorithms on this transfer
2058 */
2059 if (qos_arg.nq_uplink) {
2060 if ((ipv4_primary != NULL &&
2061 (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_UL)) ||
2062 (ipv6_primary != NULL &&
2063 (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_UL))) {
2064 *retval = RETURN_USE_BK;
2065 return 0;
2066 }
2067 } else {
2068 if ((ipv4_primary != NULL &&
2069 (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_DL)) ||
2070 (ipv6_primary != NULL &&
2071 (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_DL))) {
2072 *retval = RETURN_USE_BK;
2073 return 0;
2074 }
2075 }
2076
2077 /*
2078 * Some times IPv4 and IPv6 primary interfaces can be different.
2079 * In this case, if either of them is non-cellular, we should mark
2080 * the transfer as BK as it can potentially get used based on
2081 * the host name resolution
2082 */
2083 if (ipv4_primary != NULL && IFNET_IS_EXPENSIVE(ipv4_primary) &&
2084 ipv6_primary != NULL && IFNET_IS_EXPENSIVE(ipv6_primary)) {
2085 if (qos_arg.nq_use_expensive) {
2086 return 0;
2087 } else {
2088 *retval = RETURN_USE_BK;
2089 return 0;
2090 }
2091 }
2092 if (ipv4_primary != NULL && IFNET_IS_CONSTRAINED(ipv4_primary) &&
2093 ipv6_primary != NULL && IFNET_IS_CONSTRAINED(ipv6_primary)) {
2094 if (qos_arg.nq_use_constrained) {
2095 return 0;
2096 } else {
2097 *retval = RETURN_USE_BK;
2098 return 0;
2099 }
2100 }
2101 if (qos_arg.nq_transfer_size >= 5 * 1024 * 1024) {
2102 *retval = RETURN_USE_BK;
2103 return 0;
2104 }
2105
2106
2107 #undef RETURN_USE_BK
2108 #undef RETURN_USE_DEFAULT
2109 return 0;
2110 }
2111
2112 #if (DEBUG || DEVELOPMENT)
/*
 * Customizable QoS mapping table
 * By default it uses the mapping table for RFC 4594
 *
 * Notes:
 *	BK_SYS is the same as BK
 *	CTL cannot be changed and is always _DSCP_CS6
 */
/* Parent node for the custom QoS map controls, registered under _net_qos */
SYSCTL_NODE(_net_qos, OID_AUTO, custom,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");

/* Sub-node grouping one integer control per network service type */
SYSCTL_NODE(_net_qos_custom, OID_AUTO, netsvctype_to_dscp,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");

/*
 * All per-service-type controls below share a single handler; the
 * NET_SERVICE_TYPE_* constant passed as the handler's arg2 selects
 * which entry of the custom map the control refers to.
 */
static int sysctl_net_qos_custom_netsvctype_to_dscp SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, be,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_BE, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, bk,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_BK, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, sig,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_SIG, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, vi,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_VI, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, vo,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_VO, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, rv,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_RV, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, av,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_AV, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, oam,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_OAM, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, rd,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_RD, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");

/* Control whose handler reloads the custom map from the RFC 4594 table */
static int sysctl_net_qos_custom_reset SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_net_qos_custom, OID_AUTO, reset,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, 0, sysctl_net_qos_custom_reset, "I", "");
2160
2161 int
2162 sysctl_net_qos_custom_netsvctype_to_dscp SYSCTL_HANDLER_ARGS
2163 {
2164 #pragma unused(arg1)
2165 int error = 0;
2166
2167 switch (arg2) {
2168 case NET_SERVICE_TYPE_BE:
2169 case NET_SERVICE_TYPE_BK:
2170 case NET_SERVICE_TYPE_SIG:
2171 case NET_SERVICE_TYPE_VI:
2172 case NET_SERVICE_TYPE_VO:
2173 case NET_SERVICE_TYPE_RV:
2174 case NET_SERVICE_TYPE_AV:
2175 case NET_SERVICE_TYPE_OAM:
2176 case NET_SERVICE_TYPE_RD:
2177 break;
2178 default:
2179 os_log(OS_LOG_DEFAULT, "%s: unexpected netsvctype %d",
2180 __func__, arg2);
2181 return EINVAL;
2182 }
2183
2184 int val = custom_net_qos_dscp_map.netsvctype_to_dscp[arg2];
2185 error = sysctl_handle_int(oidp, &val, 0, req);
2186 if (error != 0 || req->newptr == USER_ADDR_NULL) {
2187 return error;
2188 }
2189 if (req->newptr == USER_ADDR_NULL) {
2190 return 0;
2191 }
2192 error = proc_suser(current_proc());
2193 if (error != 0) {
2194 return error;
2195 }
2196 if (val < 0 || val > _MAX_DSCP) {
2197 os_log(OS_LOG_DEFAULT, "%s: unexpected DSCP %d",
2198 __func__, val);
2199 return EINVAL;
2200 }
2201
2202 struct netsvctype_dscp_map netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {};
2203
2204 for (int i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
2205 netsvctype_dscp_map[i].netsvctype = i;
2206 netsvctype_dscp_map[i].dscp = custom_net_qos_dscp_map.netsvctype_to_dscp[i];
2207 }
2208 netsvctype_dscp_map[arg2].dscp = (uint8_t) val;
2209
2210 error = set_netsvctype_dscp_map(&custom_net_qos_dscp_map,
2211 netsvctype_dscp_map);
2212
2213 return 0;
2214 }
2215
2216 int
2217 sysctl_net_qos_custom_reset SYSCTL_HANDLER_ARGS
2218 {
2219 #pragma unused(arg1, arg2)
2220 int error = 0;
2221 int val = 0;
2222
2223 error = sysctl_handle_int(oidp, &val, 0, req);
2224 if (error || !req->newptr) {
2225 return error;
2226 }
2227 if (req->newptr == USER_ADDR_NULL) {
2228 return 0;
2229 }
2230 error = proc_suser(current_proc());
2231 if (error != 0) {
2232 return error;
2233 }
2234
2235 error = set_netsvctype_dscp_map(&custom_net_qos_dscp_map,
2236 rfc4594_netsvctype_dscp_map);
2237
2238 return error;
2239 }
2240
2241 uint8_t
custom_sc_to_dscp(uint32_t svc_class)2242 custom_sc_to_dscp(uint32_t svc_class)
2243 {
2244 uint8_t dscp = _DSCP_DF;
2245
2246 switch (svc_class) {
2247 case MBUF_SC_BK_SYS:
2248 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_BK_SYS];
2249 break;
2250 case MBUF_SC_BK:
2251 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_BK];
2252 break;
2253
2254 case MBUF_SC_BE:
2255 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_BE];
2256 break;
2257 case MBUF_SC_RD:
2258 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_RD];
2259 break;
2260 case MBUF_SC_OAM:
2261 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_OAM];
2262 break;
2263
2264 case MBUF_SC_AV:
2265 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_AV];
2266 break;
2267 case MBUF_SC_RV:
2268 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_RV];
2269 break;
2270 case MBUF_SC_VI:
2271 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_VI];
2272 break;
2273 case MBUF_SC_SIG:
2274 dscp = custom_net_qos_dscp_map.netsvctype_to_dscp[NET_SERVICE_TYPE_SIG];
2275 break;
2276
2277 case MBUF_SC_VO:
2278 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_VO];
2279 break;
2280 case MBUF_SC_CTL:
2281 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_CTL];
2282 break;
2283 default:
2284 break;
2285 }
2286 return dscp;
2287 }
2288 #endif /* (DEBUG || DEVELOPMENT) */
2289