1 /*
2 * Copyright (c) 2009-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/types.h>
32 #include <sys/filedesc.h>
33 #include <sys/file_internal.h>
34 #include <sys/proc.h>
35 #include <sys/socket.h>
36 #include <sys/socketvar.h>
37 #include <sys/errno.h>
38 #include <sys/protosw.h>
39 #include <sys/domain.h>
40 #include <sys/mbuf.h>
41 #include <sys/queue.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysproto.h>
44
45 #include <net/if.h>
46 #include <net/if_var.h>
47 #include <net/route.h>
48
49 #include <netinet/in.h>
50 #include <netinet/in_var.h>
51 #include <netinet/in_pcb.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip_var.h>
54 #include <netinet/ip6.h>
55 #include <netinet6/ip6_var.h>
56 #include <netinet/udp.h>
57 #include <netinet/udp_var.h>
58 #include <netinet/tcp.h>
59 #include <netinet/tcp_var.h>
60 #include <netinet/tcp_cc.h>
61 #include <netinet/in_tclass.h>
62
63 #include <os/log.h>
64
/*
 * Compile-time check that _SO_TC_MAX and SO_TC_STATS_MAX stay equal.
 * SO_TC_STATS_MAX bounds the index into so_tc_stats[] in the receive
 * statistics helpers of this file.
 */
static_assert(_SO_TC_MAX == SO_TC_STATS_MAX);
66
/*
 * A pair of DSCP lookup tables: one with SO_TC_MAX entries and one with
 * _NET_SERVICE_TYPE_COUNT entries.
 */
struct net_qos_dscp_map {
	uint8_t sotc_to_dscp[SO_TC_MAX];                        /* presumably indexed by socket traffic class index -- confirm */
	uint8_t netsvctype_to_dscp[_NET_SERVICE_TYPE_COUNT];    /* presumably indexed by NET_SERVICE_TYPE_* value -- confirm */
};
71
/*
 * One entry associating a DSCP value with an mbuf service class.
 * (The tag is spelled "dcsp" in the source; it is kept as is.)
 */
struct dcsp_msc_map {
	uint8_t dscp;                   /* DSCP value; 255 is used as an end-of-table marker */
	mbuf_svc_class_t msc;           /* mbuf service class paired with that DSCP */
};
/* Forward declarations of file-local helpers */
static inline int so_throttle_best_effort(struct socket *, struct ifnet *);
static void set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *, int);
static errno_t dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *, size_t,
    struct dcsp_msc_map *);

/*
 * Lock group and mutex for this file; tclass_lock protects the
 * per-process traffic class list (tfp_head) and its counter (tfp_count).
 */
static LCK_GRP_DECLARE(tclass_lck_grp, "tclass");
static LCK_MTX_DECLARE(tclass_lock, &tclass_lck_grp);
83
/* Parent node "qos" under the "net" sysctl tree */
SYSCTL_NODE(_net, OID_AUTO, qos,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "QoS");

/*
 * net.qos.default_netsvctype_to_dscp_map: read/write structure served by
 * a handler that is defined outside this part of the file.
 */
static int sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_net_qos, OID_AUTO, default_netsvctype_to_dscp_map,
    CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_default_netsvctype_to_dscp_map, "S", "");

/*
 * net.qos.dscp_to_wifi_ac_map: read/write structure served by a handler
 * that is defined outside this part of the file.
 */
static int sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_net_qos, OID_AUTO, dscp_to_wifi_ac_map,
    CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_dscp_to_wifi_ac_map, "S", "");

/*
 * net.qos.reset_dscp_to_wifi_ac_map: write-only integer served by a
 * handler that is defined outside this part of the file.
 */
static int sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_net_qos, OID_AUTO, reset_dscp_to_wifi_ac_map,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, 0, sysctl_reset_dscp_to_wifi_ac_map, "I", "");

/* net.qos.verbose: read/write integer, 0 by default */
int net_qos_verbose = 0;
SYSCTL_INT(_net_qos, OID_AUTO, verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_verbose, 0, "");
105
/*
 * Fastlane QoS policy:
 * By default, allow all apps to get traffic class to DSCP mapping
 */
SYSCTL_NODE(_net_qos, OID_AUTO, policy,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");

/*
 * net.qos.policy.restricted: while this is 0, so_set_default_traffic_class()
 * sets SOF1_QOSMARKING_ALLOWED on new IPv4/IPv6 sockets.
 */
int net_qos_policy_restricted = 0;
SYSCTL_INT(_net_qos_policy, OID_AUTO, restricted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restricted, 0, "");

/* net.qos.policy.restrict_avapps: read/write integer, consumed outside this part of the file */
int net_qos_policy_restrict_avapps = 0;
SYSCTL_INT(_net_qos_policy, OID_AUTO, restrict_avapps,
    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restrict_avapps, 0, "");

/* net.qos.policy.wifi_enabled: read/write integer, consumed outside this part of the file */
int net_qos_policy_wifi_enabled = 0;
SYSCTL_INT(_net_qos_policy, OID_AUTO, wifi_enabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_wifi_enabled, 0, "");

/* net.qos.policy.capable_enabled: read/write integer, consumed outside this part of the file */
int net_qos_policy_capable_enabled = 0;
SYSCTL_INT(_net_qos_policy, OID_AUTO, capable_enabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_capable_enabled, 0, "");
128
/*
 * Socket traffic class for each network service type.
 *
 * The table is indexed directly by the NET_SERVICE_TYPE_* value; callers in
 * this file validate the index with IS_VALID_NET_SERVICE_TYPE() first.
 * Note that both the SIG and the VI service types map to SO_TC_VI.
 */
const int sotc_by_netservicetype[_NET_SERVICE_TYPE_COUNT] = {
	SO_TC_BE,       /* NET_SERVICE_TYPE_BE */
	SO_TC_BK,       /* NET_SERVICE_TYPE_BK */
	SO_TC_VI,       /* NET_SERVICE_TYPE_SIG */
	SO_TC_VI,       /* NET_SERVICE_TYPE_VI */
	SO_TC_VO,       /* NET_SERVICE_TYPE_VO */
	SO_TC_RV,       /* NET_SERVICE_TYPE_RV */
	SO_TC_AV,       /* NET_SERVICE_TYPE_AV */
	SO_TC_OAM,      /* NET_SERVICE_TYPE_OAM */
	SO_TC_RD        /* NET_SERVICE_TYPE_RD */
};
143
/*
 * DSCP mappings for QoS Fastlane, keyed by network service type.
 * One entry per network service type.
 */
static const
struct netsvctype_dscp_map fastlane_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
	{ .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
	{ .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_AF11 },
	{ .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS3 },
	{ .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
	{ .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
	{ .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
	{ .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
	{ .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
	{ .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
};
159
/*
 * DSCP mappings for QoS RFC 4594, keyed by network service type.
 * Differs from the Fastlane table above only for the BK (CS1 instead of
 * AF11) and SIG (CS5 instead of CS3) service types.
 */
static const
struct netsvctype_dscp_map rfc4594_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
	{ .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
	{ .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_CS1 },
	{ .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS5 },
	{ .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
	{ .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
	{ .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
	{ .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
	{ .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
	{ .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
};
175
/*
 * Run-time DSCP lookup tables, one per marking scheme.
 * NOTE(review): presumably filled in from the netsvctype tables above by
 * initialization code outside this part of the file -- confirm.
 */
static struct net_qos_dscp_map fastlane_net_qos_dscp_map;
static struct net_qos_dscp_map rfc4594_net_qos_dscp_map;
#if (DEBUG || DEVELOPMENT)
/* Additional table that only exists on DEBUG and DEVELOPMENT kernels */
static struct net_qos_dscp_map custom_net_qos_dscp_map;
#endif /* (DEBUG || DEVELOPMENT) */

/*
 * The size is one more than the maximum DSCP value because DSCP values
 * start at zero
 */
#define DSCP_ARRAY_SIZE (_MAX_DSCP + 1)
186
/*
 * The DSCP to UP mapping (via mbuf service class) for Wi-Fi below is the
 * mapping that is implemented at the 802.11 driver level when the mbuf
 * service class is MBUF_SC_BE.
 *
 * This clashes with the recommended mapping documented by the IETF document
 * draft-szigeti-tsvwg-ieee-802-11e-01.txt but we keep the mapping to maintain
 * binary compatibility. Applications should use the network service type
 * socket option to select L2 QoS marking instead of IP_TOS or IPV6_TCLASS.
 *
 * The table has one entry for each DSCP value from 0 to 63, in blocks of
 * eight, and is terminated by an entry with the invalid DSCP value 255.
 */
static const struct dcsp_msc_map default_dscp_to_wifi_ac_map[] = {
	{ .dscp = _DSCP_DF, .msc = MBUF_SC_BE },        /* RFC 2474 Standard */
	{ .dscp = 1, .msc = MBUF_SC_BE },               /* */
	{ .dscp = 2, .msc = MBUF_SC_BE },               /* */
	{ .dscp = 3, .msc = MBUF_SC_BE },               /* */
	{ .dscp = 4, .msc = MBUF_SC_BE },               /* */
	{ .dscp = 5, .msc = MBUF_SC_BE },               /* */
	{ .dscp = 6, .msc = MBUF_SC_BE },               /* */
	{ .dscp = 7, .msc = MBUF_SC_BE },               /* */

	{ .dscp = _DSCP_CS1, .msc = MBUF_SC_BK },       /* RFC 3662 Low-Priority Data */
	{ .dscp = 9, .msc = MBUF_SC_BK },               /* */
	{ .dscp = _DSCP_AF11, .msc = MBUF_SC_BK },      /* RFC 2597 High-Throughput Data */
	{ .dscp = 11, .msc = MBUF_SC_BK },              /* */
	{ .dscp = _DSCP_AF12, .msc = MBUF_SC_BK },      /* RFC 2597 High-Throughput Data */
	{ .dscp = 13, .msc = MBUF_SC_BK },              /* */
	{ .dscp = _DSCP_AF13, .msc = MBUF_SC_BK },      /* RFC 2597 High-Throughput Data */
	{ .dscp = 15, .msc = MBUF_SC_BK },              /* */

	{ .dscp = _DSCP_CS2, .msc = MBUF_SC_BK },       /* RFC 4594 OAM */
	{ .dscp = 17, .msc = MBUF_SC_BK },              /* */
	{ .dscp = _DSCP_AF21, .msc = MBUF_SC_BK },      /* RFC 2597 Low-Latency Data */
	{ .dscp = 19, .msc = MBUF_SC_BK },              /* */
	{ .dscp = _DSCP_AF22, .msc = MBUF_SC_BK },      /* RFC 2597 Low-Latency Data */
	{ .dscp = 21, .msc = MBUF_SC_BK },              /* */
	{ .dscp = _DSCP_AF23, .msc = MBUF_SC_BK },      /* RFC 2597 Low-Latency Data */
	{ .dscp = 23, .msc = MBUF_SC_BK },              /* */

	{ .dscp = _DSCP_CS3, .msc = MBUF_SC_BE },       /* RFC 2474 Broadcast Video */
	{ .dscp = 25, .msc = MBUF_SC_BE },              /* */
	{ .dscp = _DSCP_AF31, .msc = MBUF_SC_BE },      /* RFC 2597 Multimedia Streaming */
	{ .dscp = 27, .msc = MBUF_SC_BE },              /* */
	{ .dscp = _DSCP_AF32, .msc = MBUF_SC_BE },      /* RFC 2597 Multimedia Streaming */
	{ .dscp = 29, .msc = MBUF_SC_BE },              /* */
	{ .dscp = _DSCP_AF33, .msc = MBUF_SC_BE },      /* RFC 2597 Multimedia Streaming */
	{ .dscp = 31, .msc = MBUF_SC_BE },              /* */

	{ .dscp = _DSCP_CS4, .msc = MBUF_SC_VI },       /* RFC 2474 Real-Time Interactive */
	{ .dscp = 33, .msc = MBUF_SC_VI },              /* */
	{ .dscp = _DSCP_AF41, .msc = MBUF_SC_VI },      /* RFC 2597 Multimedia Conferencing */
	{ .dscp = 35, .msc = MBUF_SC_VI },              /* */
	{ .dscp = _DSCP_AF42, .msc = MBUF_SC_VI },      /* RFC 2597 Multimedia Conferencing */
	{ .dscp = 37, .msc = MBUF_SC_VI },              /* */
	{ .dscp = _DSCP_AF43, .msc = MBUF_SC_VI },      /* RFC 2597 Multimedia Conferencing */
	{ .dscp = 39, .msc = MBUF_SC_VI },              /* */

	{ .dscp = _DSCP_CS5, .msc = MBUF_SC_VI },       /* RFC 2474 Signaling */
	{ .dscp = 41, .msc = MBUF_SC_VI },              /* */
	{ .dscp = 42, .msc = MBUF_SC_VI },              /* */
	{ .dscp = 43, .msc = MBUF_SC_VI },              /* */
	{ .dscp = _DSCP_VA, .msc = MBUF_SC_VI },        /* RFC 5865 VOICE-ADMIT */
	{ .dscp = 45, .msc = MBUF_SC_VI },              /* */
	{ .dscp = _DSCP_EF, .msc = MBUF_SC_VI },        /* RFC 3246 Telephony */
	{ .dscp = 47, .msc = MBUF_SC_VI },              /* */

	{ .dscp = _DSCP_CS6, .msc = MBUF_SC_VO },       /* Wi-Fi WMM Certification: Chariot */
	{ .dscp = 49, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 50, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 51, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 52, .msc = MBUF_SC_VO },              /* Wi-Fi WMM Certification: Sigma */
	{ .dscp = 53, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 54, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 55, .msc = MBUF_SC_VO },              /* */

	{ .dscp = _DSCP_CS7, .msc = MBUF_SC_VO },       /* Wi-Fi WMM Certification: Chariot */
	{ .dscp = 57, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 58, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 59, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 60, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 61, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 62, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 63, .msc = MBUF_SC_VO },              /* */

	{ .dscp = 255, .msc = MBUF_SC_UNSPEC }          /* invalid DSCP to mark last entry */
};
272
/*
 * mbuf service class table with DSCP_ARRAY_SIZE entries.
 * NOTE(review): presumably indexed by DSCP value and loaded from a
 * dcsp_msc_map table by set_dscp_to_wifi_ac_map() -- confirm.
 */
mbuf_svc_class_t wifi_dscp_to_msc_array[DSCP_ARRAY_SIZE];

/*
 * If there is no foreground activity on the interface for
 * TCP_BG_SWITCH_TIME seconds, the background connections can switch to
 * foreground TCP congestion control.
 */
#define TCP_BG_SWITCH_TIME 2 /* seconds */
281
282 #if (DEVELOPMENT || DEBUG)
283
/* Number of entries currently linked on tfp_head */
static int tfp_count = 0;

/* List of per-process traffic class overrides; protected by tclass_lock */
static TAILQ_HEAD(, tclass_for_proc) tfp_head =
    TAILQ_HEAD_INITIALIZER(tfp_head);

/*
 * A traffic class / QoS marking override that applies either to one
 * process ID or to every process with a given name.
 */
struct tclass_for_proc {
	TAILQ_ENTRY(tclass_for_proc)    tfp_link;       /* linkage on tfp_head */
	int             tfp_class;      /* traffic class; SO_TC_UNSPEC means no class override */
	pid_t           tfp_pid;        /* process ID, or -1 for a name based entry */
	char            tfp_pname[(2 * MAXCOMLEN) + 1];  /* process name, filled in for name based entries only */
	uint32_t        tfp_qos_mode;   /* compared against QOS_MODE_MARKING_POLICY_* values */
};

/* Forward declarations of the debug helpers defined below */
static int get_pid_tclass(struct so_tcdbg *);
static int get_pname_tclass(struct so_tcdbg *);
static int set_pid_tclass(struct so_tcdbg *);
static int set_pname_tclass(struct so_tcdbg *);
static int flush_pid_tclass(struct so_tcdbg *);
static int purge_tclass_for_proc(void);
static int flush_tclass_for_proc(void);
static void set_tclass_for_curr_proc(struct socket *);
305
306 /*
307 * Must be called with tclass_lock held
308 */
309 static struct tclass_for_proc *
find_tfp_by_pid(pid_t pid)310 find_tfp_by_pid(pid_t pid)
311 {
312 struct tclass_for_proc *tfp;
313
314 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
315 if (tfp->tfp_pid == pid) {
316 break;
317 }
318 }
319 return tfp;
320 }
321
322 /*
323 * Must be called with tclass_lock held
324 */
325 static struct tclass_for_proc *
find_tfp_by_pname(const char * pname)326 find_tfp_by_pname(const char *pname)
327 {
328 struct tclass_for_proc *tfp;
329
330 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
331 if (strncmp(pname, tfp->tfp_pname,
332 sizeof(tfp->tfp_pname)) == 0) {
333 break;
334 }
335 }
336 return tfp;
337 }
338
339 __private_extern__ void
set_tclass_for_curr_proc(struct socket * so)340 set_tclass_for_curr_proc(struct socket *so)
341 {
342 struct tclass_for_proc *tfp = NULL;
343 proc_t p = current_proc(); /* Not ref counted */
344 pid_t pid = proc_pid(p);
345 char *pname = proc_best_name(p);
346
347 lck_mtx_lock(&tclass_lock);
348
349 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
350 if ((tfp->tfp_pid == pid) || (tfp->tfp_pid == -1 &&
351 strncmp(pname, tfp->tfp_pname,
352 sizeof(tfp->tfp_pname)) == 0)) {
353 if (tfp->tfp_class != SO_TC_UNSPEC) {
354 so->so_traffic_class = (uint16_t)tfp->tfp_class;
355 }
356
357 if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
358 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
359 } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
360 so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
361 }
362 break;
363 }
364 }
365
366 lck_mtx_unlock(&tclass_lock);
367 }
368
369 /*
370 * Purge entries with PIDs of exited processes
371 */
372 int
purge_tclass_for_proc(void)373 purge_tclass_for_proc(void)
374 {
375 int error = 0;
376 struct tclass_for_proc *tfp, *tvar;
377
378 lck_mtx_lock(&tclass_lock);
379
380 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
381 proc_t p;
382
383 if (tfp->tfp_pid == -1) {
384 continue;
385 }
386 if ((p = proc_find(tfp->tfp_pid)) == NULL) {
387 tfp_count--;
388 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
389
390 kfree_type(struct tclass_for_proc, tfp);
391 } else {
392 proc_rele(p);
393 }
394 }
395
396 lck_mtx_unlock(&tclass_lock);
397
398 return error;
399 }
400
401 /*
402 * Remove one entry
403 * Must be called with tclass_lock held
404 */
405 static void
free_tclass_for_proc(struct tclass_for_proc * tfp)406 free_tclass_for_proc(struct tclass_for_proc *tfp)
407 {
408 if (tfp == NULL) {
409 return;
410 }
411 tfp_count--;
412 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
413 kfree_type(struct tclass_for_proc, tfp);
414 }
415
416 /*
417 * Remove all entries
418 */
419 int
flush_tclass_for_proc(void)420 flush_tclass_for_proc(void)
421 {
422 int error = 0;
423 struct tclass_for_proc *tfp, *tvar;
424
425 lck_mtx_lock(&tclass_lock);
426
427 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
428 free_tclass_for_proc(tfp);
429 }
430
431 lck_mtx_unlock(&tclass_lock);
432
433 return error;
434 }
435
436 /*
437 * Must be called with tclass_lock held
438 */
439 static struct tclass_for_proc *
alloc_tclass_for_proc(pid_t pid,const char * pname)440 alloc_tclass_for_proc(pid_t pid, const char *pname)
441 {
442 struct tclass_for_proc *tfp;
443
444 if (pid == -1 && pname == NULL) {
445 return NULL;
446 }
447
448 tfp = kalloc_type(struct tclass_for_proc, Z_NOWAIT | Z_ZERO);
449 if (tfp == NULL) {
450 return NULL;
451 }
452
453 tfp->tfp_pid = pid;
454 /*
455 * Add per pid entries before per proc name so we can find
456 * a specific instance of a process before the general name base entry.
457 */
458 if (pid != -1) {
459 TAILQ_INSERT_HEAD(&tfp_head, tfp, tfp_link);
460 } else {
461 strlcpy(tfp->tfp_pname, pname, sizeof(tfp->tfp_pname));
462 TAILQ_INSERT_TAIL(&tfp_head, tfp, tfp_link);
463 }
464
465 tfp_count++;
466
467 return tfp;
468 }
469
470 /*
471 * SO_TC_UNSPEC for tclass means to remove the entry
472 */
473 int
set_pid_tclass(struct so_tcdbg * so_tcdbg)474 set_pid_tclass(struct so_tcdbg *so_tcdbg)
475 {
476 int error = EINVAL;
477 proc_t p = NULL;
478 struct tclass_for_proc *tfp;
479 pid_t pid = so_tcdbg->so_tcdbg_pid;
480 int tclass = so_tcdbg->so_tcdbg_tclass;
481 int netsvctype = so_tcdbg->so_tcdbg_netsvctype;
482
483 p = proc_find(pid);
484 if (p == NULL) {
485 printf("%s proc_find(%d) failed\n", __func__, pid);
486 goto done;
487 }
488
489 /* Need a tfp */
490 lck_mtx_lock(&tclass_lock);
491
492 tfp = find_tfp_by_pid(pid);
493 if (tfp == NULL) {
494 tfp = alloc_tclass_for_proc(pid, NULL);
495 if (tfp == NULL) {
496 lck_mtx_unlock(&tclass_lock);
497 error = ENOBUFS;
498 goto done;
499 }
500 }
501 tfp->tfp_class = tclass;
502 tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
503
504 lck_mtx_unlock(&tclass_lock);
505
506 if (tfp != NULL) {
507 struct fileproc *fp;
508
509 fdt_foreach(fp, p) {
510 struct socket *so;
511
512 if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
513 continue;
514 }
515
516 so = (struct socket *)fp_get_data(fp);
517 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
518 continue;
519 }
520
521 socket_lock(so, 1);
522 if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
523 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
524 } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
525 so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
526 }
527 socket_unlock(so, 1);
528
529 if (netsvctype != _NET_SERVICE_TYPE_UNSPEC) {
530 error = sock_setsockopt(so, SOL_SOCKET,
531 SO_NET_SERVICE_TYPE, &netsvctype, sizeof(int));
532 }
533 if (tclass != SO_TC_UNSPEC) {
534 error = sock_setsockopt(so, SOL_SOCKET,
535 SO_TRAFFIC_CLASS, &tclass, sizeof(int));
536 }
537 }
538
539 proc_fdunlock(p);
540 }
541
542 error = 0;
543 done:
544 if (p != NULL) {
545 proc_rele(p);
546 }
547
548 return error;
549 }
550
551 int
set_pname_tclass(struct so_tcdbg * so_tcdbg)552 set_pname_tclass(struct so_tcdbg *so_tcdbg)
553 {
554 int error = EINVAL;
555 struct tclass_for_proc *tfp;
556
557 lck_mtx_lock(&tclass_lock);
558
559 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
560 if (tfp == NULL) {
561 tfp = alloc_tclass_for_proc(-1, so_tcdbg->so_tcdbg_pname);
562 if (tfp == NULL) {
563 lck_mtx_unlock(&tclass_lock);
564 error = ENOBUFS;
565 goto done;
566 }
567 }
568 tfp->tfp_class = so_tcdbg->so_tcdbg_tclass;
569 tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
570
571 lck_mtx_unlock(&tclass_lock);
572
573 error = 0;
574 done:
575
576 return error;
577 }
578
579 static int
flush_pid_tclass(struct so_tcdbg * so_tcdbg)580 flush_pid_tclass(struct so_tcdbg *so_tcdbg)
581 {
582 pid_t pid = so_tcdbg->so_tcdbg_pid;
583 int tclass = so_tcdbg->so_tcdbg_tclass;
584 struct fileproc *fp;
585 proc_t p;
586 int error;
587
588 p = proc_find(pid);
589 if (p == PROC_NULL) {
590 printf("%s proc_find(%d) failed\n", __func__, pid);
591 return EINVAL;
592 }
593
594 proc_fdlock(p);
595
596 fdt_foreach(fp, p) {
597 struct socket *so;
598
599 if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
600 continue;
601 }
602
603 so = (struct socket *)fp_get_data(fp);
604 error = sock_setsockopt(so, SOL_SOCKET, SO_FLUSH, &tclass,
605 sizeof(tclass));
606 if (error != 0) {
607 printf("%s: setsockopt(SO_FLUSH) (so=0x%llx, fd=%d, "
608 "tclass=%d) failed %d\n", __func__,
609 (uint64_t)VM_KERNEL_ADDRPERM(so), fdt_foreach_fd(), tclass,
610 error);
611 }
612 }
613
614 proc_fdunlock(p);
615
616 proc_rele(p);
617 return 0;
618 }
619
620 int
get_pid_tclass(struct so_tcdbg * so_tcdbg)621 get_pid_tclass(struct so_tcdbg *so_tcdbg)
622 {
623 int error = EINVAL;
624 proc_t p = NULL;
625 struct tclass_for_proc *tfp;
626 pid_t pid = so_tcdbg->so_tcdbg_pid;
627
628 so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
629
630 p = proc_find(pid);
631 if (p == NULL) {
632 printf("%s proc_find(%d) failed\n", __func__, pid);
633 goto done;
634 }
635
636 /* Need a tfp */
637 lck_mtx_lock(&tclass_lock);
638
639 tfp = find_tfp_by_pid(pid);
640 if (tfp != NULL) {
641 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
642 so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
643 error = 0;
644 }
645 lck_mtx_unlock(&tclass_lock);
646 done:
647 if (p != NULL) {
648 proc_rele(p);
649 }
650
651 return error;
652 }
653
654 int
get_pname_tclass(struct so_tcdbg * so_tcdbg)655 get_pname_tclass(struct so_tcdbg *so_tcdbg)
656 {
657 int error = EINVAL;
658 struct tclass_for_proc *tfp;
659
660 so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
661
662 /* Need a tfp */
663 lck_mtx_lock(&tclass_lock);
664
665 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
666 if (tfp != NULL) {
667 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
668 so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
669 error = 0;
670 }
671 lck_mtx_unlock(&tclass_lock);
672
673 return error;
674 }
675
676 static int
delete_tclass_for_pid_pname(struct so_tcdbg * so_tcdbg)677 delete_tclass_for_pid_pname(struct so_tcdbg *so_tcdbg)
678 {
679 int error = EINVAL;
680 pid_t pid = so_tcdbg->so_tcdbg_pid;
681 struct tclass_for_proc *tfp = NULL;
682
683 lck_mtx_lock(&tclass_lock);
684
685 if (pid != -1) {
686 tfp = find_tfp_by_pid(pid);
687 } else {
688 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
689 }
690
691 if (tfp != NULL) {
692 free_tclass_for_proc(tfp);
693 error = 0;
694 }
695
696 lck_mtx_unlock(&tclass_lock);
697
698 return error;
699 }
700
701 /*
702 * Setting options requires privileges
703 */
704 __private_extern__ int
so_set_tcdbg(struct socket * so,struct so_tcdbg * so_tcdbg)705 so_set_tcdbg(struct socket *so, struct so_tcdbg *so_tcdbg)
706 {
707 int error = 0;
708
709 if ((so->so_state & SS_PRIV) == 0) {
710 return EPERM;
711 }
712
713 socket_unlock(so, 0);
714
715 switch (so_tcdbg->so_tcdbg_cmd) {
716 case SO_TCDBG_PID:
717 error = set_pid_tclass(so_tcdbg);
718 break;
719
720 case SO_TCDBG_PNAME:
721 error = set_pname_tclass(so_tcdbg);
722 break;
723
724 case SO_TCDBG_PURGE:
725 error = purge_tclass_for_proc();
726 break;
727
728 case SO_TCDBG_FLUSH:
729 error = flush_tclass_for_proc();
730 break;
731
732 case SO_TCDBG_DELETE:
733 error = delete_tclass_for_pid_pname(so_tcdbg);
734 break;
735
736 case SO_TCDBG_TCFLUSH_PID:
737 error = flush_pid_tclass(so_tcdbg);
738 break;
739
740 default:
741 error = EINVAL;
742 break;
743 }
744
745 socket_lock(so, 0);
746
747 return error;
748 }
749
750 /*
751 * Not required to be privileged to get
752 */
753 __private_extern__ int
sogetopt_tcdbg(struct socket * so,struct sockopt * sopt)754 sogetopt_tcdbg(struct socket *so, struct sockopt *sopt)
755 {
756 int error = 0;
757 struct so_tcdbg so_tcdbg;
758 void *buf = NULL;
759 size_t len = sopt->sopt_valsize;
760
761 error = sooptcopyin(sopt, &so_tcdbg, sizeof(struct so_tcdbg),
762 sizeof(struct so_tcdbg));
763 if (error != 0) {
764 return error;
765 }
766
767 sopt->sopt_valsize = len;
768
769 socket_unlock(so, 0);
770
771 switch (so_tcdbg.so_tcdbg_cmd) {
772 case SO_TCDBG_PID:
773 error = get_pid_tclass(&so_tcdbg);
774 break;
775
776 case SO_TCDBG_PNAME:
777 error = get_pname_tclass(&so_tcdbg);
778 break;
779
780 case SO_TCDBG_COUNT:
781 lck_mtx_lock(&tclass_lock);
782 so_tcdbg.so_tcdbg_count = tfp_count;
783 lck_mtx_unlock(&tclass_lock);
784 break;
785
786 case SO_TCDBG_LIST: {
787 struct tclass_for_proc *tfp;
788 int n, alloc_count;
789 struct so_tcdbg *ptr;
790
791 lck_mtx_lock(&tclass_lock);
792 if ((alloc_count = tfp_count) == 0) {
793 lck_mtx_unlock(&tclass_lock);
794 error = EINVAL;
795 break;
796 }
797 len = alloc_count * sizeof(struct so_tcdbg);
798 lck_mtx_unlock(&tclass_lock);
799
800 buf = kalloc_data(len, Z_WAITOK | Z_ZERO);
801 if (buf == NULL) {
802 error = ENOBUFS;
803 break;
804 }
805
806 lck_mtx_lock(&tclass_lock);
807 n = 0;
808 ptr = (struct so_tcdbg *)buf;
809 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
810 if (++n > alloc_count) {
811 break;
812 }
813 if (tfp->tfp_pid != -1) {
814 ptr->so_tcdbg_cmd = SO_TCDBG_PID;
815 ptr->so_tcdbg_pid = tfp->tfp_pid;
816 } else {
817 ptr->so_tcdbg_cmd = SO_TCDBG_PNAME;
818 ptr->so_tcdbg_pid = -1;
819 strlcpy(ptr->so_tcdbg_pname,
820 tfp->tfp_pname,
821 sizeof(ptr->so_tcdbg_pname));
822 }
823 ptr->so_tcdbg_tclass = tfp->tfp_class;
824 ptr->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
825 ptr++;
826 }
827
828 lck_mtx_unlock(&tclass_lock);
829 }
830 break;
831
832 default:
833 error = EINVAL;
834 break;
835 }
836
837 socket_lock(so, 0);
838
839 if (error == 0) {
840 if (buf == NULL) {
841 error = sooptcopyout(sopt, &so_tcdbg,
842 sizeof(struct so_tcdbg));
843 } else {
844 error = sooptcopyout(sopt, buf, len);
845 kfree_data(buf, len);
846 }
847 }
848 return error;
849 }
850
851 #endif /* (DEVELOPMENT || DEBUG) */
852
853 int
so_get_netsvc_marking_level(struct socket * so)854 so_get_netsvc_marking_level(struct socket *so)
855 {
856 int marking_level = NETSVC_MRKNG_UNKNOWN;
857 struct ifnet *ifp = NULL;
858
859 switch (SOCK_DOM(so)) {
860 case PF_INET: {
861 struct inpcb *inp = sotoinpcb(so);
862
863 if (inp != NULL) {
864 ifp = inp->inp_last_outifp;
865 }
866 break;
867 }
868 case PF_INET6: {
869 struct in6pcb *in6p = sotoin6pcb(so);
870
871 if (in6p != NULL) {
872 ifp = in6p->in6p_last_outifp;
873 }
874 break;
875 }
876 default:
877 break;
878 }
879 if (ifp != NULL) {
880 if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0) {
881 if ((so->so_flags1 & SOF1_QOSMARKING_ALLOWED)) {
882 marking_level = NETSVC_MRKNG_LVL_L3L2_ALL;
883 } else {
884 marking_level = NETSVC_MRKNG_LVL_L3L2_BK;
885 }
886 } else {
887 marking_level = NETSVC_MRKNG_LVL_L2;
888 }
889 }
890 return marking_level;
891 }
892
893 __private_extern__ int
so_set_traffic_class(struct socket * so,int optval)894 so_set_traffic_class(struct socket *so, int optval)
895 {
896 int error = 0;
897
898 if (optval < SO_TC_BE || optval > SO_TC_CTL) {
899 error = EINVAL;
900 } else {
901 switch (optval) {
902 case _SO_TC_BK:
903 optval = SO_TC_BK;
904 break;
905 case _SO_TC_VI:
906 optval = SO_TC_VI;
907 break;
908 case _SO_TC_VO:
909 optval = SO_TC_VO;
910 break;
911 default:
912 if (!SO_VALID_TC(optval)) {
913 error = EINVAL;
914 }
915 break;
916 }
917
918 if (error == 0) {
919 int oldval = so->so_traffic_class;
920
921 VERIFY(SO_VALID_TC(optval));
922 so->so_traffic_class = (uint16_t)optval;
923
924 if ((SOCK_DOM(so) == PF_INET ||
925 SOCK_DOM(so) == PF_INET6) &&
926 SOCK_TYPE(so) == SOCK_STREAM) {
927 set_tcp_stream_priority(so);
928 }
929
930 if ((SOCK_DOM(so) == PF_INET ||
931 SOCK_DOM(so) == PF_INET6) &&
932 optval != oldval && (optval == SO_TC_BK_SYS ||
933 oldval == SO_TC_BK_SYS)) {
934 /*
935 * If the app switches from BK_SYS to something
936 * else, resume the socket if it was suspended.
937 */
938 if (oldval == SO_TC_BK_SYS) {
939 inp_reset_fc_state(so->so_pcb);
940 }
941
942 SOTHROTTLELOG("throttle[%d]: so 0x%llx "
943 "[%d,%d] opportunistic %s\n", so->last_pid,
944 (uint64_t)VM_KERNEL_ADDRPERM(so),
945 SOCK_DOM(so), SOCK_TYPE(so),
946 (optval == SO_TC_BK_SYS) ? "ON" : "OFF");
947 }
948 }
949 }
950 return error;
951 }
952
953 __private_extern__ int
so_set_net_service_type(struct socket * so,int netsvctype)954 so_set_net_service_type(struct socket *so, int netsvctype)
955 {
956 int sotc;
957 int error;
958
959 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype)) {
960 return EINVAL;
961 }
962
963 sotc = sotc_by_netservicetype[netsvctype];
964 error = so_set_traffic_class(so, sotc);
965 if (error != 0) {
966 return error;
967 }
968 so->so_netsvctype = (int8_t)netsvctype;
969 so->so_flags1 |= SOF1_TC_NET_SERV_TYPE;
970
971 return 0;
972 }
973
974 __private_extern__ void
so_set_default_traffic_class(struct socket * so)975 so_set_default_traffic_class(struct socket *so)
976 {
977 so->so_traffic_class = SO_TC_BE;
978
979 if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6)) {
980 if (net_qos_policy_restricted == 0) {
981 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
982 }
983 #if (DEVELOPMENT || DEBUG)
984 if (tfp_count > 0) {
985 set_tclass_for_curr_proc(so);
986 }
987 #endif /* (DEVELOPMENT || DEBUG) */
988 }
989 }
990
991 __private_extern__ int
so_set_opportunistic(struct socket * so,int optval)992 so_set_opportunistic(struct socket *so, int optval)
993 {
994 return so_set_traffic_class(so, (optval == 0) ?
995 SO_TC_BE : SO_TC_BK_SYS);
996 }
997
998 __private_extern__ int
so_get_opportunistic(struct socket * so)999 so_get_opportunistic(struct socket *so)
1000 {
1001 return so->so_traffic_class == SO_TC_BK_SYS;
1002 }
1003
1004 __private_extern__ int
so_tc_from_control(struct mbuf * control,int * out_netsvctype)1005 so_tc_from_control(struct mbuf *control, int *out_netsvctype)
1006 {
1007 struct cmsghdr *cm;
1008 int sotc = SO_TC_UNSPEC;
1009
1010 *out_netsvctype = _NET_SERVICE_TYPE_UNSPEC;
1011
1012 for (cm = M_FIRST_CMSGHDR(control);
1013 is_cmsg_valid(control, cm);
1014 cm = M_NXT_CMSGHDR(control, cm)) {
1015 int val;
1016
1017 if (cm->cmsg_level != SOL_SOCKET ||
1018 cm->cmsg_len != CMSG_LEN(sizeof(int))) {
1019 continue;
1020 }
1021 val = *(int *)(void *)CMSG_DATA(cm);
1022 /*
1023 * The first valid option wins
1024 */
1025 switch (cm->cmsg_type) {
1026 case SO_TRAFFIC_CLASS:
1027 if (SO_VALID_TC(val)) {
1028 sotc = val;
1029 return sotc;
1030 /* NOT REACHED */
1031 } else if (val < SO_TC_NET_SERVICE_OFFSET) {
1032 break;
1033 }
1034 /*
1035 * Handle the case SO_NET_SERVICE_TYPE values are
1036 * passed using SO_TRAFFIC_CLASS
1037 */
1038 val = val - SO_TC_NET_SERVICE_OFFSET;
1039 OS_FALLTHROUGH;
1040 case SO_NET_SERVICE_TYPE:
1041 if (!IS_VALID_NET_SERVICE_TYPE(val)) {
1042 break;
1043 }
1044 *out_netsvctype = val;
1045 sotc = sotc_by_netservicetype[val];
1046 return sotc;
1047 /* NOT REACHED */
1048 default:
1049 break;
1050 }
1051 }
1052
1053 return sotc;
1054 }
1055
1056 __private_extern__ int
so_tos_from_control(struct mbuf * control)1057 so_tos_from_control(struct mbuf *control)
1058 {
1059 struct cmsghdr *cm;
1060 int tos = IPTOS_UNSPEC;
1061
1062 for (cm = M_FIRST_CMSGHDR(control);
1063 is_cmsg_valid(control, cm);
1064 cm = M_NXT_CMSGHDR(control, cm)) {
1065 if (cm->cmsg_len != CMSG_LEN(sizeof(int))) {
1066 continue;
1067 }
1068
1069 if ((cm->cmsg_level == IPPROTO_IP &&
1070 cm->cmsg_type == IP_TOS) ||
1071 (cm->cmsg_level == IPPROTO_IPV6 &&
1072 cm->cmsg_type == IPV6_TCLASS)) {
1073 tos = *(int *)(void *)CMSG_DATA(cm) & IPTOS_MASK;
1074 /* The first valid option wins */
1075 break;
1076 }
1077 }
1078
1079 return tos;
1080 }
1081
1082 __private_extern__ void
so_recv_data_stat(struct socket * so,struct mbuf * m,size_t off)1083 so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off)
1084 {
1085 uint32_t mtc = m_get_traffic_class(m);
1086
1087 if (mtc >= SO_TC_STATS_MAX) {
1088 mtc = MBUF_TC_BE;
1089 }
1090
1091 so->so_tc_stats[mtc].rxpackets += 1;
1092 so->so_tc_stats[mtc].rxbytes +=
1093 ((m->m_flags & M_PKTHDR) ? m->m_pkthdr.len : 0) + off;
1094 }
1095
1096 __private_extern__ void
so_inc_recv_data_stat(struct socket * so,size_t pkts,size_t bytes,uint32_t mtc)1097 so_inc_recv_data_stat(struct socket *so, size_t pkts, size_t bytes,
1098 uint32_t mtc)
1099 {
1100 if (mtc >= SO_TC_STATS_MAX) {
1101 mtc = MBUF_TC_BE;
1102 }
1103
1104 so->so_tc_stats[mtc].rxpackets += pkts;
1105 so->so_tc_stats[mtc].rxbytes += bytes;
1106 }
1107
1108 static inline int
so_throttle_best_effort(struct socket * so,struct ifnet * ifp)1109 so_throttle_best_effort(struct socket *so, struct ifnet *ifp)
1110 {
1111 uint32_t uptime = (uint32_t)net_uptime();
1112 return soissrcbesteffort(so) &&
1113 net_io_policy_throttle_best_effort == 1 &&
1114 ifp->if_rt_sendts > 0 &&
1115 (int)(uptime - ifp->if_rt_sendts) <= TCP_BG_SWITCH_TIME;
1116 }
1117
1118 __private_extern__ void
set_tcp_stream_priority(struct socket * so)1119 set_tcp_stream_priority(struct socket *so)
1120 {
1121 struct inpcb *inp = sotoinpcb(so);
1122 struct tcpcb *tp = intotcpcb(inp);
1123 struct ifnet *outifp;
1124 u_char old_cc = tp->tcp_cc_index;
1125 int recvbg = IS_TCP_RECV_BG(so);
1126 bool is_local = false, fg_active = false;
1127 uint32_t uptime;
1128
1129 VERIFY((SOCK_CHECK_DOM(so, PF_INET) ||
1130 SOCK_CHECK_DOM(so, PF_INET6)) &&
1131 SOCK_CHECK_TYPE(so, SOCK_STREAM) &&
1132 SOCK_CHECK_PROTO(so, IPPROTO_TCP));
1133
1134 /* Return if the socket is in a terminal state */
1135 if (inp->inp_state == INPCB_STATE_DEAD) {
1136 return;
1137 }
1138
1139 outifp = inp->inp_last_outifp;
1140 uptime = (uint32_t)net_uptime();
1141
1142 /*
1143 * If the socket was marked as a background socket or if the
1144 * traffic class is set to background with traffic class socket
1145 * option then make both send and recv side of the stream to be
1146 * background. The variable sotcdb which can be set with sysctl
1147 * is used to disable these settings for testing.
1148 */
1149 if (outifp == NULL || (outifp->if_flags & IFF_LOOPBACK)) {
1150 is_local = true;
1151 }
1152
1153 /* Check if there has been recent foreground activity */
1154 if (outifp != NULL) {
1155 /*
1156 * If the traffic source is background, check if
1157 * there is recent foreground activity which should
1158 * continue to keep the traffic source as background.
1159 * Otherwise, we can switch the traffic source to
1160 * foreground.
1161 */
1162 if (soissrcbackground(so) && outifp->if_fg_sendts > 0 &&
1163 (int)(uptime - outifp->if_fg_sendts) <= TCP_BG_SWITCH_TIME) {
1164 fg_active = true;
1165 }
1166
1167 /*
1168 * The traffic source is best-effort -- check if
1169 * the policy to throttle best effort is enabled
1170 * and there was realtime activity on this
1171 * interface recently. If this is true, enable
1172 * algorithms that respond to increased latency
1173 * on best-effort traffic.
1174 */
1175 if (so_throttle_best_effort(so, outifp)) {
1176 fg_active = true;
1177 }
1178 }
1179
1180 /*
1181 * System initiated background traffic like cloud uploads should
1182 * always use background delay sensitive algorithms. This will
1183 * make the stream more responsive to other streams on the user's
1184 * network and it will minimize latency induced.
1185 */
1186 if (fg_active || IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
1187 /*
1188 * If the interface that the connection is using is
1189 * loopback, do not use background congestion
1190 * control algorithm.
1191 *
1192 * If there has been recent foreground activity or if there
1193 * was an indication that a real time foreground application
1194 * is going to use networking (net_io_policy_throttled),
1195 * switch the background and best effort streams to use background
1196 * congestion control algorithm.
1197 */
1198 if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0 || is_local) {
1199 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
1200 tcp_set_foreground_cc(so);
1201 }
1202 } else {
1203 if (old_cc != TCP_CC_ALGO_BACKGROUND_INDEX) {
1204 tcp_set_background_cc(so);
1205 }
1206 }
1207
1208 /* Set receive side background flags */
1209 if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0 || is_local) {
1210 tcp_clear_recv_bg(so);
1211 } else {
1212 tcp_set_recv_bg(so);
1213 }
1214 } else {
1215 /*
1216 * If there is no recent foreground activity, even the
1217 * background flows can use foreground congestion controller.
1218 */
1219 tcp_clear_recv_bg(so);
1220 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
1221 tcp_set_foreground_cc(so);
1222 }
1223 }
1224
1225 if (old_cc != tp->tcp_cc_index || recvbg != IS_TCP_RECV_BG(so)) {
1226 SOTHROTTLELOG("throttle[%d]: so 0x%llx [%d,%d] TCP %s send; "
1227 "%s recv\n", so->last_pid,
1228 (uint64_t)VM_KERNEL_ADDRPERM(so),
1229 SOCK_DOM(so), SOCK_TYPE(so),
1230 (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) ?
1231 "background" : "foreground",
1232 IS_TCP_RECV_BG(so) ? "background" : "foreground");
1233 }
1234 }
1235
1236 /*
1237 * Set traffic class to an IPv4 or IPv6 packet
1238 * - mark the mbuf
1239 * - set the DSCP code following the WMM mapping
1240 */
1241 __private_extern__ void
set_packet_service_class(struct mbuf * m,struct socket * so,int sotc,uint32_t flags)1242 set_packet_service_class(struct mbuf *m, struct socket *so,
1243 int sotc, uint32_t flags)
1244 {
1245 mbuf_svc_class_t msc = MBUF_SC_BE; /* Best effort by default */
1246 struct inpcb *inp = sotoinpcb(so); /* in6pcb and inpcb are the same */
1247
1248 if (!(m->m_flags & M_PKTHDR)) {
1249 return;
1250 }
1251
1252 /*
1253 * Here is the precedence:
1254 * 1) TRAFFIC_MGT_SO_BACKGROUND trumps all
1255 * 2) Traffic class passed via ancillary data to sendmsdg(2)
1256 * 3) Traffic class socket option last
1257 */
1258 if (sotc != SO_TC_UNSPEC) {
1259 VERIFY(SO_VALID_TC(sotc));
1260 msc = so_tc2msc(sotc);
1261 /* Assert because tc must have been valid */
1262 VERIFY(MBUF_VALID_SC(msc));
1263 }
1264
1265 /*
1266 * If TRAFFIC_MGT_SO_BACKGROUND is set or policy to throttle
1267 * best effort is set, depress the priority.
1268 */
1269 if (!IS_MBUF_SC_BACKGROUND(msc) && soisthrottled(so)) {
1270 msc = MBUF_SC_BK;
1271 }
1272
1273 if (IS_MBUF_SC_BESTEFFORT(msc) && inp->inp_last_outifp != NULL &&
1274 so_throttle_best_effort(so, inp->inp_last_outifp)) {
1275 msc = MBUF_SC_BK;
1276 }
1277
1278 if (soissrcbackground(so)) {
1279 m->m_pkthdr.pkt_flags |= PKTF_SO_BACKGROUND;
1280 }
1281
1282 if (soissrcrealtime(so) || IS_MBUF_SC_REALTIME(msc)) {
1283 m->m_pkthdr.pkt_flags |= PKTF_SO_REALTIME;
1284 }
1285 /*
1286 * Set the traffic class in the mbuf packet header svc field
1287 */
1288 if (sotcdb & SOTCDB_NO_MTC) {
1289 goto no_mbtc;
1290 }
1291
1292 /*
1293 * Elevate service class if the packet is a pure TCP ACK.
1294 * We can do this only when the flow is not a background
1295 * flow and the outgoing interface supports
1296 * transmit-start model.
1297 */
1298 if (!IS_MBUF_SC_BACKGROUND(msc) &&
1299 (flags & (PKT_SCF_TCP_ACK | PKT_SCF_TCP_SYN)) != 0) {
1300 msc = MBUF_SC_CTL;
1301 }
1302
1303 (void) m_set_service_class(m, msc);
1304
1305 /*
1306 * Set the privileged traffic auxiliary flag if applicable,
1307 * or clear it.
1308 */
1309 if (!(sotcdb & SOTCDB_NO_PRIVILEGED) && soisprivilegedtraffic(so) &&
1310 msc != MBUF_SC_UNSPEC) {
1311 m->m_pkthdr.pkt_flags |= PKTF_PRIO_PRIVILEGED;
1312 } else {
1313 m->m_pkthdr.pkt_flags &= ~PKTF_PRIO_PRIVILEGED;
1314 }
1315
1316 no_mbtc:
1317 /*
1318 * For TCP with background traffic class switch CC algo based on sysctl
1319 */
1320 if (so->so_type == SOCK_STREAM) {
1321 set_tcp_stream_priority(so);
1322 }
1323
1324 so_tc_update_stats(m, so, msc);
1325 }
1326
1327 __private_extern__ void
so_tc_update_stats(struct mbuf * m,struct socket * so,mbuf_svc_class_t msc)1328 so_tc_update_stats(struct mbuf *m, struct socket *so, mbuf_svc_class_t msc)
1329 {
1330 mbuf_traffic_class_t mtc;
1331
1332 /*
1333 * Assume socket and mbuf traffic class values are the same
1334 * Also assume the socket lock is held. Note that the stats
1335 * at the socket layer are reduced down to the legacy traffic
1336 * classes; we could/should potentially expand so_tc_stats[].
1337 */
1338 mtc = MBUF_SC2TC(msc);
1339 VERIFY(mtc < SO_TC_STATS_MAX);
1340 so->so_tc_stats[mtc].txpackets += 1;
1341 so->so_tc_stats[mtc].txbytes += m->m_pkthdr.len;
1342 }
1343
1344 __private_extern__ mbuf_svc_class_t
so_tc2msc(int tc)1345 so_tc2msc(int tc)
1346 {
1347 mbuf_svc_class_t msc;
1348
1349 switch (tc) {
1350 case SO_TC_BK_SYS:
1351 msc = MBUF_SC_BK_SYS;
1352 break;
1353 case SO_TC_BK:
1354 case _SO_TC_BK:
1355 msc = MBUF_SC_BK;
1356 break;
1357 case SO_TC_BE:
1358 msc = MBUF_SC_BE;
1359 break;
1360 case SO_TC_RD:
1361 msc = MBUF_SC_RD;
1362 break;
1363 case SO_TC_OAM:
1364 msc = MBUF_SC_OAM;
1365 break;
1366 case SO_TC_AV:
1367 msc = MBUF_SC_AV;
1368 break;
1369 case SO_TC_RV:
1370 msc = MBUF_SC_RV;
1371 break;
1372 case SO_TC_VI:
1373 case _SO_TC_VI:
1374 msc = MBUF_SC_VI;
1375 break;
1376 case SO_TC_NETSVC_SIG:
1377 msc = MBUF_SC_SIG;
1378 break;
1379 case SO_TC_VO:
1380 case _SO_TC_VO:
1381 msc = MBUF_SC_VO;
1382 break;
1383 case SO_TC_CTL:
1384 msc = MBUF_SC_CTL;
1385 break;
1386 case SO_TC_ALL:
1387 default:
1388 msc = MBUF_SC_UNSPEC;
1389 break;
1390 }
1391
1392 return msc;
1393 }
1394
1395 __private_extern__ int
so_svc2tc(mbuf_svc_class_t svc)1396 so_svc2tc(mbuf_svc_class_t svc)
1397 {
1398 switch (svc) {
1399 case MBUF_SC_BK_SYS:
1400 return SO_TC_BK_SYS;
1401 case MBUF_SC_BK:
1402 return SO_TC_BK;
1403 case MBUF_SC_BE:
1404 return SO_TC_BE;
1405 case MBUF_SC_RD:
1406 return SO_TC_RD;
1407 case MBUF_SC_OAM:
1408 return SO_TC_OAM;
1409 case MBUF_SC_AV:
1410 return SO_TC_AV;
1411 case MBUF_SC_RV:
1412 return SO_TC_RV;
1413 case MBUF_SC_VI:
1414 return SO_TC_VI;
1415 case MBUF_SC_SIG:
1416 return SO_TC_NETSVC_SIG;
1417 case MBUF_SC_VO:
1418 return SO_TC_VO;
1419 case MBUF_SC_CTL:
1420 return SO_TC_CTL;
1421 case MBUF_SC_UNSPEC:
1422 default:
1423 return SO_TC_BE;
1424 }
1425 }
1426
1427 static size_t
sotc_index(int sotc)1428 sotc_index(int sotc)
1429 {
1430 switch (sotc) {
1431 case SO_TC_BK_SYS:
1432 return SOTCIX_BK_SYS;
1433 case _SO_TC_BK:
1434 case SO_TC_BK:
1435 return SOTCIX_BK;
1436
1437 case SO_TC_BE:
1438 return SOTCIX_BE;
1439 case SO_TC_RD:
1440 return SOTCIX_RD;
1441 case SO_TC_OAM:
1442 return SOTCIX_OAM;
1443
1444 case SO_TC_AV:
1445 return SOTCIX_AV;
1446 case SO_TC_RV:
1447 return SOTCIX_RV;
1448 case _SO_TC_VI:
1449 case SO_TC_VI:
1450 return SOTCIX_VI;
1451
1452 case _SO_TC_VO:
1453 case SO_TC_VO:
1454 return SOTCIX_VO;
1455 case SO_TC_CTL:
1456 return SOTCIX_CTL;
1457
1458 default:
1459 break;
1460 }
1461 /*
1462 * Unknown traffic class value
1463 */
1464 return SIZE_T_MAX;
1465 }
1466
1467 uint8_t
fastlane_sc_to_dscp(uint32_t svc_class)1468 fastlane_sc_to_dscp(uint32_t svc_class)
1469 {
1470 uint8_t dscp = _DSCP_DF;
1471
1472 switch (svc_class) {
1473 case MBUF_SC_BK_SYS:
1474 case MBUF_SC_BK:
1475 dscp = _DSCP_AF11;
1476 break;
1477
1478 case MBUF_SC_BE:
1479 dscp = _DSCP_DF;
1480 break;
1481 case MBUF_SC_RD:
1482 dscp = _DSCP_AF21;
1483 break;
1484 case MBUF_SC_OAM:
1485 dscp = _DSCP_CS2;
1486 break;
1487
1488 case MBUF_SC_AV:
1489 dscp = _DSCP_AF31;
1490 break;
1491 case MBUF_SC_RV:
1492 dscp = _DSCP_CS4;
1493 break;
1494 case MBUF_SC_VI:
1495 dscp = _DSCP_AF41;
1496 break;
1497 case MBUF_SC_SIG:
1498 dscp = _DSCP_CS3;
1499 break;
1500
1501 case MBUF_SC_VO:
1502 dscp = _DSCP_EF;
1503 break;
1504 case MBUF_SC_CTL:
1505 dscp = _DSCP_DF;
1506 break;
1507 default:
1508 dscp = _DSCP_DF;
1509 break;
1510 }
1511
1512 return dscp;
1513 }
1514
1515 uint8_t
rfc4594_sc_to_dscp(uint32_t svc_class)1516 rfc4594_sc_to_dscp(uint32_t svc_class)
1517 {
1518 uint8_t dscp = _DSCP_DF;
1519
1520 switch (svc_class) {
1521 case MBUF_SC_BK_SYS: /* Low-Priority Data */
1522 case MBUF_SC_BK:
1523 dscp = _DSCP_CS1;
1524 break;
1525
1526 case MBUF_SC_BE: /* Standard */
1527 dscp = _DSCP_DF;
1528 break;
1529 case MBUF_SC_RD: /* Low-Latency Data */
1530 dscp = _DSCP_AF21;
1531 break;
1532
1533 /* SVC_CLASS Not Defined: High-Throughput Data */
1534
1535 case MBUF_SC_OAM: /* OAM */
1536 dscp = _DSCP_CS2;
1537 break;
1538
1539 /* SVC_CLASS Not Defined: Broadcast Video */
1540
1541 case MBUF_SC_AV: /* Multimedia Streaming */
1542 dscp = _DSCP_AF31;
1543 break;
1544 case MBUF_SC_RV: /* Real-Time Interactive */
1545 dscp = _DSCP_CS4;
1546 break;
1547 case MBUF_SC_VI: /* Multimedia Conferencing */
1548 dscp = _DSCP_AF41;
1549 break;
1550 case MBUF_SC_SIG: /* Signaling */
1551 dscp = _DSCP_CS5;
1552 break;
1553
1554 case MBUF_SC_VO: /* Telephony */
1555 dscp = _DSCP_EF;
1556 break;
1557 case MBUF_SC_CTL: /* Network Control*/
1558 dscp = _DSCP_CS6;
1559 break;
1560 default:
1561 dscp = _DSCP_DF;
1562 break;
1563 }
1564
1565 return dscp;
1566 }
1567
1568 mbuf_traffic_class_t
rfc4594_dscp_to_tc(uint8_t dscp)1569 rfc4594_dscp_to_tc(uint8_t dscp)
1570 {
1571 mbuf_traffic_class_t tc = MBUF_TC_BE;
1572
1573 switch (dscp) {
1574 case _DSCP_CS1:
1575 tc = MBUF_TC_BK;
1576 break;
1577 case _DSCP_DF:
1578 case _DSCP_AF21:
1579 case _DSCP_CS2:
1580 tc = MBUF_TC_BE;
1581 break;
1582 case _DSCP_AF31:
1583 case _DSCP_CS4:
1584 case _DSCP_AF41:
1585 case _DSCP_CS5:
1586 tc = MBUF_TC_VI;
1587 break;
1588 case _DSCP_EF:
1589 case _DSCP_CS6:
1590 tc = MBUF_TC_VO;
1591 break;
1592 default:
1593 tc = MBUF_TC_BE;
1594 break;
1595 }
1596
1597 return tc;
1598 }
1599
1600 /*
1601 * Pass NULL ifp for default map
1602 */
1603 static errno_t
set_netsvctype_dscp_map(struct net_qos_dscp_map * net_qos_dscp_map,const struct netsvctype_dscp_map * netsvctype_dscp_map)1604 set_netsvctype_dscp_map(struct net_qos_dscp_map *net_qos_dscp_map,
1605 const struct netsvctype_dscp_map *netsvctype_dscp_map)
1606 {
1607 size_t i;
1608 int netsvctype;
1609
1610 /*
1611 * Do not accept more that max number of distinct DSCPs
1612 */
1613 if (net_qos_dscp_map == NULL || netsvctype_dscp_map == NULL) {
1614 return EINVAL;
1615 }
1616
1617 /*
1618 * Validate input parameters
1619 */
1620 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1621 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype_dscp_map[i].netsvctype)) {
1622 return EINVAL;
1623 }
1624 if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
1625 return EINVAL;
1626 }
1627 }
1628
1629 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1630 netsvctype = netsvctype_dscp_map[i].netsvctype;
1631
1632 net_qos_dscp_map->netsvctype_to_dscp[netsvctype] =
1633 netsvctype_dscp_map[i].dscp;
1634 }
1635 for (netsvctype = 0; netsvctype < _NET_SERVICE_TYPE_COUNT; netsvctype++) {
1636 switch (netsvctype) {
1637 case NET_SERVICE_TYPE_BE:
1638 case NET_SERVICE_TYPE_BK:
1639 case NET_SERVICE_TYPE_VI:
1640 case NET_SERVICE_TYPE_VO:
1641 case NET_SERVICE_TYPE_RV:
1642 case NET_SERVICE_TYPE_AV:
1643 case NET_SERVICE_TYPE_OAM:
1644 case NET_SERVICE_TYPE_RD: {
1645 size_t sotcix;
1646
1647 sotcix = sotc_index(sotc_by_netservicetype[netsvctype]);
1648 if (sotcix != SIZE_T_MAX) {
1649 net_qos_dscp_map->sotc_to_dscp[sotcix] =
1650 netsvctype_dscp_map[netsvctype].dscp;
1651 }
1652 break;
1653 }
1654 case NET_SERVICE_TYPE_SIG:
1655 /* Signaling does not have its own traffic class */
1656 break;
1657 default:
1658 /* We should not be here */
1659 ASSERT(0);
1660 }
1661 }
1662 if (net_qos_dscp_map == &fastlane_net_qos_dscp_map) {
1663 /* Network control socket traffic class is always best effort for fastlane*/
1664 net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_DF;
1665 } else {
1666 net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_CS6;
1667 }
1668
1669 /* Background system socket traffic class DSCP same as background */
1670 net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK_SYS] =
1671 net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK];
1672
1673 return 0;
1674 }
1675
1676 static size_t
get_netsvctype_dscp_map(struct netsvctype_dscp_map * netsvctype_dscp_map)1677 get_netsvctype_dscp_map(struct netsvctype_dscp_map *netsvctype_dscp_map)
1678 {
1679 struct net_qos_dscp_map *net_qos_dscp_map;
1680 int i;
1681
1682 net_qos_dscp_map = &fastlane_net_qos_dscp_map;
1683
1684 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1685 netsvctype_dscp_map[i].netsvctype = i;
1686 netsvctype_dscp_map[i].dscp = net_qos_dscp_map->netsvctype_to_dscp[i];
1687 }
1688
1689 return i * sizeof(struct netsvctype_dscp_map);
1690 }
1691
1692 void
net_qos_map_init()1693 net_qos_map_init()
1694 {
1695 errno_t error;
1696
1697 error = set_netsvctype_dscp_map(&fastlane_net_qos_dscp_map,
1698 fastlane_netsvctype_dscp_map);
1699 ASSERT(error == 0);
1700
1701 error = set_netsvctype_dscp_map(&rfc4594_net_qos_dscp_map,
1702 rfc4594_netsvctype_dscp_map);
1703 ASSERT(error == 0);
1704
1705 #if (DEBUG || DEVELOPMENT)
1706 error = set_netsvctype_dscp_map(&custom_net_qos_dscp_map,
1707 rfc4594_netsvctype_dscp_map);
1708 ASSERT(error == 0);
1709
1710 #endif /* (DEBUG || DEVELOPMENT) */
1711
1712 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
1713 }
1714
1715 int
1716 sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS
1717 {
1718 #pragma unused(oidp, arg1, arg2)
1719 int error = 0;
1720
1721 if (req->oldptr == USER_ADDR_NULL) {
1722 req->oldidx =
1723 _NET_SERVICE_TYPE_COUNT * sizeof(struct netsvctype_dscp_map);
1724 } else if (req->oldlen > 0) {
1725 struct netsvctype_dscp_map netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {};
1726 size_t len;
1727
1728 len = get_netsvctype_dscp_map(netsvctype_dscp_map);
1729
1730 error = SYSCTL_OUT(req, netsvctype_dscp_map,
1731 MIN(len, req->oldlen));
1732 if (error != 0) {
1733 goto done;
1734 }
1735 }
1736
1737 if (req->newptr != USER_ADDR_NULL) {
1738 error = EPERM;
1739 }
1740 done:
1741 return error;
1742 }
1743
1744 __private_extern__ errno_t
set_packet_qos(struct mbuf * m,struct ifnet * ifp,boolean_t qos_allowed,int sotc,int netsvctype,uint8_t * dscp_inout)1745 set_packet_qos(struct mbuf *m, struct ifnet *ifp, boolean_t qos_allowed,
1746 int sotc, int netsvctype, uint8_t *dscp_inout)
1747 {
1748 if (ifp == NULL || dscp_inout == NULL) {
1749 return EINVAL;
1750 }
1751
1752 if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0 &&
1753 ifp->if_qosmarking_mode != IFRTYPE_QOSMARKING_MODE_NONE) {
1754 uint8_t dscp;
1755 const struct net_qos_dscp_map *net_qos_dscp_map = NULL;
1756
1757 switch (ifp->if_qosmarking_mode) {
1758 case IFRTYPE_QOSMARKING_FASTLANE:
1759 net_qos_dscp_map = &fastlane_net_qos_dscp_map;
1760 break;
1761 case IFRTYPE_QOSMARKING_RFC4594:
1762 net_qos_dscp_map = &rfc4594_net_qos_dscp_map;
1763 break;
1764 #if (DEBUG || DEVELOPMENT)
1765 case IFRTYPE_QOSMARKING_CUSTOM:
1766 net_qos_dscp_map = &custom_net_qos_dscp_map;
1767 break;
1768 #endif /* (DEBUG || DEVELOPMENT) */
1769 default:
1770 panic("invalid QoS marking type");
1771 /* NOTREACHED */
1772 }
1773
1774 /*
1775 * When on a Fastlane network, IP_TOS/IPV6_TCLASS are no-ops
1776 */
1777 dscp = _DSCP_DF;
1778
1779 /*
1780 * For DSCP use the network service type is specified, otherwise
1781 * use the socket traffic class
1782 *
1783 * When not whitelisted by the policy, set DSCP only for best
1784 * effort and background, and set the mbuf service class to
1785 * best effort as well so the packet will be queued and
1786 * scheduled at a lower priority.
1787 * We still want to prioritize control traffic on the interface
1788 * so we do not change the mbuf service class for SO_TC_CTL
1789 */
1790 if (IS_VALID_NET_SERVICE_TYPE(netsvctype) &&
1791 netsvctype != NET_SERVICE_TYPE_BE) {
1792 dscp = net_qos_dscp_map->netsvctype_to_dscp[netsvctype];
1793
1794 if (qos_allowed == FALSE &&
1795 netsvctype != NET_SERVICE_TYPE_BE &&
1796 netsvctype != NET_SERVICE_TYPE_BK) {
1797 dscp = _DSCP_DF;
1798 if (sotc != SO_TC_CTL) {
1799 m_set_service_class(m, MBUF_SC_BE);
1800 }
1801 }
1802 } else if (sotc != SO_TC_UNSPEC) {
1803 size_t sotcix = sotc_index(sotc);
1804 if (sotcix != SIZE_T_MAX) {
1805 dscp = net_qos_dscp_map->sotc_to_dscp[sotcix];
1806
1807 if (qos_allowed == FALSE && sotc != SO_TC_BE &&
1808 sotc != SO_TC_BK && sotc != SO_TC_BK_SYS &&
1809 sotc != SO_TC_CTL) {
1810 dscp = _DSCP_DF;
1811 if (sotc != SO_TC_CTL) {
1812 m_set_service_class(m, MBUF_SC_BE);
1813 }
1814 }
1815 }
1816 }
1817 if (net_qos_verbose != 0) {
1818 printf("%s qos_allowed %d sotc %u netsvctype %u dscp %u\n",
1819 __func__, qos_allowed, sotc, netsvctype, dscp);
1820 }
1821
1822 if (*dscp_inout != dscp) {
1823 *dscp_inout = dscp;
1824 }
1825 } else if (*dscp_inout != _DSCP_DF && IFNET_IS_WIFI_INFRA(ifp)) {
1826 mbuf_svc_class_t msc = m_get_service_class(m);
1827
1828 /*
1829 * For WiFi infra, when the mbuf service class is best effort
1830 * and the DSCP is not default, set the service class based
1831 * on DSCP
1832 */
1833 if (msc == MBUF_SC_BE) {
1834 msc = wifi_dscp_to_msc_array[*dscp_inout];
1835
1836 if (msc != MBUF_SC_BE) {
1837 m_set_service_class(m, msc);
1838
1839 if (net_qos_verbose != 0) {
1840 printf("%s set msc %u for dscp %u\n",
1841 __func__, msc, *dscp_inout);
1842 }
1843 }
1844 }
1845 }
1846
1847 return 0;
1848 }
1849
1850 static void
set_dscp_to_wifi_ac_map(const struct dcsp_msc_map * map,int clear)1851 set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *map, int clear)
1852 {
1853 int i;
1854
1855 if (clear) {
1856 bzero(wifi_dscp_to_msc_array, sizeof(wifi_dscp_to_msc_array));
1857 }
1858
1859 for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1860 const struct dcsp_msc_map *elem = map + i;
1861
1862 if (elem->dscp > _MAX_DSCP || elem->msc == MBUF_SC_UNSPEC) {
1863 break;
1864 }
1865 switch (elem->msc) {
1866 case MBUF_SC_BK_SYS:
1867 case MBUF_SC_BK:
1868 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BK;
1869 break;
1870 default:
1871 case MBUF_SC_BE:
1872 case MBUF_SC_RD:
1873 case MBUF_SC_OAM:
1874 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BE;
1875 break;
1876 case MBUF_SC_AV:
1877 case MBUF_SC_RV:
1878 case MBUF_SC_VI:
1879 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VI;
1880 break;
1881 case MBUF_SC_VO:
1882 case MBUF_SC_CTL:
1883 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VO;
1884 break;
1885 }
1886 }
1887 }
1888
1889 static errno_t
dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map * netsvctype_dscp_map,size_t count,struct dcsp_msc_map * dcsp_msc_map)1890 dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *netsvctype_dscp_map,
1891 size_t count, struct dcsp_msc_map *dcsp_msc_map)
1892 {
1893 errno_t error = 0;
1894 uint32_t i;
1895
1896 /*
1897 * Validate input parameters
1898 */
1899 for (i = 0; i < count; i++) {
1900 if (!SO_VALID_TC(netsvctype_dscp_map[i].netsvctype)) {
1901 error = EINVAL;
1902 goto done;
1903 }
1904 if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
1905 error = EINVAL;
1906 goto done;
1907 }
1908 }
1909
1910 bzero(dcsp_msc_map, DSCP_ARRAY_SIZE * sizeof(struct dcsp_msc_map));
1911
1912 for (i = 0; i < count; i++) {
1913 dcsp_msc_map[i].dscp = netsvctype_dscp_map[i].dscp;
1914 dcsp_msc_map[i].msc = so_tc2msc(netsvctype_dscp_map[i].netsvctype);
1915 }
1916 done:
1917 return error;
1918 }
1919
1920 int
1921 sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
1922 {
1923 #pragma unused(oidp, arg1, arg2)
1924 int error = 0;
1925 size_t len = DSCP_ARRAY_SIZE * sizeof(struct netsvctype_dscp_map);
1926 struct netsvctype_dscp_map netsvctype_dscp_map[DSCP_ARRAY_SIZE] = {};
1927 struct dcsp_msc_map dcsp_msc_map[DSCP_ARRAY_SIZE];
1928 size_t count;
1929
1930 if (req->oldptr == USER_ADDR_NULL) {
1931 req->oldidx = len;
1932 } else if (req->oldlen > 0) {
1933 uint8_t i;
1934
1935 for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1936 netsvctype_dscp_map[i].dscp = i;
1937 netsvctype_dscp_map[i].netsvctype =
1938 so_svc2tc(wifi_dscp_to_msc_array[i]);
1939 }
1940 error = SYSCTL_OUT(req, netsvctype_dscp_map,
1941 MIN(len, req->oldlen));
1942 if (error != 0) {
1943 goto done;
1944 }
1945 }
1946
1947 if (req->newptr == USER_ADDR_NULL) {
1948 goto done;
1949 }
1950
1951 error = proc_suser(current_proc());
1952 if (error != 0) {
1953 goto done;
1954 }
1955
1956 /*
1957 * Check input length
1958 */
1959 if (req->newlen > len) {
1960 error = EINVAL;
1961 goto done;
1962 }
1963 /*
1964 * Cap the number of entries to copy from input buffer
1965 */
1966 if (len > req->newlen) {
1967 len = req->newlen;
1968 }
1969 error = SYSCTL_IN(req, netsvctype_dscp_map, len);
1970 if (error != 0) {
1971 goto done;
1972 }
1973 count = len / sizeof(struct netsvctype_dscp_map);
1974 bzero(dcsp_msc_map, sizeof(dcsp_msc_map));
1975 error = dscp_msc_map_from_netsvctype_dscp_map(netsvctype_dscp_map, count,
1976 dcsp_msc_map);
1977 if (error != 0) {
1978 goto done;
1979 }
1980 set_dscp_to_wifi_ac_map(dcsp_msc_map, 0);
1981 done:
1982 return error;
1983 }
1984
1985 int
1986 sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
1987 {
1988 #pragma unused(oidp, arg1, arg2)
1989 int error = 0;
1990 int val = 0;
1991
1992 error = sysctl_handle_int(oidp, &val, 0, req);
1993 if (error || !req->newptr) {
1994 return error;
1995 }
1996 if (req->newptr == USER_ADDR_NULL) {
1997 return 0;
1998 }
1999 error = proc_suser(current_proc());
2000 if (error != 0) {
2001 return error;
2002 }
2003
2004 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
2005
2006 return 0;
2007 }
2008
2009 /*
2010 * Returns whether a large upload or download transfer should be marked as
2011 * BK service type for network activity. This is a system level
2012 * hint/suggestion to classify application traffic based on statistics
2013 * collected from the current network attachment
2014 *
2015 * Returns 1 for BK and 0 for default
2016 */
2017
2018 int
net_qos_guideline(struct proc * p,struct net_qos_guideline_args * arg,int * retval)2019 net_qos_guideline(struct proc *p, struct net_qos_guideline_args *arg,
2020 int *retval)
2021 {
2022 #pragma unused(p)
2023 #define RETURN_USE_BK 1
2024 #define RETURN_USE_DEFAULT 0
2025 struct net_qos_param qos_arg;
2026 struct ifnet *ipv4_primary, *ipv6_primary;
2027 int err = 0;
2028
2029 if (arg->param == USER_ADDR_NULL || retval == NULL ||
2030 arg->param_len != sizeof(qos_arg)) {
2031 return EINVAL;
2032 }
2033 err = copyin(arg->param, (caddr_t) &qos_arg, sizeof(qos_arg));
2034 if (err != 0) {
2035 return err;
2036 }
2037
2038 *retval = RETURN_USE_DEFAULT;
2039 ipv4_primary = ifindex2ifnet[get_primary_ifscope(AF_INET)];
2040 ipv6_primary = ifindex2ifnet[get_primary_ifscope(AF_INET6)];
2041
2042 /*
2043 * If either of the interfaces is in Low Internet mode, enable
2044 * background delay based algorithms on this transfer
2045 */
2046 if (qos_arg.nq_uplink) {
2047 if ((ipv4_primary != NULL &&
2048 (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_UL)) ||
2049 (ipv6_primary != NULL &&
2050 (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_UL))) {
2051 *retval = RETURN_USE_BK;
2052 return 0;
2053 }
2054 } else {
2055 if ((ipv4_primary != NULL &&
2056 (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_DL)) ||
2057 (ipv6_primary != NULL &&
2058 (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_DL))) {
2059 *retval = RETURN_USE_BK;
2060 return 0;
2061 }
2062 }
2063
2064 /*
2065 * Some times IPv4 and IPv6 primary interfaces can be different.
2066 * In this case, if either of them is non-cellular, we should mark
2067 * the transfer as BK as it can potentially get used based on
2068 * the host name resolution
2069 */
2070 if (ipv4_primary != NULL && IFNET_IS_EXPENSIVE(ipv4_primary) &&
2071 ipv6_primary != NULL && IFNET_IS_EXPENSIVE(ipv6_primary)) {
2072 if (qos_arg.nq_use_expensive) {
2073 return 0;
2074 } else {
2075 *retval = RETURN_USE_BK;
2076 return 0;
2077 }
2078 }
2079 if (ipv4_primary != NULL && IFNET_IS_CONSTRAINED(ipv4_primary) &&
2080 ipv6_primary != NULL && IFNET_IS_CONSTRAINED(ipv6_primary)) {
2081 if (qos_arg.nq_use_constrained) {
2082 return 0;
2083 } else {
2084 *retval = RETURN_USE_BK;
2085 return 0;
2086 }
2087 }
2088 if (qos_arg.nq_transfer_size >= 5 * 1024 * 1024) {
2089 *retval = RETURN_USE_BK;
2090 return 0;
2091 }
2092
2093
2094 #undef RETURN_USE_BK
2095 #undef RETURN_USE_DEFAULT
2096 return 0;
2097 }
2098
2099 #if (DEBUG || DEVELOPMENT)
2100 /*
2101 * Customizable QoS mapping table
2102 * By default it uses the mapping table for RFC 4594
2103 *
2104 * Notes:
2105 * BK_SYS is the same as BK
2106 * CTL cannot be changed and is always _DSCP_CS6
2107 */
/* Parent node for the custom QoS mapping OIDs */
SYSCTL_NODE(_net_qos, OID_AUTO, custom,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");

/* Node holding one OID per network service type */
SYSCTL_NODE(_net_qos_custom, OID_AUTO, netsvctype_to_dscp,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");

/*
 * Every per-service-type OID below shares one handler.  The
 * NET_SERVICE_TYPE_* value passed in the argument slot right before the
 * handler is what the handler receives as arg2 and uses to select the
 * entry of the custom map.
 */
static int sysctl_net_qos_custom_netsvctype_to_dscp SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, be,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_BE, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, bk,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_BK, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, sig,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_SIG, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, vi,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_VI, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, vo,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_VO, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, rv,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_RV, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, av,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_AV, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, oam,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_OAM, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, rd,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_RD, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");

/* Writing to this OID reloads the custom map from the RFC 4594 table */
static int sysctl_net_qos_custom_reset SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_net_qos_custom, OID_AUTO, reset,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, 0, sysctl_net_qos_custom_reset, "I", "");
2147
2148 int
2149 sysctl_net_qos_custom_netsvctype_to_dscp SYSCTL_HANDLER_ARGS
2150 {
2151 #pragma unused(arg1)
2152 int error = 0;
2153
2154 switch (arg2) {
2155 case NET_SERVICE_TYPE_BE:
2156 case NET_SERVICE_TYPE_BK:
2157 case NET_SERVICE_TYPE_SIG:
2158 case NET_SERVICE_TYPE_VI:
2159 case NET_SERVICE_TYPE_VO:
2160 case NET_SERVICE_TYPE_RV:
2161 case NET_SERVICE_TYPE_AV:
2162 case NET_SERVICE_TYPE_OAM:
2163 case NET_SERVICE_TYPE_RD:
2164 break;
2165 default:
2166 os_log(OS_LOG_DEFAULT, "%s: unexpected netsvctype %d",
2167 __func__, arg2);
2168 return EINVAL;
2169 }
2170
2171 int val = custom_net_qos_dscp_map.netsvctype_to_dscp[arg2];
2172 error = sysctl_handle_int(oidp, &val, 0, req);
2173 if (error != 0 || req->newptr == USER_ADDR_NULL) {
2174 return error;
2175 }
2176 if (req->newptr == USER_ADDR_NULL) {
2177 return 0;
2178 }
2179 error = proc_suser(current_proc());
2180 if (error != 0) {
2181 return error;
2182 }
2183 if (val < 0 || val > _MAX_DSCP) {
2184 os_log(OS_LOG_DEFAULT, "%s: unexpected DSCP %d",
2185 __func__, val);
2186 return EINVAL;
2187 }
2188
2189 struct netsvctype_dscp_map netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {};
2190
2191 for (int i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
2192 netsvctype_dscp_map[i].netsvctype = i;
2193 netsvctype_dscp_map[i].dscp = custom_net_qos_dscp_map.netsvctype_to_dscp[i];
2194 }
2195 netsvctype_dscp_map[arg2].dscp = (uint8_t) val;
2196
2197 error = set_netsvctype_dscp_map(&custom_net_qos_dscp_map,
2198 netsvctype_dscp_map);
2199
2200 return 0;
2201 }
2202
2203 int
2204 sysctl_net_qos_custom_reset SYSCTL_HANDLER_ARGS
2205 {
2206 #pragma unused(arg1, arg2)
2207 int error = 0;
2208 int val = 0;
2209
2210 error = sysctl_handle_int(oidp, &val, 0, req);
2211 if (error || !req->newptr) {
2212 return error;
2213 }
2214 if (req->newptr == USER_ADDR_NULL) {
2215 return 0;
2216 }
2217 error = proc_suser(current_proc());
2218 if (error != 0) {
2219 return error;
2220 }
2221
2222 error = set_netsvctype_dscp_map(&custom_net_qos_dscp_map,
2223 rfc4594_netsvctype_dscp_map);
2224
2225 return error;
2226 }
2227
2228 uint8_t
custom_sc_to_dscp(uint32_t svc_class)2229 custom_sc_to_dscp(uint32_t svc_class)
2230 {
2231 uint8_t dscp = _DSCP_DF;
2232
2233 switch (svc_class) {
2234 case MBUF_SC_BK_SYS:
2235 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_BK_SYS];
2236 break;
2237 case MBUF_SC_BK:
2238 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_BK];
2239 break;
2240
2241 case MBUF_SC_BE:
2242 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_BE];
2243 break;
2244 case MBUF_SC_RD:
2245 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_RD];
2246 break;
2247 case MBUF_SC_OAM:
2248 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_OAM];
2249 break;
2250
2251 case MBUF_SC_AV:
2252 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_AV];
2253 break;
2254 case MBUF_SC_RV:
2255 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_RV];
2256 break;
2257 case MBUF_SC_VI:
2258 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_VI];
2259 break;
2260 case MBUF_SC_SIG:
2261 dscp = custom_net_qos_dscp_map.netsvctype_to_dscp[NET_SERVICE_TYPE_SIG];
2262 break;
2263
2264 case MBUF_SC_VO:
2265 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_VO];
2266 break;
2267 case MBUF_SC_CTL:
2268 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_CTL];
2269 break;
2270 default:
2271 break;
2272 }
2273 return dscp;
2274 }
2275 #endif /* (DEBUG || DEVELOPMENT) */
2276