1 /*
2 * Copyright (c) 2009-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/types.h>
32 #include <sys/filedesc.h>
33 #include <sys/file_internal.h>
34 #include <sys/proc.h>
35 #include <sys/socket.h>
36 #include <sys/socketvar.h>
37 #include <sys/errno.h>
38 #include <sys/protosw.h>
39 #include <sys/domain.h>
40 #include <sys/mbuf.h>
41 #include <sys/queue.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysproto.h>
44
45 #include <net/if.h>
46 #include <net/if_var.h>
47 #include <net/route.h>
48
49 #include <netinet/in.h>
50 #include <netinet/in_var.h>
51 #include <netinet/in_pcb.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip_var.h>
54 #include <netinet/ip6.h>
55 #include <netinet6/ip6_var.h>
56 #include <netinet/udp.h>
57 #include <netinet/udp_var.h>
58 #include <netinet/tcp.h>
59 #include <netinet/tcp_var.h>
60 #include <netinet/tcp_cc.h>
61 #include <netinet/in_tclass.h>
62
63 #include <os/log.h>
64
65 static_assert(_SO_TC_MAX == SO_TC_STATS_MAX);
66
67 struct net_qos_dscp_map {
68 uint8_t sotc_to_dscp[SO_TC_MAX];
69 uint8_t netsvctype_to_dscp[_NET_SERVICE_TYPE_COUNT];
70 };
71
72 struct dcsp_msc_map {
73 uint8_t dscp;
74 mbuf_svc_class_t msc;
75 };
76 static inline int so_throttle_best_effort(struct socket *, struct ifnet *);
77 static void set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *, int);
78 static errno_t dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *, size_t,
79 struct dcsp_msc_map *);
80
81 static LCK_GRP_DECLARE(tclass_lck_grp, "tclass");
82 static LCK_MTX_DECLARE(tclass_lock, &tclass_lck_grp);
83
84 SYSCTL_NODE(_net, OID_AUTO, qos,
85 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "QoS");
86
87 static int sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS;
88 SYSCTL_PROC(_net_qos, OID_AUTO, default_netsvctype_to_dscp_map,
89 CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
90 0, 0, sysctl_default_netsvctype_to_dscp_map, "S", "");
91
92 static int sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
93 SYSCTL_PROC(_net_qos, OID_AUTO, dscp_to_wifi_ac_map,
94 CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
95 0, 0, sysctl_dscp_to_wifi_ac_map, "S", "");
96
97 static int sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
98 SYSCTL_PROC(_net_qos, OID_AUTO, reset_dscp_to_wifi_ac_map,
99 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
100 0, 0, sysctl_reset_dscp_to_wifi_ac_map, "I", "");
101
102 int net_qos_verbose = 0;
103 SYSCTL_INT(_net_qos, OID_AUTO, verbose,
104 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_verbose, 0, "");
105
106 /*
107 * Fastlane QoS policy:
108 * By Default allow all apps to get traffic class to DSCP mapping
109 */
110 SYSCTL_NODE(_net_qos, OID_AUTO, policy,
111 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");
112
113 int net_qos_policy_restricted = 0;
114 SYSCTL_INT(_net_qos_policy, OID_AUTO, restricted,
115 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restricted, 0, "");
116
117 int net_qos_policy_restrict_avapps = 0;
118 SYSCTL_INT(_net_qos_policy, OID_AUTO, restrict_avapps,
119 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restrict_avapps, 0, "");
120
121 int net_qos_policy_wifi_enabled = 0;
122 SYSCTL_INT(_net_qos_policy, OID_AUTO, wifi_enabled,
123 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_wifi_enabled, 0, "");
124
125 int net_qos_policy_capable_enabled = 0;
126 SYSCTL_INT(_net_qos_policy, OID_AUTO, capable_enabled,
127 CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_capable_enabled, 0, "");
128
129 /*
130 * Socket traffic class from network service type
131 */
132 const int sotc_by_netservicetype[_NET_SERVICE_TYPE_COUNT] = {
133 SO_TC_BE, /* NET_SERVICE_TYPE_BE */
134 SO_TC_BK, /* NET_SERVICE_TYPE_BK */
135 SO_TC_VI, /* NET_SERVICE_TYPE_SIG */
136 SO_TC_VI, /* NET_SERVICE_TYPE_VI */
137 SO_TC_VO, /* NET_SERVICE_TYPE_VO */
138 SO_TC_RV, /* NET_SERVICE_TYPE_RV */
139 SO_TC_AV, /* NET_SERVICE_TYPE_AV */
140 SO_TC_OAM, /* NET_SERVICE_TYPE_OAM */
141 SO_TC_RD /* NET_SERVICE_TYPE_RD */
142 };
143
144 /*
145 * DSCP mappings for QoS Fastlane as based on network service types
146 */
147 static const
148 struct netsvctype_dscp_map fastlane_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
149 { .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
150 { .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_AF11 },
151 { .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS3 },
152 { .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
153 { .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
154 { .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
155 { .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
156 { .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
157 { .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
158 };
159
160 /*
161 * DSCP mappings for QoS RFC4594 as based on network service types
162 */
163 static const
164 struct netsvctype_dscp_map rfc4594_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
165 { .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
166 { .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_CS1 },
167 { .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS5 },
168 { .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
169 { .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
170 { .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
171 { .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
172 { .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
173 { .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
174 };
175
176 static struct net_qos_dscp_map fastlane_net_qos_dscp_map;
177 static struct net_qos_dscp_map rfc4594_net_qos_dscp_map;
178 #if (DEBUG || DEVELOPMENT)
179 static struct net_qos_dscp_map custom_net_qos_dscp_map;
180 #endif /* (DEBUG || DEVELOPMENT) */
181
182 /*
183 * The size is one more than the max because DSCP start at zero
184 */
185 #define DSCP_ARRAY_SIZE (_MAX_DSCP + 1)
186
/*
 * The DSCP to UP mapping (via mbuf service class) for WiFi that follows is
 * the mapping implemented at the 802.11 driver level when the mbuf service
 * class is MBUF_SC_BE.
 *
 * This clashes with the recommended mapping documented by the IETF document
 * draft-szigeti-tsvwg-ieee-802-11e-01.txt but we keep the mapping to maintain
 * binary compatibility. Applications should use the network service type
 * socket option to select L2 QoS marking instead of IP_TOS or IPV6_TCLASS.
 */
197 static const struct dcsp_msc_map default_dscp_to_wifi_ac_map[] = {
198 { .dscp = _DSCP_DF, .msc = MBUF_SC_BE }, /* RFC 2474 Standard */
199 { .dscp = 1, .msc = MBUF_SC_BE }, /* */
200 { .dscp = 2, .msc = MBUF_SC_BE }, /* */
201 { .dscp = 3, .msc = MBUF_SC_BE }, /* */
202 { .dscp = 4, .msc = MBUF_SC_BE }, /* */
203 { .dscp = 5, .msc = MBUF_SC_BE }, /* */
204 { .dscp = 6, .msc = MBUF_SC_BE }, /* */
205 { .dscp = 7, .msc = MBUF_SC_BE }, /* */
206
207 { .dscp = _DSCP_CS1, .msc = MBUF_SC_BK }, /* RFC 3662 Low-Priority Data */
208 { .dscp = 9, .msc = MBUF_SC_BK }, /* */
209 { .dscp = _DSCP_AF11, .msc = MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */
210 { .dscp = 11, .msc = MBUF_SC_BK }, /* */
211 { .dscp = _DSCP_AF12, .msc = MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */
212 { .dscp = 13, .msc = MBUF_SC_BK }, /* */
213 { .dscp = _DSCP_AF13, .msc = MBUF_SC_BK }, /* RFC 2597 High-Throughput Data */
214 { .dscp = 15, .msc = MBUF_SC_BK }, /* */
215
216 { .dscp = _DSCP_CS2, .msc = MBUF_SC_BK }, /* RFC 4594 OAM */
217 { .dscp = 17, .msc = MBUF_SC_BK }, /* */
218 { .dscp = _DSCP_AF21, .msc = MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */
219 { .dscp = 19, .msc = MBUF_SC_BK }, /* */
220 { .dscp = _DSCP_AF22, .msc = MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */
221 { .dscp = 21, .msc = MBUF_SC_BK }, /* */
222 { .dscp = _DSCP_AF23, .msc = MBUF_SC_BK }, /* RFC 2597 Low-Latency Data */
223 { .dscp = 23, .msc = MBUF_SC_BK }, /* */
224
225 { .dscp = _DSCP_CS3, .msc = MBUF_SC_BE }, /* RFC 2474 Broadcast Video */
226 { .dscp = 25, .msc = MBUF_SC_BE }, /* */
227 { .dscp = _DSCP_AF31, .msc = MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */
228 { .dscp = 27, .msc = MBUF_SC_BE }, /* */
229 { .dscp = _DSCP_AF32, .msc = MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */
230 { .dscp = 29, .msc = MBUF_SC_BE }, /* */
231 { .dscp = _DSCP_AF33, .msc = MBUF_SC_BE }, /* RFC 2597 Multimedia Streaming */
232 { .dscp = 31, .msc = MBUF_SC_BE }, /* */
233
234 { .dscp = _DSCP_CS4, .msc = MBUF_SC_VI }, /* RFC 2474 Real-Time Interactive */
235 { .dscp = 33, .msc = MBUF_SC_VI }, /* */
236 { .dscp = _DSCP_AF41, .msc = MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */
237 { .dscp = 35, .msc = MBUF_SC_VI }, /* */
238 { .dscp = _DSCP_AF42, .msc = MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */
239 { .dscp = 37, .msc = MBUF_SC_VI }, /* */
240 { .dscp = _DSCP_AF43, .msc = MBUF_SC_VI }, /* RFC 2597 Multimedia Conferencing */
241 { .dscp = 39, .msc = MBUF_SC_VI }, /* */
242
243 { .dscp = _DSCP_CS5, .msc = MBUF_SC_VI }, /* RFC 2474 Signaling */
244 { .dscp = 41, .msc = MBUF_SC_VI }, /* */
245 { .dscp = 42, .msc = MBUF_SC_VI }, /* */
246 { .dscp = 43, .msc = MBUF_SC_VI }, /* */
247 { .dscp = _DSCP_VA, .msc = MBUF_SC_VI }, /* RFC 5865 VOICE-ADMIT */
248 { .dscp = 45, .msc = MBUF_SC_VI }, /* */
249 { .dscp = _DSCP_EF, .msc = MBUF_SC_VI }, /* RFC 3246 Telephony */
250 { .dscp = 47, .msc = MBUF_SC_VI }, /* */
251
252 { .dscp = _DSCP_CS6, .msc = MBUF_SC_VO }, /* Wi-Fi WMM Certification: Chariot */
253 { .dscp = 49, .msc = MBUF_SC_VO }, /* */
254 { .dscp = 50, .msc = MBUF_SC_VO }, /* */
255 { .dscp = 51, .msc = MBUF_SC_VO }, /* */
256 { .dscp = 52, .msc = MBUF_SC_VO }, /* Wi-Fi WMM Certification: Sigma */
257 { .dscp = 53, .msc = MBUF_SC_VO }, /* */
258 { .dscp = 54, .msc = MBUF_SC_VO }, /* */
259 { .dscp = 55, .msc = MBUF_SC_VO }, /* */
260
261 { .dscp = _DSCP_CS7, .msc = MBUF_SC_VO }, /* Wi-Fi WMM Certification: Chariot */
262 { .dscp = 57, .msc = MBUF_SC_VO }, /* */
263 { .dscp = 58, .msc = MBUF_SC_VO }, /* */
264 { .dscp = 59, .msc = MBUF_SC_VO }, /* */
265 { .dscp = 60, .msc = MBUF_SC_VO }, /* */
266 { .dscp = 61, .msc = MBUF_SC_VO }, /* */
267 { .dscp = 62, .msc = MBUF_SC_VO }, /* */
268 { .dscp = 63, .msc = MBUF_SC_VO }, /* */
269
270 { .dscp = 255, .msc = MBUF_SC_UNSPEC } /* invalid DSCP to mark last entry */
271 };
272
273 mbuf_svc_class_t wifi_dscp_to_msc_array[DSCP_ARRAY_SIZE];
274
275 /*
276 * If there is no foreground activity on the interface for bg_switch_time
277 * seconds, the background connections can switch to foreground TCP
278 * congestion control.
279 */
280 #define TCP_BG_SWITCH_TIME 2 /* seconds */
281
282 #if (DEVELOPMENT || DEBUG)
283
284 static int tfp_count = 0;
285
286 static TAILQ_HEAD(, tclass_for_proc) tfp_head =
287 TAILQ_HEAD_INITIALIZER(tfp_head);
288
289 struct tclass_for_proc {
290 TAILQ_ENTRY(tclass_for_proc) tfp_link;
291 int tfp_class;
292 pid_t tfp_pid;
293 char tfp_pname[(2 * MAXCOMLEN) + 1];
294 uint32_t tfp_qos_mode;
295 };
296
297 static int get_pid_tclass(struct so_tcdbg *);
298 static int get_pname_tclass(struct so_tcdbg *);
299 static int set_pid_tclass(struct so_tcdbg *);
300 static int set_pname_tclass(struct so_tcdbg *);
301 static int flush_pid_tclass(struct so_tcdbg *);
302 static int purge_tclass_for_proc(void);
303 static int flush_tclass_for_proc(void);
304 static void set_tclass_for_curr_proc(struct socket *);
305
306 /*
307 * Must be called with tclass_lock held
308 */
309 static struct tclass_for_proc *
find_tfp_by_pid(pid_t pid)310 find_tfp_by_pid(pid_t pid)
311 {
312 struct tclass_for_proc *tfp;
313
314 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
315 if (tfp->tfp_pid == pid) {
316 break;
317 }
318 }
319 return tfp;
320 }
321
322 /*
323 * Must be called with tclass_lock held
324 */
325 static struct tclass_for_proc *
find_tfp_by_pname(const char * pname)326 find_tfp_by_pname(const char *pname)
327 {
328 struct tclass_for_proc *tfp;
329
330 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
331 if (strncmp(pname, tfp->tfp_pname,
332 sizeof(tfp->tfp_pname)) == 0) {
333 break;
334 }
335 }
336 return tfp;
337 }
338
339 __private_extern__ void
set_tclass_for_curr_proc(struct socket * so)340 set_tclass_for_curr_proc(struct socket *so)
341 {
342 struct tclass_for_proc *tfp = NULL;
343 proc_t p = current_proc(); /* Not ref counted */
344 pid_t pid = proc_pid(p);
345 char *pname = proc_best_name(p);
346
347 lck_mtx_lock(&tclass_lock);
348
349 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
350 if ((tfp->tfp_pid == pid) || (tfp->tfp_pid == -1 &&
351 strncmp(pname, tfp->tfp_pname,
352 sizeof(tfp->tfp_pname)) == 0)) {
353 if (tfp->tfp_class != SO_TC_UNSPEC) {
354 so->so_traffic_class = (uint16_t)tfp->tfp_class;
355 }
356
357 if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
358 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
359 } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
360 so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
361 }
362 break;
363 }
364 }
365
366 lck_mtx_unlock(&tclass_lock);
367 }
368
369 /*
370 * Purge entries with PIDs of exited processes
371 */
372 int
purge_tclass_for_proc(void)373 purge_tclass_for_proc(void)
374 {
375 int error = 0;
376 struct tclass_for_proc *tfp, *tvar;
377
378 lck_mtx_lock(&tclass_lock);
379
380 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
381 proc_t p;
382
383 if (tfp->tfp_pid == -1) {
384 continue;
385 }
386 if ((p = proc_find(tfp->tfp_pid)) == NULL) {
387 tfp_count--;
388 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
389
390 kfree_type(struct tclass_for_proc, tfp);
391 } else {
392 proc_rele(p);
393 }
394 }
395
396 lck_mtx_unlock(&tclass_lock);
397
398 return error;
399 }
400
401 /*
402 * Remove one entry
403 * Must be called with tclass_lock held
404 */
405 static void
free_tclass_for_proc(struct tclass_for_proc * tfp)406 free_tclass_for_proc(struct tclass_for_proc *tfp)
407 {
408 if (tfp == NULL) {
409 return;
410 }
411 tfp_count--;
412 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
413 kfree_type(struct tclass_for_proc, tfp);
414 }
415
416 /*
417 * Remove all entries
418 */
419 int
flush_tclass_for_proc(void)420 flush_tclass_for_proc(void)
421 {
422 int error = 0;
423 struct tclass_for_proc *tfp, *tvar;
424
425 lck_mtx_lock(&tclass_lock);
426
427 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
428 free_tclass_for_proc(tfp);
429 }
430
431 lck_mtx_unlock(&tclass_lock);
432
433 return error;
434 }
435
436 /*
437 * Must be called with tclass_lock held
438 */
439 static struct tclass_for_proc *
alloc_tclass_for_proc(pid_t pid,const char * pname)440 alloc_tclass_for_proc(pid_t pid, const char *pname)
441 {
442 struct tclass_for_proc *tfp;
443
444 if (pid == -1 && pname == NULL) {
445 return NULL;
446 }
447
448 tfp = kalloc_type(struct tclass_for_proc, Z_NOWAIT | Z_ZERO);
449 if (tfp == NULL) {
450 return NULL;
451 }
452
453 tfp->tfp_pid = pid;
454 /*
455 * Add per pid entries before per proc name so we can find
456 * a specific instance of a process before the general name base entry.
457 */
458 if (pid != -1) {
459 TAILQ_INSERT_HEAD(&tfp_head, tfp, tfp_link);
460 } else {
461 strlcpy(tfp->tfp_pname, pname, sizeof(tfp->tfp_pname));
462 TAILQ_INSERT_TAIL(&tfp_head, tfp, tfp_link);
463 }
464
465 tfp_count++;
466
467 return tfp;
468 }
469
470 /*
471 * SO_TC_UNSPEC for tclass means to remove the entry
472 */
473 int
set_pid_tclass(struct so_tcdbg * so_tcdbg)474 set_pid_tclass(struct so_tcdbg *so_tcdbg)
475 {
476 int error = EINVAL;
477 proc_t p = NULL;
478 struct tclass_for_proc *tfp;
479 pid_t pid = so_tcdbg->so_tcdbg_pid;
480 int tclass = so_tcdbg->so_tcdbg_tclass;
481 int netsvctype = so_tcdbg->so_tcdbg_netsvctype;
482
483 p = proc_find(pid);
484 if (p == NULL) {
485 printf("%s proc_find(%d) failed\n", __func__, pid);
486 goto done;
487 }
488
489 /* Need a tfp */
490 lck_mtx_lock(&tclass_lock);
491
492 tfp = find_tfp_by_pid(pid);
493 if (tfp == NULL) {
494 tfp = alloc_tclass_for_proc(pid, NULL);
495 if (tfp == NULL) {
496 lck_mtx_unlock(&tclass_lock);
497 error = ENOBUFS;
498 goto done;
499 }
500 }
501 tfp->tfp_class = tclass;
502 tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
503
504 lck_mtx_unlock(&tclass_lock);
505
506 if (tfp != NULL) {
507 struct fileproc *fp;
508
509 proc_fdlock(p);
510
511 fdt_foreach(fp, p) {
512 struct socket *so;
513
514 if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
515 continue;
516 }
517
518 so = (struct socket *)fp_get_data(fp);
519 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
520 continue;
521 }
522
523 socket_lock(so, 1);
524 if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
525 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
526 } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
527 so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
528 }
529 socket_unlock(so, 1);
530
531 if (netsvctype != _NET_SERVICE_TYPE_UNSPEC) {
532 error = sock_setsockopt(so, SOL_SOCKET,
533 SO_NET_SERVICE_TYPE, &netsvctype, sizeof(int));
534 }
535 if (tclass != SO_TC_UNSPEC) {
536 error = sock_setsockopt(so, SOL_SOCKET,
537 SO_TRAFFIC_CLASS, &tclass, sizeof(int));
538 }
539 }
540
541 proc_fdunlock(p);
542 }
543
544 error = 0;
545 done:
546 if (p != NULL) {
547 proc_rele(p);
548 }
549
550 return error;
551 }
552
553 int
set_pname_tclass(struct so_tcdbg * so_tcdbg)554 set_pname_tclass(struct so_tcdbg *so_tcdbg)
555 {
556 int error = EINVAL;
557 struct tclass_for_proc *tfp;
558
559 lck_mtx_lock(&tclass_lock);
560
561 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
562 if (tfp == NULL) {
563 tfp = alloc_tclass_for_proc(-1, so_tcdbg->so_tcdbg_pname);
564 if (tfp == NULL) {
565 lck_mtx_unlock(&tclass_lock);
566 error = ENOBUFS;
567 goto done;
568 }
569 }
570 tfp->tfp_class = so_tcdbg->so_tcdbg_tclass;
571 tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
572
573 lck_mtx_unlock(&tclass_lock);
574
575 error = 0;
576 done:
577
578 return error;
579 }
580
581 static int
flush_pid_tclass(struct so_tcdbg * so_tcdbg)582 flush_pid_tclass(struct so_tcdbg *so_tcdbg)
583 {
584 pid_t pid = so_tcdbg->so_tcdbg_pid;
585 int tclass = so_tcdbg->so_tcdbg_tclass;
586 struct fileproc *fp;
587 proc_t p;
588 int error;
589
590 p = proc_find(pid);
591 if (p == PROC_NULL) {
592 printf("%s proc_find(%d) failed\n", __func__, pid);
593 return EINVAL;
594 }
595
596 proc_fdlock(p);
597
598 fdt_foreach(fp, p) {
599 struct socket *so;
600
601 if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
602 continue;
603 }
604
605 so = (struct socket *)fp_get_data(fp);
606 error = sock_setsockopt(so, SOL_SOCKET, SO_FLUSH, &tclass,
607 sizeof(tclass));
608 if (error != 0) {
609 printf("%s: setsockopt(SO_FLUSH) (so=0x%llx, fd=%d, "
610 "tclass=%d) failed %d\n", __func__,
611 (uint64_t)VM_KERNEL_ADDRPERM(so), fdt_foreach_fd(), tclass,
612 error);
613 }
614 }
615
616 proc_fdunlock(p);
617
618 proc_rele(p);
619 return 0;
620 }
621
622 int
get_pid_tclass(struct so_tcdbg * so_tcdbg)623 get_pid_tclass(struct so_tcdbg *so_tcdbg)
624 {
625 int error = EINVAL;
626 proc_t p = NULL;
627 struct tclass_for_proc *tfp;
628 pid_t pid = so_tcdbg->so_tcdbg_pid;
629
630 so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
631
632 p = proc_find(pid);
633 if (p == NULL) {
634 printf("%s proc_find(%d) failed\n", __func__, pid);
635 goto done;
636 }
637
638 /* Need a tfp */
639 lck_mtx_lock(&tclass_lock);
640
641 tfp = find_tfp_by_pid(pid);
642 if (tfp != NULL) {
643 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
644 so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
645 error = 0;
646 }
647 lck_mtx_unlock(&tclass_lock);
648 done:
649 if (p != NULL) {
650 proc_rele(p);
651 }
652
653 return error;
654 }
655
656 int
get_pname_tclass(struct so_tcdbg * so_tcdbg)657 get_pname_tclass(struct so_tcdbg *so_tcdbg)
658 {
659 int error = EINVAL;
660 struct tclass_for_proc *tfp;
661
662 so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
663
664 /* Need a tfp */
665 lck_mtx_lock(&tclass_lock);
666
667 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
668 if (tfp != NULL) {
669 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
670 so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
671 error = 0;
672 }
673 lck_mtx_unlock(&tclass_lock);
674
675 return error;
676 }
677
678 static int
delete_tclass_for_pid_pname(struct so_tcdbg * so_tcdbg)679 delete_tclass_for_pid_pname(struct so_tcdbg *so_tcdbg)
680 {
681 int error = EINVAL;
682 pid_t pid = so_tcdbg->so_tcdbg_pid;
683 struct tclass_for_proc *tfp = NULL;
684
685 lck_mtx_lock(&tclass_lock);
686
687 if (pid != -1) {
688 tfp = find_tfp_by_pid(pid);
689 } else {
690 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
691 }
692
693 if (tfp != NULL) {
694 free_tclass_for_proc(tfp);
695 error = 0;
696 }
697
698 lck_mtx_unlock(&tclass_lock);
699
700 return error;
701 }
702
703 /*
704 * Setting options requires privileges
705 */
706 __private_extern__ int
so_set_tcdbg(struct socket * so,struct so_tcdbg * so_tcdbg)707 so_set_tcdbg(struct socket *so, struct so_tcdbg *so_tcdbg)
708 {
709 int error = 0;
710
711 if ((so->so_state & SS_PRIV) == 0) {
712 return EPERM;
713 }
714
715 socket_unlock(so, 0);
716
717 switch (so_tcdbg->so_tcdbg_cmd) {
718 case SO_TCDBG_PID:
719 error = set_pid_tclass(so_tcdbg);
720 break;
721
722 case SO_TCDBG_PNAME:
723 error = set_pname_tclass(so_tcdbg);
724 break;
725
726 case SO_TCDBG_PURGE:
727 error = purge_tclass_for_proc();
728 break;
729
730 case SO_TCDBG_FLUSH:
731 error = flush_tclass_for_proc();
732 break;
733
734 case SO_TCDBG_DELETE:
735 error = delete_tclass_for_pid_pname(so_tcdbg);
736 break;
737
738 case SO_TCDBG_TCFLUSH_PID:
739 error = flush_pid_tclass(so_tcdbg);
740 break;
741
742 default:
743 error = EINVAL;
744 break;
745 }
746
747 socket_lock(so, 0);
748
749 return error;
750 }
751
752 /*
753 * Not required to be privileged to get
754 */
755 __private_extern__ int
sogetopt_tcdbg(struct socket * so,struct sockopt * sopt)756 sogetopt_tcdbg(struct socket *so, struct sockopt *sopt)
757 {
758 int error = 0;
759 struct so_tcdbg so_tcdbg;
760 void *buf = NULL;
761 size_t len = sopt->sopt_valsize;
762
763 error = sooptcopyin(sopt, &so_tcdbg, sizeof(struct so_tcdbg),
764 sizeof(struct so_tcdbg));
765 if (error != 0) {
766 return error;
767 }
768
769 sopt->sopt_valsize = len;
770
771 socket_unlock(so, 0);
772
773 switch (so_tcdbg.so_tcdbg_cmd) {
774 case SO_TCDBG_PID:
775 error = get_pid_tclass(&so_tcdbg);
776 break;
777
778 case SO_TCDBG_PNAME:
779 error = get_pname_tclass(&so_tcdbg);
780 break;
781
782 case SO_TCDBG_COUNT:
783 lck_mtx_lock(&tclass_lock);
784 so_tcdbg.so_tcdbg_count = tfp_count;
785 lck_mtx_unlock(&tclass_lock);
786 break;
787
788 case SO_TCDBG_LIST: {
789 struct tclass_for_proc *tfp;
790 int n, alloc_count;
791 struct so_tcdbg *ptr;
792
793 lck_mtx_lock(&tclass_lock);
794 if ((alloc_count = tfp_count) == 0) {
795 lck_mtx_unlock(&tclass_lock);
796 error = EINVAL;
797 break;
798 }
799 len = alloc_count * sizeof(struct so_tcdbg);
800 lck_mtx_unlock(&tclass_lock);
801
802 buf = kalloc_data(len, Z_WAITOK | Z_ZERO);
803 if (buf == NULL) {
804 error = ENOBUFS;
805 break;
806 }
807
808 lck_mtx_lock(&tclass_lock);
809 n = 0;
810 ptr = (struct so_tcdbg *)buf;
811 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
812 if (++n > alloc_count) {
813 break;
814 }
815 if (tfp->tfp_pid != -1) {
816 ptr->so_tcdbg_cmd = SO_TCDBG_PID;
817 ptr->so_tcdbg_pid = tfp->tfp_pid;
818 } else {
819 ptr->so_tcdbg_cmd = SO_TCDBG_PNAME;
820 ptr->so_tcdbg_pid = -1;
821 strlcpy(ptr->so_tcdbg_pname,
822 tfp->tfp_pname,
823 sizeof(ptr->so_tcdbg_pname));
824 }
825 ptr->so_tcdbg_tclass = tfp->tfp_class;
826 ptr->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
827 ptr++;
828 }
829
830 lck_mtx_unlock(&tclass_lock);
831 }
832 break;
833
834 default:
835 error = EINVAL;
836 break;
837 }
838
839 socket_lock(so, 0);
840
841 if (error == 0) {
842 if (buf == NULL) {
843 error = sooptcopyout(sopt, &so_tcdbg,
844 sizeof(struct so_tcdbg));
845 } else {
846 error = sooptcopyout(sopt, buf, len);
847 kfree_data(buf, len);
848 }
849 }
850 return error;
851 }
852
853 #endif /* (DEVELOPMENT || DEBUG) */
854
855 int
so_get_netsvc_marking_level(struct socket * so)856 so_get_netsvc_marking_level(struct socket *so)
857 {
858 int marking_level = NETSVC_MRKNG_UNKNOWN;
859 struct ifnet *ifp = NULL;
860
861 switch (SOCK_DOM(so)) {
862 case PF_INET: {
863 struct inpcb *inp = sotoinpcb(so);
864
865 if (inp != NULL) {
866 ifp = inp->inp_last_outifp;
867 }
868 break;
869 }
870 case PF_INET6: {
871 struct in6pcb *in6p = sotoin6pcb(so);
872
873 if (in6p != NULL) {
874 ifp = in6p->in6p_last_outifp;
875 }
876 break;
877 }
878 default:
879 break;
880 }
881 if (ifp != NULL) {
882 if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0) {
883 if ((so->so_flags1 & SOF1_QOSMARKING_ALLOWED)) {
884 marking_level = NETSVC_MRKNG_LVL_L3L2_ALL;
885 } else {
886 marking_level = NETSVC_MRKNG_LVL_L3L2_BK;
887 }
888 } else {
889 marking_level = NETSVC_MRKNG_LVL_L2;
890 }
891 }
892 return marking_level;
893 }
894
895 __private_extern__ int
so_set_traffic_class(struct socket * so,int optval)896 so_set_traffic_class(struct socket *so, int optval)
897 {
898 int error = 0;
899
900 if (optval < SO_TC_BE || optval > SO_TC_CTL) {
901 error = EINVAL;
902 } else {
903 switch (optval) {
904 case _SO_TC_BK:
905 optval = SO_TC_BK;
906 break;
907 case _SO_TC_VI:
908 optval = SO_TC_VI;
909 break;
910 case _SO_TC_VO:
911 optval = SO_TC_VO;
912 break;
913 default:
914 if (!SO_VALID_TC(optval)) {
915 error = EINVAL;
916 }
917 break;
918 }
919
920 if (error == 0) {
921 int oldval = so->so_traffic_class;
922
923 VERIFY(SO_VALID_TC(optval));
924 so->so_traffic_class = (uint16_t)optval;
925
926 if ((SOCK_DOM(so) == PF_INET ||
927 SOCK_DOM(so) == PF_INET6) &&
928 SOCK_TYPE(so) == SOCK_STREAM) {
929 set_tcp_stream_priority(so);
930 }
931
932 if ((SOCK_DOM(so) == PF_INET ||
933 SOCK_DOM(so) == PF_INET6) &&
934 optval != oldval && (optval == SO_TC_BK_SYS ||
935 oldval == SO_TC_BK_SYS)) {
936 /*
937 * If the app switches from BK_SYS to something
938 * else, resume the socket if it was suspended.
939 */
940 if (oldval == SO_TC_BK_SYS) {
941 inp_reset_fc_state(so->so_pcb);
942 }
943
944 SOTHROTTLELOG("throttle[%d]: so 0x%llx "
945 "[%d,%d] opportunistic %s\n", so->last_pid,
946 (uint64_t)VM_KERNEL_ADDRPERM(so),
947 SOCK_DOM(so), SOCK_TYPE(so),
948 (optval == SO_TC_BK_SYS) ? "ON" : "OFF");
949 }
950 }
951 }
952 return error;
953 }
954
955 __private_extern__ int
so_set_net_service_type(struct socket * so,int netsvctype)956 so_set_net_service_type(struct socket *so, int netsvctype)
957 {
958 int sotc;
959 int error;
960
961 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype)) {
962 return EINVAL;
963 }
964
965 sotc = sotc_by_netservicetype[netsvctype];
966 error = so_set_traffic_class(so, sotc);
967 if (error != 0) {
968 return error;
969 }
970 so->so_netsvctype = (int8_t)netsvctype;
971 so->so_flags1 |= SOF1_TC_NET_SERV_TYPE;
972
973 return 0;
974 }
975
976 __private_extern__ void
so_set_default_traffic_class(struct socket * so)977 so_set_default_traffic_class(struct socket *so)
978 {
979 so->so_traffic_class = SO_TC_BE;
980
981 if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6)) {
982 if (net_qos_policy_restricted == 0) {
983 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
984 }
985 #if (DEVELOPMENT || DEBUG)
986 if (tfp_count > 0) {
987 set_tclass_for_curr_proc(so);
988 }
989 #endif /* (DEVELOPMENT || DEBUG) */
990 }
991 }
992
993 __private_extern__ int
so_set_opportunistic(struct socket * so,int optval)994 so_set_opportunistic(struct socket *so, int optval)
995 {
996 return so_set_traffic_class(so, (optval == 0) ?
997 SO_TC_BE : SO_TC_BK_SYS);
998 }
999
1000 __private_extern__ int
so_get_opportunistic(struct socket * so)1001 so_get_opportunistic(struct socket *so)
1002 {
1003 return so->so_traffic_class == SO_TC_BK_SYS;
1004 }
1005
1006 __private_extern__ int
so_tc_from_control(struct mbuf * control,int * out_netsvctype)1007 so_tc_from_control(struct mbuf *control, int *out_netsvctype)
1008 {
1009 struct cmsghdr *cm;
1010 int sotc = SO_TC_UNSPEC;
1011
1012 *out_netsvctype = _NET_SERVICE_TYPE_UNSPEC;
1013
1014 for (cm = M_FIRST_CMSGHDR(control);
1015 is_cmsg_valid(control, cm);
1016 cm = M_NXT_CMSGHDR(control, cm)) {
1017 int val;
1018
1019 if (cm->cmsg_level != SOL_SOCKET ||
1020 cm->cmsg_len != CMSG_LEN(sizeof(int))) {
1021 continue;
1022 }
1023 val = *(int *)(void *)CMSG_DATA(cm);
1024 /*
1025 * The first valid option wins
1026 */
1027 switch (cm->cmsg_type) {
1028 case SO_TRAFFIC_CLASS:
1029 if (SO_VALID_TC(val)) {
1030 sotc = val;
1031 return sotc;
1032 /* NOT REACHED */
1033 } else if (val < SO_TC_NET_SERVICE_OFFSET) {
1034 break;
1035 }
1036 /*
1037 * Handle the case SO_NET_SERVICE_TYPE values are
1038 * passed using SO_TRAFFIC_CLASS
1039 */
1040 val = val - SO_TC_NET_SERVICE_OFFSET;
1041 OS_FALLTHROUGH;
1042 case SO_NET_SERVICE_TYPE:
1043 if (!IS_VALID_NET_SERVICE_TYPE(val)) {
1044 break;
1045 }
1046 *out_netsvctype = val;
1047 sotc = sotc_by_netservicetype[val];
1048 return sotc;
1049 /* NOT REACHED */
1050 default:
1051 break;
1052 }
1053 }
1054
1055 return sotc;
1056 }
1057
1058 __private_extern__ int
so_tos_from_control(struct mbuf * control)1059 so_tos_from_control(struct mbuf *control)
1060 {
1061 struct cmsghdr *cm;
1062 int tos = IPTOS_UNSPEC;
1063
1064 for (cm = M_FIRST_CMSGHDR(control);
1065 is_cmsg_valid(control, cm);
1066 cm = M_NXT_CMSGHDR(control, cm)) {
1067 if (cm->cmsg_len != CMSG_LEN(sizeof(int))) {
1068 continue;
1069 }
1070
1071 if ((cm->cmsg_level == IPPROTO_IP &&
1072 cm->cmsg_type == IP_TOS) ||
1073 (cm->cmsg_level == IPPROTO_IPV6 &&
1074 cm->cmsg_type == IPV6_TCLASS)) {
1075 tos = *(int *)(void *)CMSG_DATA(cm) & IPTOS_MASK;
1076 /* The first valid option wins */
1077 break;
1078 }
1079 }
1080
1081 return tos;
1082 }
1083
1084 __private_extern__ void
so_recv_data_stat(struct socket * so,struct mbuf * m,size_t off)1085 so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off)
1086 {
1087 uint32_t mtc = m_get_traffic_class(m);
1088
1089 if (mtc >= SO_TC_STATS_MAX) {
1090 mtc = MBUF_TC_BE;
1091 }
1092
1093 so->so_tc_stats[mtc].rxpackets += 1;
1094 so->so_tc_stats[mtc].rxbytes +=
1095 ((m->m_flags & M_PKTHDR) ? m->m_pkthdr.len : 0) + off;
1096 }
1097
1098 __private_extern__ void
so_inc_recv_data_stat(struct socket * so,size_t pkts,size_t bytes,uint32_t mtc)1099 so_inc_recv_data_stat(struct socket *so, size_t pkts, size_t bytes,
1100 uint32_t mtc)
1101 {
1102 if (mtc >= SO_TC_STATS_MAX) {
1103 mtc = MBUF_TC_BE;
1104 }
1105
1106 so->so_tc_stats[mtc].rxpackets += pkts;
1107 so->so_tc_stats[mtc].rxbytes += bytes;
1108 }
1109
1110 static inline int
so_throttle_best_effort(struct socket * so,struct ifnet * ifp)1111 so_throttle_best_effort(struct socket *so, struct ifnet *ifp)
1112 {
1113 uint32_t uptime = (uint32_t)net_uptime();
1114 return soissrcbesteffort(so) &&
1115 net_io_policy_throttle_best_effort == 1 &&
1116 ifp->if_rt_sendts > 0 &&
1117 (int)(uptime - ifp->if_rt_sendts) <= TCP_BG_SWITCH_TIME;
1118 }
1119
1120 __private_extern__ void
set_tcp_stream_priority(struct socket * so)1121 set_tcp_stream_priority(struct socket *so)
1122 {
1123 struct inpcb *inp = sotoinpcb(so);
1124 struct tcpcb *tp = intotcpcb(inp);
1125 struct ifnet *outifp;
1126 u_char old_cc = tp->tcp_cc_index;
1127 int recvbg = IS_TCP_RECV_BG(so);
1128 bool is_local = false, fg_active = false;
1129 uint32_t uptime;
1130
1131 VERIFY((SOCK_CHECK_DOM(so, PF_INET) ||
1132 SOCK_CHECK_DOM(so, PF_INET6)) &&
1133 SOCK_CHECK_TYPE(so, SOCK_STREAM) &&
1134 SOCK_CHECK_PROTO(so, IPPROTO_TCP));
1135
1136 /* Return if the socket is in a terminal state */
1137 if (inp->inp_state == INPCB_STATE_DEAD) {
1138 return;
1139 }
1140
1141 outifp = inp->inp_last_outifp;
1142 uptime = (uint32_t)net_uptime();
1143
1144 /*
1145 * If the socket was marked as a background socket or if the
1146 * traffic class is set to background with traffic class socket
1147 * option then make both send and recv side of the stream to be
1148 * background. The variable sotcdb which can be set with sysctl
1149 * is used to disable these settings for testing.
1150 */
1151 if (outifp == NULL || (outifp->if_flags & IFF_LOOPBACK)) {
1152 is_local = true;
1153 }
1154
1155 /* Check if there has been recent foreground activity */
1156 if (outifp != NULL) {
1157 /*
1158 * If the traffic source is background, check if
1159 * there is recent foreground activity which should
1160 * continue to keep the traffic source as background.
1161 * Otherwise, we can switch the traffic source to
1162 * foreground.
1163 */
1164 if (soissrcbackground(so) && outifp->if_fg_sendts > 0 &&
1165 (int)(uptime - outifp->if_fg_sendts) <= TCP_BG_SWITCH_TIME) {
1166 fg_active = true;
1167 }
1168
1169 /*
1170 * The traffic source is best-effort -- check if
1171 * the policy to throttle best effort is enabled
1172 * and there was realtime activity on this
1173 * interface recently. If this is true, enable
1174 * algorithms that respond to increased latency
1175 * on best-effort traffic.
1176 */
1177 if (so_throttle_best_effort(so, outifp)) {
1178 fg_active = true;
1179 }
1180 }
1181
1182 /*
1183 * System initiated background traffic like cloud uploads should
1184 * always use background delay sensitive algorithms. This will
1185 * make the stream more responsive to other streams on the user's
1186 * network and it will minimize latency induced.
1187 */
1188 if (fg_active || IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
1189 /*
1190 * If the interface that the connection is using is
1191 * loopback, do not use background congestion
1192 * control algorithm.
1193 *
1194 * If there has been recent foreground activity or if there
1195 * was an indication that a real time foreground application
1196 * is going to use networking (net_io_policy_throttled),
1197 * switch the background and best effort streams to use background
1198 * congestion control algorithm.
1199 */
1200 if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0 || is_local) {
1201 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
1202 tcp_set_foreground_cc(so);
1203 }
1204 } else {
1205 if (old_cc != TCP_CC_ALGO_BACKGROUND_INDEX) {
1206 tcp_set_background_cc(so);
1207 }
1208 }
1209
1210 /* Set receive side background flags */
1211 if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0 || is_local) {
1212 tcp_clear_recv_bg(so);
1213 } else {
1214 tcp_set_recv_bg(so);
1215 }
1216 } else {
1217 /*
1218 * If there is no recent foreground activity, even the
1219 * background flows can use foreground congestion controller.
1220 */
1221 tcp_clear_recv_bg(so);
1222 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
1223 tcp_set_foreground_cc(so);
1224 }
1225 }
1226
1227 if (old_cc != tp->tcp_cc_index || recvbg != IS_TCP_RECV_BG(so)) {
1228 SOTHROTTLELOG("throttle[%d]: so 0x%llx [%d,%d] TCP %s send; "
1229 "%s recv\n", so->last_pid,
1230 (uint64_t)VM_KERNEL_ADDRPERM(so),
1231 SOCK_DOM(so), SOCK_TYPE(so),
1232 (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) ?
1233 "background" : "foreground",
1234 IS_TCP_RECV_BG(so) ? "background" : "foreground");
1235 }
1236 }
1237
1238 /*
1239 * Set traffic class to an IPv4 or IPv6 packet
1240 * - mark the mbuf
1241 * - set the DSCP code following the WMM mapping
1242 */
1243 __private_extern__ void
set_packet_service_class(struct mbuf * m,struct socket * so,int sotc,uint32_t flags)1244 set_packet_service_class(struct mbuf *m, struct socket *so,
1245 int sotc, uint32_t flags)
1246 {
1247 mbuf_svc_class_t msc = MBUF_SC_BE; /* Best effort by default */
1248 struct inpcb *inp = sotoinpcb(so); /* in6pcb and inpcb are the same */
1249
1250 if (!(m->m_flags & M_PKTHDR)) {
1251 return;
1252 }
1253
1254 /*
1255 * Here is the precedence:
1256 * 1) TRAFFIC_MGT_SO_BACKGROUND trumps all
1257 * 2) Traffic class passed via ancillary data to sendmsdg(2)
1258 * 3) Traffic class socket option last
1259 */
1260 if (sotc != SO_TC_UNSPEC) {
1261 VERIFY(SO_VALID_TC(sotc));
1262 msc = so_tc2msc(sotc);
1263 /* Assert because tc must have been valid */
1264 VERIFY(MBUF_VALID_SC(msc));
1265 }
1266
1267 /*
1268 * If TRAFFIC_MGT_SO_BACKGROUND is set or policy to throttle
1269 * best effort is set, depress the priority.
1270 */
1271 if (!IS_MBUF_SC_BACKGROUND(msc) && soisthrottled(so)) {
1272 msc = MBUF_SC_BK;
1273 }
1274
1275 if (IS_MBUF_SC_BESTEFFORT(msc) && inp->inp_last_outifp != NULL &&
1276 so_throttle_best_effort(so, inp->inp_last_outifp)) {
1277 msc = MBUF_SC_BK;
1278 }
1279
1280 if (soissrcbackground(so)) {
1281 m->m_pkthdr.pkt_flags |= PKTF_SO_BACKGROUND;
1282 }
1283
1284 if (soissrcrealtime(so) || IS_MBUF_SC_REALTIME(msc)) {
1285 m->m_pkthdr.pkt_flags |= PKTF_SO_REALTIME;
1286 }
1287 /*
1288 * Set the traffic class in the mbuf packet header svc field
1289 */
1290 if (sotcdb & SOTCDB_NO_MTC) {
1291 goto no_mbtc;
1292 }
1293
1294 /*
1295 * Elevate service class if the packet is a pure TCP ACK.
1296 * We can do this only when the flow is not a background
1297 * flow and the outgoing interface supports
1298 * transmit-start model.
1299 */
1300 if (!IS_MBUF_SC_BACKGROUND(msc) &&
1301 (flags & (PKT_SCF_TCP_ACK | PKT_SCF_TCP_SYN)) != 0) {
1302 msc = MBUF_SC_CTL;
1303 }
1304
1305 (void) m_set_service_class(m, msc);
1306
1307 /*
1308 * Set the privileged traffic auxiliary flag if applicable,
1309 * or clear it.
1310 */
1311 if (!(sotcdb & SOTCDB_NO_PRIVILEGED) && soisprivilegedtraffic(so) &&
1312 msc != MBUF_SC_UNSPEC) {
1313 m->m_pkthdr.pkt_flags |= PKTF_PRIO_PRIVILEGED;
1314 } else {
1315 m->m_pkthdr.pkt_flags &= ~PKTF_PRIO_PRIVILEGED;
1316 }
1317
1318 no_mbtc:
1319 /*
1320 * For TCP with background traffic class switch CC algo based on sysctl
1321 */
1322 if (so->so_type == SOCK_STREAM) {
1323 set_tcp_stream_priority(so);
1324 }
1325
1326 so_tc_update_stats(m, so, msc);
1327 }
1328
1329 __private_extern__ void
so_tc_update_stats(struct mbuf * m,struct socket * so,mbuf_svc_class_t msc)1330 so_tc_update_stats(struct mbuf *m, struct socket *so, mbuf_svc_class_t msc)
1331 {
1332 mbuf_traffic_class_t mtc;
1333
1334 /*
1335 * Assume socket and mbuf traffic class values are the same
1336 * Also assume the socket lock is held. Note that the stats
1337 * at the socket layer are reduced down to the legacy traffic
1338 * classes; we could/should potentially expand so_tc_stats[].
1339 */
1340 mtc = MBUF_SC2TC(msc);
1341 VERIFY(mtc < SO_TC_STATS_MAX);
1342 so->so_tc_stats[mtc].txpackets += 1;
1343 so->so_tc_stats[mtc].txbytes += m->m_pkthdr.len;
1344 }
1345
1346 __private_extern__ mbuf_svc_class_t
so_tc2msc(int tc)1347 so_tc2msc(int tc)
1348 {
1349 mbuf_svc_class_t msc;
1350
1351 switch (tc) {
1352 case SO_TC_BK_SYS:
1353 msc = MBUF_SC_BK_SYS;
1354 break;
1355 case SO_TC_BK:
1356 case _SO_TC_BK:
1357 msc = MBUF_SC_BK;
1358 break;
1359 case SO_TC_BE:
1360 msc = MBUF_SC_BE;
1361 break;
1362 case SO_TC_RD:
1363 msc = MBUF_SC_RD;
1364 break;
1365 case SO_TC_OAM:
1366 msc = MBUF_SC_OAM;
1367 break;
1368 case SO_TC_AV:
1369 msc = MBUF_SC_AV;
1370 break;
1371 case SO_TC_RV:
1372 msc = MBUF_SC_RV;
1373 break;
1374 case SO_TC_VI:
1375 case _SO_TC_VI:
1376 msc = MBUF_SC_VI;
1377 break;
1378 case SO_TC_NETSVC_SIG:
1379 msc = MBUF_SC_SIG;
1380 break;
1381 case SO_TC_VO:
1382 case _SO_TC_VO:
1383 msc = MBUF_SC_VO;
1384 break;
1385 case SO_TC_CTL:
1386 msc = MBUF_SC_CTL;
1387 break;
1388 case SO_TC_ALL:
1389 default:
1390 msc = MBUF_SC_UNSPEC;
1391 break;
1392 }
1393
1394 return msc;
1395 }
1396
1397 __private_extern__ int
so_svc2tc(mbuf_svc_class_t svc)1398 so_svc2tc(mbuf_svc_class_t svc)
1399 {
1400 switch (svc) {
1401 case MBUF_SC_BK_SYS:
1402 return SO_TC_BK_SYS;
1403 case MBUF_SC_BK:
1404 return SO_TC_BK;
1405 case MBUF_SC_BE:
1406 return SO_TC_BE;
1407 case MBUF_SC_RD:
1408 return SO_TC_RD;
1409 case MBUF_SC_OAM:
1410 return SO_TC_OAM;
1411 case MBUF_SC_AV:
1412 return SO_TC_AV;
1413 case MBUF_SC_RV:
1414 return SO_TC_RV;
1415 case MBUF_SC_VI:
1416 return SO_TC_VI;
1417 case MBUF_SC_SIG:
1418 return SO_TC_NETSVC_SIG;
1419 case MBUF_SC_VO:
1420 return SO_TC_VO;
1421 case MBUF_SC_CTL:
1422 return SO_TC_CTL;
1423 case MBUF_SC_UNSPEC:
1424 default:
1425 return SO_TC_BE;
1426 }
1427 }
1428
1429 static size_t
sotc_index(int sotc)1430 sotc_index(int sotc)
1431 {
1432 switch (sotc) {
1433 case SO_TC_BK_SYS:
1434 return SOTCIX_BK_SYS;
1435 case _SO_TC_BK:
1436 case SO_TC_BK:
1437 return SOTCIX_BK;
1438
1439 case SO_TC_BE:
1440 return SOTCIX_BE;
1441 case SO_TC_RD:
1442 return SOTCIX_RD;
1443 case SO_TC_OAM:
1444 return SOTCIX_OAM;
1445
1446 case SO_TC_AV:
1447 return SOTCIX_AV;
1448 case SO_TC_RV:
1449 return SOTCIX_RV;
1450 case _SO_TC_VI:
1451 case SO_TC_VI:
1452 return SOTCIX_VI;
1453
1454 case _SO_TC_VO:
1455 case SO_TC_VO:
1456 return SOTCIX_VO;
1457 case SO_TC_CTL:
1458 return SOTCIX_CTL;
1459
1460 default:
1461 break;
1462 }
1463 /*
1464 * Unknown traffic class value
1465 */
1466 return SIZE_T_MAX;
1467 }
1468
1469 uint8_t
fastlane_sc_to_dscp(uint32_t svc_class)1470 fastlane_sc_to_dscp(uint32_t svc_class)
1471 {
1472 uint8_t dscp = _DSCP_DF;
1473
1474 switch (svc_class) {
1475 case MBUF_SC_BK_SYS:
1476 case MBUF_SC_BK:
1477 dscp = _DSCP_AF11;
1478 break;
1479
1480 case MBUF_SC_BE:
1481 dscp = _DSCP_DF;
1482 break;
1483 case MBUF_SC_RD:
1484 dscp = _DSCP_AF21;
1485 break;
1486 case MBUF_SC_OAM:
1487 dscp = _DSCP_CS2;
1488 break;
1489
1490 case MBUF_SC_AV:
1491 dscp = _DSCP_AF31;
1492 break;
1493 case MBUF_SC_RV:
1494 dscp = _DSCP_CS4;
1495 break;
1496 case MBUF_SC_VI:
1497 dscp = _DSCP_AF41;
1498 break;
1499 case MBUF_SC_SIG:
1500 dscp = _DSCP_CS3;
1501 break;
1502
1503 case MBUF_SC_VO:
1504 dscp = _DSCP_EF;
1505 break;
1506 case MBUF_SC_CTL:
1507 dscp = _DSCP_DF;
1508 break;
1509 default:
1510 dscp = _DSCP_DF;
1511 break;
1512 }
1513
1514 return dscp;
1515 }
1516
1517 uint8_t
rfc4594_sc_to_dscp(uint32_t svc_class)1518 rfc4594_sc_to_dscp(uint32_t svc_class)
1519 {
1520 uint8_t dscp = _DSCP_DF;
1521
1522 switch (svc_class) {
1523 case MBUF_SC_BK_SYS: /* Low-Priority Data */
1524 case MBUF_SC_BK:
1525 dscp = _DSCP_CS1;
1526 break;
1527
1528 case MBUF_SC_BE: /* Standard */
1529 dscp = _DSCP_DF;
1530 break;
1531 case MBUF_SC_RD: /* Low-Latency Data */
1532 dscp = _DSCP_AF21;
1533 break;
1534
1535 /* SVC_CLASS Not Defined: High-Throughput Data */
1536
1537 case MBUF_SC_OAM: /* OAM */
1538 dscp = _DSCP_CS2;
1539 break;
1540
1541 /* SVC_CLASS Not Defined: Broadcast Video */
1542
1543 case MBUF_SC_AV: /* Multimedia Streaming */
1544 dscp = _DSCP_AF31;
1545 break;
1546 case MBUF_SC_RV: /* Real-Time Interactive */
1547 dscp = _DSCP_CS4;
1548 break;
1549 case MBUF_SC_VI: /* Multimedia Conferencing */
1550 dscp = _DSCP_AF41;
1551 break;
1552 case MBUF_SC_SIG: /* Signaling */
1553 dscp = _DSCP_CS5;
1554 break;
1555
1556 case MBUF_SC_VO: /* Telephony */
1557 dscp = _DSCP_EF;
1558 break;
1559 case MBUF_SC_CTL: /* Network Control*/
1560 dscp = _DSCP_CS6;
1561 break;
1562 default:
1563 dscp = _DSCP_DF;
1564 break;
1565 }
1566
1567 return dscp;
1568 }
1569
1570 mbuf_traffic_class_t
rfc4594_dscp_to_tc(uint8_t dscp)1571 rfc4594_dscp_to_tc(uint8_t dscp)
1572 {
1573 mbuf_traffic_class_t tc = MBUF_TC_BE;
1574
1575 switch (dscp) {
1576 case _DSCP_CS1:
1577 tc = MBUF_TC_BK;
1578 break;
1579 case _DSCP_DF:
1580 case _DSCP_AF21:
1581 case _DSCP_CS2:
1582 tc = MBUF_TC_BE;
1583 break;
1584 case _DSCP_AF31:
1585 case _DSCP_CS4:
1586 case _DSCP_AF41:
1587 case _DSCP_CS5:
1588 tc = MBUF_TC_VI;
1589 break;
1590 case _DSCP_EF:
1591 case _DSCP_CS6:
1592 tc = MBUF_TC_VO;
1593 break;
1594 default:
1595 tc = MBUF_TC_BE;
1596 break;
1597 }
1598
1599 return tc;
1600 }
1601
1602 /*
1603 * Pass NULL ifp for default map
1604 */
1605 static errno_t
set_netsvctype_dscp_map(struct net_qos_dscp_map * net_qos_dscp_map,const struct netsvctype_dscp_map * netsvctype_dscp_map)1606 set_netsvctype_dscp_map(struct net_qos_dscp_map *net_qos_dscp_map,
1607 const struct netsvctype_dscp_map *netsvctype_dscp_map)
1608 {
1609 size_t i;
1610 int netsvctype;
1611
1612 /*
1613 * Do not accept more that max number of distinct DSCPs
1614 */
1615 if (net_qos_dscp_map == NULL || netsvctype_dscp_map == NULL) {
1616 return EINVAL;
1617 }
1618
1619 /*
1620 * Validate input parameters
1621 */
1622 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1623 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype_dscp_map[i].netsvctype)) {
1624 return EINVAL;
1625 }
1626 if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
1627 return EINVAL;
1628 }
1629 }
1630
1631 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1632 netsvctype = netsvctype_dscp_map[i].netsvctype;
1633
1634 net_qos_dscp_map->netsvctype_to_dscp[netsvctype] =
1635 netsvctype_dscp_map[i].dscp;
1636 }
1637 for (netsvctype = 0; netsvctype < _NET_SERVICE_TYPE_COUNT; netsvctype++) {
1638 switch (netsvctype) {
1639 case NET_SERVICE_TYPE_BE:
1640 case NET_SERVICE_TYPE_BK:
1641 case NET_SERVICE_TYPE_VI:
1642 case NET_SERVICE_TYPE_VO:
1643 case NET_SERVICE_TYPE_RV:
1644 case NET_SERVICE_TYPE_AV:
1645 case NET_SERVICE_TYPE_OAM:
1646 case NET_SERVICE_TYPE_RD: {
1647 size_t sotcix;
1648
1649 sotcix = sotc_index(sotc_by_netservicetype[netsvctype]);
1650 if (sotcix != SIZE_T_MAX) {
1651 net_qos_dscp_map->sotc_to_dscp[sotcix] =
1652 netsvctype_dscp_map[netsvctype].dscp;
1653 }
1654 break;
1655 }
1656 case NET_SERVICE_TYPE_SIG:
1657 /* Signaling does not have its own traffic class */
1658 break;
1659 default:
1660 /* We should not be here */
1661 ASSERT(0);
1662 }
1663 }
1664 if (net_qos_dscp_map == &fastlane_net_qos_dscp_map) {
1665 /* Network control socket traffic class is always best effort for fastlane*/
1666 net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_DF;
1667 } else {
1668 net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_CS6;
1669 }
1670
1671 /* Background system socket traffic class DSCP same as background */
1672 net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK_SYS] =
1673 net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK];
1674
1675 return 0;
1676 }
1677
1678 static size_t
get_netsvctype_dscp_map(struct netsvctype_dscp_map * netsvctype_dscp_map)1679 get_netsvctype_dscp_map(struct netsvctype_dscp_map *netsvctype_dscp_map)
1680 {
1681 struct net_qos_dscp_map *net_qos_dscp_map;
1682 int i;
1683
1684 net_qos_dscp_map = &fastlane_net_qos_dscp_map;
1685
1686 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1687 netsvctype_dscp_map[i].netsvctype = i;
1688 netsvctype_dscp_map[i].dscp = net_qos_dscp_map->netsvctype_to_dscp[i];
1689 }
1690
1691 return i * sizeof(struct netsvctype_dscp_map);
1692 }
1693
1694 void
net_qos_map_init()1695 net_qos_map_init()
1696 {
1697 errno_t error;
1698
1699 error = set_netsvctype_dscp_map(&fastlane_net_qos_dscp_map,
1700 fastlane_netsvctype_dscp_map);
1701 ASSERT(error == 0);
1702
1703 error = set_netsvctype_dscp_map(&rfc4594_net_qos_dscp_map,
1704 rfc4594_netsvctype_dscp_map);
1705 ASSERT(error == 0);
1706
1707 #if (DEBUG || DEVELOPMENT)
1708 error = set_netsvctype_dscp_map(&custom_net_qos_dscp_map,
1709 rfc4594_netsvctype_dscp_map);
1710 ASSERT(error == 0);
1711
1712 #endif /* (DEBUG || DEVELOPMENT) */
1713
1714 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
1715 }
1716
1717 int
1718 sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS
1719 {
1720 #pragma unused(oidp, arg1, arg2)
1721 int error = 0;
1722
1723 if (req->oldptr == USER_ADDR_NULL) {
1724 req->oldidx =
1725 _NET_SERVICE_TYPE_COUNT * sizeof(struct netsvctype_dscp_map);
1726 } else if (req->oldlen > 0) {
1727 struct netsvctype_dscp_map netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {};
1728 size_t len;
1729
1730 len = get_netsvctype_dscp_map(netsvctype_dscp_map);
1731
1732 error = SYSCTL_OUT(req, netsvctype_dscp_map,
1733 MIN(len, req->oldlen));
1734 if (error != 0) {
1735 goto done;
1736 }
1737 }
1738
1739 if (req->newptr != USER_ADDR_NULL) {
1740 error = EPERM;
1741 }
1742 done:
1743 return error;
1744 }
1745
1746 __private_extern__ errno_t
set_packet_qos(struct mbuf * m,struct ifnet * ifp,boolean_t qos_allowed,int sotc,int netsvctype,uint8_t * dscp_inout)1747 set_packet_qos(struct mbuf *m, struct ifnet *ifp, boolean_t qos_allowed,
1748 int sotc, int netsvctype, uint8_t *dscp_inout)
1749 {
1750 if (ifp == NULL || dscp_inout == NULL) {
1751 return EINVAL;
1752 }
1753
1754 if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0 &&
1755 ifp->if_qosmarking_mode != IFRTYPE_QOSMARKING_MODE_NONE) {
1756 uint8_t dscp;
1757 const struct net_qos_dscp_map *net_qos_dscp_map = NULL;
1758
1759 switch (ifp->if_qosmarking_mode) {
1760 case IFRTYPE_QOSMARKING_FASTLANE:
1761 net_qos_dscp_map = &fastlane_net_qos_dscp_map;
1762 break;
1763 case IFRTYPE_QOSMARKING_RFC4594:
1764 net_qos_dscp_map = &rfc4594_net_qos_dscp_map;
1765 break;
1766 #if (DEBUG || DEVELOPMENT)
1767 case IFRTYPE_QOSMARKING_CUSTOM:
1768 net_qos_dscp_map = &custom_net_qos_dscp_map;
1769 break;
1770 #endif /* (DEBUG || DEVELOPMENT) */
1771 default:
1772 panic("invalid QoS marking type");
1773 /* NOTREACHED */
1774 }
1775
1776 /*
1777 * When on a Fastlane network, IP_TOS/IPV6_TCLASS are no-ops
1778 */
1779 dscp = _DSCP_DF;
1780
1781 /*
1782 * For DSCP use the network service type is specified, otherwise
1783 * use the socket traffic class
1784 *
1785 * When not whitelisted by the policy, set DSCP only for best
1786 * effort and background, and set the mbuf service class to
1787 * best effort as well so the packet will be queued and
1788 * scheduled at a lower priority.
1789 * We still want to prioritize control traffic on the interface
1790 * so we do not change the mbuf service class for SO_TC_CTL
1791 */
1792 if (IS_VALID_NET_SERVICE_TYPE(netsvctype) &&
1793 netsvctype != NET_SERVICE_TYPE_BE) {
1794 dscp = net_qos_dscp_map->netsvctype_to_dscp[netsvctype];
1795
1796 if (qos_allowed == FALSE &&
1797 netsvctype != NET_SERVICE_TYPE_BE &&
1798 netsvctype != NET_SERVICE_TYPE_BK) {
1799 dscp = _DSCP_DF;
1800 if (sotc != SO_TC_CTL) {
1801 m_set_service_class(m, MBUF_SC_BE);
1802 }
1803 }
1804 } else if (sotc != SO_TC_UNSPEC) {
1805 size_t sotcix = sotc_index(sotc);
1806 if (sotcix != SIZE_T_MAX) {
1807 dscp = net_qos_dscp_map->sotc_to_dscp[sotcix];
1808
1809 if (qos_allowed == FALSE && sotc != SO_TC_BE &&
1810 sotc != SO_TC_BK && sotc != SO_TC_BK_SYS &&
1811 sotc != SO_TC_CTL) {
1812 dscp = _DSCP_DF;
1813 if (sotc != SO_TC_CTL) {
1814 m_set_service_class(m, MBUF_SC_BE);
1815 }
1816 }
1817 }
1818 }
1819 if (net_qos_verbose != 0) {
1820 printf("%s qos_allowed %d sotc %u netsvctype %u dscp %u\n",
1821 __func__, qos_allowed, sotc, netsvctype, dscp);
1822 }
1823
1824 if (*dscp_inout != dscp) {
1825 *dscp_inout = dscp;
1826 }
1827 } else if (*dscp_inout != _DSCP_DF && IFNET_IS_WIFI_INFRA(ifp)) {
1828 mbuf_svc_class_t msc = m_get_service_class(m);
1829
1830 /*
1831 * For WiFi infra, when the mbuf service class is best effort
1832 * and the DSCP is not default, set the service class based
1833 * on DSCP
1834 */
1835 if (msc == MBUF_SC_BE) {
1836 msc = wifi_dscp_to_msc_array[*dscp_inout];
1837
1838 if (msc != MBUF_SC_BE) {
1839 m_set_service_class(m, msc);
1840
1841 if (net_qos_verbose != 0) {
1842 printf("%s set msc %u for dscp %u\n",
1843 __func__, msc, *dscp_inout);
1844 }
1845 }
1846 }
1847 }
1848
1849 return 0;
1850 }
1851
1852 static void
set_dscp_to_wifi_ac_map(const struct dcsp_msc_map * map,int clear)1853 set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *map, int clear)
1854 {
1855 int i;
1856
1857 if (clear) {
1858 bzero(wifi_dscp_to_msc_array, sizeof(wifi_dscp_to_msc_array));
1859 }
1860
1861 for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1862 const struct dcsp_msc_map *elem = map + i;
1863
1864 if (elem->dscp > _MAX_DSCP || elem->msc == MBUF_SC_UNSPEC) {
1865 break;
1866 }
1867 switch (elem->msc) {
1868 case MBUF_SC_BK_SYS:
1869 case MBUF_SC_BK:
1870 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BK;
1871 break;
1872 default:
1873 case MBUF_SC_BE:
1874 case MBUF_SC_RD:
1875 case MBUF_SC_OAM:
1876 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BE;
1877 break;
1878 case MBUF_SC_AV:
1879 case MBUF_SC_RV:
1880 case MBUF_SC_VI:
1881 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VI;
1882 break;
1883 case MBUF_SC_VO:
1884 case MBUF_SC_CTL:
1885 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VO;
1886 break;
1887 }
1888 }
1889 }
1890
1891 static errno_t
dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map * netsvctype_dscp_map,size_t count,struct dcsp_msc_map * dcsp_msc_map)1892 dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *netsvctype_dscp_map,
1893 size_t count, struct dcsp_msc_map *dcsp_msc_map)
1894 {
1895 errno_t error = 0;
1896 uint32_t i;
1897
1898 /*
1899 * Validate input parameters
1900 */
1901 for (i = 0; i < count; i++) {
1902 if (!SO_VALID_TC(netsvctype_dscp_map[i].netsvctype)) {
1903 error = EINVAL;
1904 goto done;
1905 }
1906 if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
1907 error = EINVAL;
1908 goto done;
1909 }
1910 }
1911
1912 bzero(dcsp_msc_map, DSCP_ARRAY_SIZE * sizeof(struct dcsp_msc_map));
1913
1914 for (i = 0; i < count; i++) {
1915 dcsp_msc_map[i].dscp = netsvctype_dscp_map[i].dscp;
1916 dcsp_msc_map[i].msc = so_tc2msc(netsvctype_dscp_map[i].netsvctype);
1917 }
1918 done:
1919 return error;
1920 }
1921
1922 int
1923 sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
1924 {
1925 #pragma unused(oidp, arg1, arg2)
1926 int error = 0;
1927 size_t len = DSCP_ARRAY_SIZE * sizeof(struct netsvctype_dscp_map);
1928 struct netsvctype_dscp_map netsvctype_dscp_map[DSCP_ARRAY_SIZE] = {};
1929 struct dcsp_msc_map dcsp_msc_map[DSCP_ARRAY_SIZE];
1930 size_t count;
1931
1932 if (req->oldptr == USER_ADDR_NULL) {
1933 req->oldidx = len;
1934 } else if (req->oldlen > 0) {
1935 uint8_t i;
1936
1937 for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1938 netsvctype_dscp_map[i].dscp = i;
1939 netsvctype_dscp_map[i].netsvctype =
1940 so_svc2tc(wifi_dscp_to_msc_array[i]);
1941 }
1942 error = SYSCTL_OUT(req, netsvctype_dscp_map,
1943 MIN(len, req->oldlen));
1944 if (error != 0) {
1945 goto done;
1946 }
1947 }
1948
1949 if (req->newptr == USER_ADDR_NULL) {
1950 goto done;
1951 }
1952
1953 error = proc_suser(current_proc());
1954 if (error != 0) {
1955 goto done;
1956 }
1957
1958 /*
1959 * Check input length
1960 */
1961 if (req->newlen > len) {
1962 error = EINVAL;
1963 goto done;
1964 }
1965 /*
1966 * Cap the number of entries to copy from input buffer
1967 */
1968 if (len > req->newlen) {
1969 len = req->newlen;
1970 }
1971 error = SYSCTL_IN(req, netsvctype_dscp_map, len);
1972 if (error != 0) {
1973 goto done;
1974 }
1975 count = len / sizeof(struct netsvctype_dscp_map);
1976 bzero(dcsp_msc_map, sizeof(dcsp_msc_map));
1977 error = dscp_msc_map_from_netsvctype_dscp_map(netsvctype_dscp_map, count,
1978 dcsp_msc_map);
1979 if (error != 0) {
1980 goto done;
1981 }
1982 set_dscp_to_wifi_ac_map(dcsp_msc_map, 0);
1983 done:
1984 return error;
1985 }
1986
1987 int
1988 sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
1989 {
1990 #pragma unused(oidp, arg1, arg2)
1991 int error = 0;
1992 int val = 0;
1993
1994 error = sysctl_handle_int(oidp, &val, 0, req);
1995 if (error || !req->newptr) {
1996 return error;
1997 }
1998 if (req->newptr == USER_ADDR_NULL) {
1999 return 0;
2000 }
2001 error = proc_suser(current_proc());
2002 if (error != 0) {
2003 return error;
2004 }
2005
2006 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
2007
2008 return 0;
2009 }
2010
2011 /*
2012 * Returns whether a large upload or download transfer should be marked as
2013 * BK service type for network activity. This is a system level
2014 * hint/suggestion to classify application traffic based on statistics
2015 * collected from the current network attachment
2016 *
2017 * Returns 1 for BK and 0 for default
2018 */
2019
2020 int
net_qos_guideline(struct proc * p,struct net_qos_guideline_args * arg,int * retval)2021 net_qos_guideline(struct proc *p, struct net_qos_guideline_args *arg,
2022 int *retval)
2023 {
2024 #pragma unused(p)
2025 #define RETURN_USE_BK 1
2026 #define RETURN_USE_DEFAULT 0
2027 struct net_qos_param qos_arg;
2028 struct ifnet *ipv4_primary, *ipv6_primary;
2029 int err = 0;
2030
2031 if (arg->param == USER_ADDR_NULL || retval == NULL ||
2032 arg->param_len != sizeof(qos_arg)) {
2033 return EINVAL;
2034 }
2035 err = copyin(arg->param, (caddr_t) &qos_arg, sizeof(qos_arg));
2036 if (err != 0) {
2037 return err;
2038 }
2039
2040 *retval = RETURN_USE_DEFAULT;
2041 ipv4_primary = ifindex2ifnet[get_primary_ifscope(AF_INET)];
2042 ipv6_primary = ifindex2ifnet[get_primary_ifscope(AF_INET6)];
2043
2044 /*
2045 * If either of the interfaces is in Low Internet mode, enable
2046 * background delay based algorithms on this transfer
2047 */
2048 if (qos_arg.nq_uplink) {
2049 if ((ipv4_primary != NULL &&
2050 (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_UL)) ||
2051 (ipv6_primary != NULL &&
2052 (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_UL))) {
2053 *retval = RETURN_USE_BK;
2054 return 0;
2055 }
2056 } else {
2057 if ((ipv4_primary != NULL &&
2058 (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_DL)) ||
2059 (ipv6_primary != NULL &&
2060 (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_DL))) {
2061 *retval = RETURN_USE_BK;
2062 return 0;
2063 }
2064 }
2065
2066 /*
2067 * Some times IPv4 and IPv6 primary interfaces can be different.
2068 * In this case, if either of them is non-cellular, we should mark
2069 * the transfer as BK as it can potentially get used based on
2070 * the host name resolution
2071 */
2072 if (ipv4_primary != NULL && IFNET_IS_EXPENSIVE(ipv4_primary) &&
2073 ipv6_primary != NULL && IFNET_IS_EXPENSIVE(ipv6_primary)) {
2074 if (qos_arg.nq_use_expensive) {
2075 return 0;
2076 } else {
2077 *retval = RETURN_USE_BK;
2078 return 0;
2079 }
2080 }
2081 if (ipv4_primary != NULL && IFNET_IS_CONSTRAINED(ipv4_primary) &&
2082 ipv6_primary != NULL && IFNET_IS_CONSTRAINED(ipv6_primary)) {
2083 if (qos_arg.nq_use_constrained) {
2084 return 0;
2085 } else {
2086 *retval = RETURN_USE_BK;
2087 return 0;
2088 }
2089 }
2090 if (qos_arg.nq_transfer_size >= 5 * 1024 * 1024) {
2091 *retval = RETURN_USE_BK;
2092 return 0;
2093 }
2094
2095
2096 #undef RETURN_USE_BK
2097 #undef RETURN_USE_DEFAULT
2098 return 0;
2099 }
2100
2101 #if (DEBUG || DEVELOPMENT)
2102 /*
2103 * Customizable QoS mapping table
2104 * By default it uses the mapping table for RFC 4594
2105 *
2106 * Notes:
2107 * BK_SYS is the same as BK
2108 * CTL cannot be changed and is always _DSCP_CS6
2109 */
2110 SYSCTL_NODE(_net_qos, OID_AUTO, custom,
2111 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");
2112
2113 SYSCTL_NODE(_net_qos_custom, OID_AUTO, netsvctype_to_dscp,
2114 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");
2115
2116 static int sysctl_net_qos_custom_netsvctype_to_dscp SYSCTL_HANDLER_ARGS;
2117 SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, be,
2118 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2119 0, NET_SERVICE_TYPE_BE, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
2120 SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, bk,
2121 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2122 0, NET_SERVICE_TYPE_BK, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
2123 SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, sig,
2124 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2125 0, NET_SERVICE_TYPE_SIG, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
2126 SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, vi,
2127 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2128 0, NET_SERVICE_TYPE_VI, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
2129 SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, vo,
2130 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2131 0, NET_SERVICE_TYPE_VO, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
2132 SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, rv,
2133 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2134 0, NET_SERVICE_TYPE_RV, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
2135 SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, av,
2136 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2137 0, NET_SERVICE_TYPE_AV, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
2138 SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, oam,
2139 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2140 0, NET_SERVICE_TYPE_OAM, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
2141 SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, rd,
2142 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2143 0, NET_SERVICE_TYPE_RD, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
2144
2145 static int sysctl_net_qos_custom_reset SYSCTL_HANDLER_ARGS;
2146 SYSCTL_PROC(_net_qos_custom, OID_AUTO, reset,
2147 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2148 0, 0, sysctl_net_qos_custom_reset, "I", "");
2149
2150 int
2151 sysctl_net_qos_custom_netsvctype_to_dscp SYSCTL_HANDLER_ARGS
2152 {
2153 #pragma unused(arg1)
2154 int error = 0;
2155
2156 switch (arg2) {
2157 case NET_SERVICE_TYPE_BE:
2158 case NET_SERVICE_TYPE_BK:
2159 case NET_SERVICE_TYPE_SIG:
2160 case NET_SERVICE_TYPE_VI:
2161 case NET_SERVICE_TYPE_VO:
2162 case NET_SERVICE_TYPE_RV:
2163 case NET_SERVICE_TYPE_AV:
2164 case NET_SERVICE_TYPE_OAM:
2165 case NET_SERVICE_TYPE_RD:
2166 break;
2167 default:
2168 os_log(OS_LOG_DEFAULT, "%s: unexpected netsvctype %d",
2169 __func__, arg2);
2170 return EINVAL;
2171 }
2172
2173 int val = custom_net_qos_dscp_map.netsvctype_to_dscp[arg2];
2174 error = sysctl_handle_int(oidp, &val, 0, req);
2175 if (error != 0 || req->newptr == USER_ADDR_NULL) {
2176 return error;
2177 }
2178 if (req->newptr == USER_ADDR_NULL) {
2179 return 0;
2180 }
2181 error = proc_suser(current_proc());
2182 if (error != 0) {
2183 return error;
2184 }
2185 if (val < 0 || val > _MAX_DSCP) {
2186 os_log(OS_LOG_DEFAULT, "%s: unexpected DSCP %d",
2187 __func__, val);
2188 return EINVAL;
2189 }
2190
2191 struct netsvctype_dscp_map netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {};
2192
2193 for (int i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
2194 netsvctype_dscp_map[i].netsvctype = i;
2195 netsvctype_dscp_map[i].dscp = custom_net_qos_dscp_map.netsvctype_to_dscp[i];
2196 }
2197 netsvctype_dscp_map[arg2].dscp = (uint8_t) val;
2198
2199 error = set_netsvctype_dscp_map(&custom_net_qos_dscp_map,
2200 netsvctype_dscp_map);
2201
2202 return 0;
2203 }
2204
2205 int
2206 sysctl_net_qos_custom_reset SYSCTL_HANDLER_ARGS
2207 {
2208 #pragma unused(arg1, arg2)
2209 int error = 0;
2210 int val = 0;
2211
2212 error = sysctl_handle_int(oidp, &val, 0, req);
2213 if (error || !req->newptr) {
2214 return error;
2215 }
2216 if (req->newptr == USER_ADDR_NULL) {
2217 return 0;
2218 }
2219 error = proc_suser(current_proc());
2220 if (error != 0) {
2221 return error;
2222 }
2223
2224 error = set_netsvctype_dscp_map(&custom_net_qos_dscp_map,
2225 rfc4594_netsvctype_dscp_map);
2226
2227 return error;
2228 }
2229
2230 uint8_t
custom_sc_to_dscp(uint32_t svc_class)2231 custom_sc_to_dscp(uint32_t svc_class)
2232 {
2233 uint8_t dscp = _DSCP_DF;
2234
2235 switch (svc_class) {
2236 case MBUF_SC_BK_SYS:
2237 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_BK_SYS];
2238 break;
2239 case MBUF_SC_BK:
2240 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_BK];
2241 break;
2242
2243 case MBUF_SC_BE:
2244 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_BE];
2245 break;
2246 case MBUF_SC_RD:
2247 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_RD];
2248 break;
2249 case MBUF_SC_OAM:
2250 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_OAM];
2251 break;
2252
2253 case MBUF_SC_AV:
2254 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_AV];
2255 break;
2256 case MBUF_SC_RV:
2257 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_RV];
2258 break;
2259 case MBUF_SC_VI:
2260 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_VI];
2261 break;
2262 case MBUF_SC_SIG:
2263 dscp = custom_net_qos_dscp_map.netsvctype_to_dscp[NET_SERVICE_TYPE_SIG];
2264 break;
2265
2266 case MBUF_SC_VO:
2267 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_VO];
2268 break;
2269 case MBUF_SC_CTL:
2270 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_CTL];
2271 break;
2272 default:
2273 break;
2274 }
2275 return dscp;
2276 }
2277 #endif /* (DEBUG || DEVELOPMENT) */
2278