1 /*
2 * Copyright (c) 2009-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/types.h>
32 #include <sys/filedesc.h>
33 #include <sys/file_internal.h>
34 #include <sys/proc.h>
35 #include <sys/socket.h>
36 #include <sys/socketvar.h>
37 #include <sys/errno.h>
38 #include <sys/protosw.h>
39 #include <sys/domain.h>
40 #include <sys/mbuf.h>
41 #include <sys/queue.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysproto.h>
44
45 #include <net/if.h>
46 #include <net/if_var.h>
47 #include <net/route.h>
48
49 #include <netinet/in.h>
50 #include <netinet/in_var.h>
51 #include <netinet/in_pcb.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip_var.h>
54 #include <netinet/ip6.h>
55 #include <netinet6/ip6_var.h>
56 #include <netinet/udp.h>
57 #include <netinet/udp_var.h>
58 #include <netinet/tcp.h>
59 #include <netinet/tcp_var.h>
60 #include <netinet/tcp_cc.h>
61 #include <netinet/in_tclass.h>
62
63 #include <os/log.h>
64
/*
 * Compile-time check that the _SO_TC_MAX and SO_TC_STATS_MAX constants
 * stay equal.
 */
static_assert(_SO_TC_MAX == SO_TC_STATS_MAX);

/*
 * DSCP values start at zero, so an array indexed by DSCP needs one more
 * element than the largest DSCP value.
 */
#define DSCP_ARRAY_SIZE (_MAX_DSCP + 1)
71
/*
 * A pair of DSCP lookup tables: one sized by SO_TC_MAX (socket traffic
 * classes) and one sized by _NET_SERVICE_TYPE_COUNT (network service types).
 */
struct net_qos_dscp_map {
	uint8_t        sotc_to_dscp[SO_TC_MAX];
	uint8_t        netsvctype_to_dscp[_NET_SERVICE_TYPE_COUNT];
};
76
/*
 * Associates one DSCP value with one mbuf service class.
 * NOTE(review): the tag is spelled "dcsp" (not "dscp") throughout the file.
 */
struct dcsp_msc_map {
	uint8_t                 dscp;
	mbuf_svc_class_t        msc;
};
/* Forward declarations of file-local helpers */
static inline int so_throttle_best_effort(struct socket *, struct ifnet *);
static void set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *__indexable, int);
static errno_t dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *__counted_by(count) map,
    size_t count, struct dcsp_msc_map *__counted_by(DSCP_ARRAY_SIZE));
85
/* "qos" sysctl node under _net */
SYSCTL_NODE(_net, OID_AUTO, qos,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "QoS");

/* Struct-typed, read-write entry served by a handler function */
static int sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_net_qos, OID_AUTO, default_netsvctype_to_dscp_map,
    CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_default_netsvctype_to_dscp_map, "S", "");

/* Struct-typed, read-write entry served by a handler function */
static int sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_net_qos, OID_AUTO, dscp_to_wifi_ac_map,
    CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_dscp_to_wifi_ac_map, "S", "");

/* Integer entry served by a handler function; flagged CTLFLAG_WR only */
static int sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_net_qos, OID_AUTO, reset_dscp_to_wifi_ac_map,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, 0, sysctl_reset_dscp_to_wifi_ac_map, "I", "");

/* Integer knob exported as "verbose" under the qos node; defaults to 0 */
int net_qos_verbose = 0;
SYSCTL_INT(_net_qos, OID_AUTO, verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_verbose, 0, "");

/*
 * Fastlane QoS policy:
 * By default, allow all apps to get traffic class to DSCP mapping
 */
SYSCTL_NODE(_net_qos, OID_AUTO, policy,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");

/*
 * When this is zero, so_set_default_traffic_class() marks new PF_INET and
 * PF_INET6 sockets with SOF1_QOSMARKING_ALLOWED.
 */
int net_qos_policy_restricted = 0;
SYSCTL_INT(_net_qos_policy, OID_AUTO, restricted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restricted, 0, "");

/* Policy knob, defaults to 0 */
int net_qos_policy_restrict_avapps = 0;
SYSCTL_INT(_net_qos_policy, OID_AUTO, restrict_avapps,
    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restrict_avapps, 0, "");

/* Policy knob, defaults to 0 */
int net_qos_policy_wifi_enabled = 0;
SYSCTL_INT(_net_qos_policy, OID_AUTO, wifi_enabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_wifi_enabled, 0, "");

/* Policy knob, defaults to 0 */
int net_qos_policy_capable_enabled = 0;
SYSCTL_INT(_net_qos_policy, OID_AUTO, capable_enabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_capable_enabled, 0, "");
130
/*
 * Socket traffic class from network service type.
 *
 * The table is indexed by the network service type value (see
 * so_set_net_service_type()); each entry is the socket traffic class
 * applied for that service type.
 */
const int sotc_by_netservicetype[_NET_SERVICE_TYPE_COUNT] = {
	SO_TC_BE,       /* NET_SERVICE_TYPE_BE */
	SO_TC_BK,       /* NET_SERVICE_TYPE_BK */
	SO_TC_VI,       /* NET_SERVICE_TYPE_SIG */
	SO_TC_VI,       /* NET_SERVICE_TYPE_VI */
	SO_TC_VO,       /* NET_SERVICE_TYPE_VO */
	SO_TC_RV,       /* NET_SERVICE_TYPE_RV */
	SO_TC_AV,       /* NET_SERVICE_TYPE_AV */
	SO_TC_OAM,      /* NET_SERVICE_TYPE_OAM */
	SO_TC_RD        /* NET_SERVICE_TYPE_RD */
};
145
/*
 * DSCP mappings for QoS Fastlane, keyed by network service type.
 * One entry per network service type.
 */
static const
struct netsvctype_dscp_map fastlane_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
	{ .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
	{ .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_AF11 },
	{ .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS3 },
	{ .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
	{ .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
	{ .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
	{ .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
	{ .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
	{ .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
};
161
/*
 * DSCP mappings for QoS RFC 4594, keyed by network service type.
 * Differs from the Fastlane table only for the BK and SIG service types.
 */
static const
struct netsvctype_dscp_map rfc4594_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
	{ .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
	{ .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_CS1 },
	{ .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS5 },
	{ .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
	{ .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
	{ .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
	{ .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
	{ .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
	{ .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
};
177
/*
 * DSCP lookup tables, one per marking scheme.
 * NOTE(review): presumably filled in from the netsvctype tables above by
 * code outside this section -- confirm.
 */
static struct net_qos_dscp_map fastlane_net_qos_dscp_map;
static struct net_qos_dscp_map rfc4594_net_qos_dscp_map;
#if (DEBUG || DEVELOPMENT)
/* Extra table that only exists on DEBUG and DEVELOPMENT kernels */
static struct net_qos_dscp_map custom_net_qos_dscp_map;
#endif /* (DEBUG || DEVELOPMENT) */
183
184
/*
 * The DSCP to UP mapping (via mbuf service class) for Wi-Fi below follows
 * the mapping that is implemented at the 802.11 driver level when the mbuf
 * service class is MBUF_SC_BE.
 *
 * This clashes with the recommended mapping documented by the IETF document
 * draft-szigeti-tsvwg-ieee-802-11e-01.txt but we keep the mapping to maintain
 * binary compatibility. Applications should use the network service type
 * socket option to select L2 QoS marking instead of IP_TOS or IPV6_TCLASS.
 *
 * The table holds one entry per DSCP value from 0 to 63, in order, and is
 * terminated by an entry whose dscp field is 255.
 */
static const struct dcsp_msc_map default_dscp_to_wifi_ac_map[] = {
	{ .dscp = _DSCP_DF, .msc = MBUF_SC_BE },        /* RFC 2474 Standard */
	{ .dscp = 1, .msc = MBUF_SC_BE },               /*  */
	{ .dscp = 2, .msc = MBUF_SC_BE },               /*  */
	{ .dscp = 3, .msc = MBUF_SC_BE },               /*  */
	{ .dscp = 4, .msc = MBUF_SC_BE },               /*  */
	{ .dscp = 5, .msc = MBUF_SC_BE },               /*  */
	{ .dscp = 6, .msc = MBUF_SC_BE },               /*  */
	{ .dscp = 7, .msc = MBUF_SC_BE },               /*  */

	{ .dscp = _DSCP_CS1, .msc = MBUF_SC_BK },       /* RFC 3662 Low-Priority Data */
	{ .dscp = 9, .msc = MBUF_SC_BK },               /*  */
	{ .dscp = _DSCP_AF11, .msc = MBUF_SC_BK },      /* RFC 2597 High-Throughput Data */
	{ .dscp = 11, .msc = MBUF_SC_BK },              /*  */
	{ .dscp = _DSCP_AF12, .msc = MBUF_SC_BK },      /* RFC 2597 High-Throughput Data */
	{ .dscp = 13, .msc = MBUF_SC_BK },              /*  */
	{ .dscp = _DSCP_AF13, .msc = MBUF_SC_BK },      /* RFC 2597 High-Throughput Data */
	{ .dscp = 15, .msc = MBUF_SC_BK },              /*  */

	{ .dscp = _DSCP_CS2, .msc = MBUF_SC_BK },       /* RFC 4594 OAM */
	{ .dscp = 17, .msc = MBUF_SC_BK },              /*  */
	{ .dscp = _DSCP_AF21, .msc = MBUF_SC_BK },      /* RFC 2597 Low-Latency Data */
	{ .dscp = 19, .msc = MBUF_SC_BK },              /*  */
	{ .dscp = _DSCP_AF22, .msc = MBUF_SC_BK },      /* RFC 2597 Low-Latency Data */
	{ .dscp = 21, .msc = MBUF_SC_BK },              /*  */
	{ .dscp = _DSCP_AF23, .msc = MBUF_SC_BK },      /* RFC 2597 Low-Latency Data */
	{ .dscp = 23, .msc = MBUF_SC_BK },              /*  */

	{ .dscp = _DSCP_CS3, .msc = MBUF_SC_BE },       /* RFC 2474 Broadcast Video */
	{ .dscp = 25, .msc = MBUF_SC_BE },              /*  */
	{ .dscp = _DSCP_AF31, .msc = MBUF_SC_BE },      /* RFC 2597 Multimedia Streaming */
	{ .dscp = 27, .msc = MBUF_SC_BE },              /*  */
	{ .dscp = _DSCP_AF32, .msc = MBUF_SC_BE },      /* RFC 2597 Multimedia Streaming */
	{ .dscp = 29, .msc = MBUF_SC_BE },              /*  */
	{ .dscp = _DSCP_AF33, .msc = MBUF_SC_BE },      /* RFC 2597 Multimedia Streaming */
	{ .dscp = 31, .msc = MBUF_SC_BE },              /*  */

	{ .dscp = _DSCP_CS4, .msc = MBUF_SC_VI },       /* RFC 2474 Real-Time Interactive */
	{ .dscp = 33, .msc = MBUF_SC_VI },              /*  */
	{ .dscp = _DSCP_AF41, .msc = MBUF_SC_VI },      /* RFC 2597 Multimedia Conferencing */
	{ .dscp = 35, .msc = MBUF_SC_VI },              /*  */
	{ .dscp = _DSCP_AF42, .msc = MBUF_SC_VI },      /* RFC 2597 Multimedia Conferencing */
	{ .dscp = 37, .msc = MBUF_SC_VI },              /*  */
	{ .dscp = _DSCP_AF43, .msc = MBUF_SC_VI },      /* RFC 2597 Multimedia Conferencing */
	{ .dscp = 39, .msc = MBUF_SC_VI },              /*  */

	{ .dscp = _DSCP_CS5, .msc = MBUF_SC_VI },       /* RFC 2474 Signaling */
	{ .dscp = 41, .msc = MBUF_SC_VI },              /*  */
	{ .dscp = 42, .msc = MBUF_SC_VI },              /*  */
	{ .dscp = 43, .msc = MBUF_SC_VI },              /*  */
	{ .dscp = _DSCP_VA, .msc = MBUF_SC_VI },        /* RFC 5865 VOICE-ADMIT */
	{ .dscp = 45, .msc = MBUF_SC_VI },              /*  */
	{ .dscp = _DSCP_EF, .msc = MBUF_SC_VI },        /* RFC 3246 Telephony */
	{ .dscp = 47, .msc = MBUF_SC_VI },              /*  */

	{ .dscp = _DSCP_CS6, .msc = MBUF_SC_VO },       /* Wi-Fi WMM Certification: Chariot */
	{ .dscp = 49, .msc = MBUF_SC_VO },              /*  */
	{ .dscp = 50, .msc = MBUF_SC_VO },              /*  */
	{ .dscp = 51, .msc = MBUF_SC_VO },              /*  */
	{ .dscp = 52, .msc = MBUF_SC_VO },              /* Wi-Fi WMM Certification: Sigma */
	{ .dscp = 53, .msc = MBUF_SC_VO },              /*  */
	{ .dscp = 54, .msc = MBUF_SC_VO },              /*  */
	{ .dscp = 55, .msc = MBUF_SC_VO },              /*  */

	{ .dscp = _DSCP_CS7, .msc = MBUF_SC_VO },       /* Wi-Fi WMM Certification: Chariot */
	{ .dscp = 57, .msc = MBUF_SC_VO },              /*  */
	{ .dscp = 58, .msc = MBUF_SC_VO },              /*  */
	{ .dscp = 59, .msc = MBUF_SC_VO },              /*  */
	{ .dscp = 60, .msc = MBUF_SC_VO },              /*  */
	{ .dscp = 61, .msc = MBUF_SC_VO },              /*  */
	{ .dscp = 62, .msc = MBUF_SC_VO },              /*  */
	{ .dscp = 63, .msc = MBUF_SC_VO },              /*  */

	{ .dscp = 255, .msc = MBUF_SC_UNSPEC }          /* invalid DSCP to mark last entry */
};
270
/* mbuf service class per DSCP value; one slot for every DSCP from 0 to _MAX_DSCP */
mbuf_svc_class_t wifi_dscp_to_msc_array[DSCP_ARRAY_SIZE];

/*
 * If there is no foreground activity on the interface for
 * TCP_BG_SWITCH_TIME seconds, the background connections can switch to
 * foreground TCP congestion control.
 */
#define TCP_BG_SWITCH_TIME 2 /* seconds */
279
280 #if (DEVELOPMENT || DEBUG)
281
/*
 * tclass_lock is taken around every walk or modification of tfp_head in
 * this file. so_set_default_traffic_class() peeks at tfp_count without it.
 */
static LCK_GRP_DECLARE(tclass_lck_grp, "tclass");
static LCK_MTX_DECLARE(tclass_lock, &tclass_lck_grp);

/* Number of entries currently linked on tfp_head */
static int tfp_count = 0;

/*
 * List of per-process overrides. Entries for a specific pid are inserted
 * at the head and entries for a process name at the tail (see
 * alloc_tclass_for_proc()).
 */
static TAILQ_HEAD(, tclass_for_proc) tfp_head =
    TAILQ_HEAD_INITIALIZER(tfp_head);

/*
 * One traffic class / QoS marking override, keyed either by pid or by
 * process name.
 */
struct tclass_for_proc {
	TAILQ_ENTRY(tclass_for_proc)    tfp_link;
	int             tfp_class;      /* SO_TC_UNSPEC means no traffic class override */
	pid_t           tfp_pid;        /* -1 for an entry keyed by process name */
	char            tfp_pname[(2 * MAXCOMLEN) + 1]; /* only filled in when tfp_pid is -1 */
	uint32_t        tfp_qos_mode;   /* compared against QOS_MODE_MARKING_POLICY_{ENABLE,DISABLE} */
};

static int get_pid_tclass(struct so_tcdbg *);
static int get_pname_tclass(struct so_tcdbg *);
static int set_pid_tclass(struct so_tcdbg *);
static int set_pname_tclass(struct so_tcdbg *);
static int flush_pid_tclass(struct so_tcdbg *);
static int purge_tclass_for_proc(void);
static int flush_tclass_for_proc(void);
static void set_tclass_for_curr_proc(struct socket *);
306
307 /*
308 * Must be called with tclass_lock held
309 */
310 static struct tclass_for_proc *
find_tfp_by_pid(pid_t pid)311 find_tfp_by_pid(pid_t pid)
312 {
313 struct tclass_for_proc *tfp;
314
315 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
316 if (tfp->tfp_pid == pid) {
317 break;
318 }
319 }
320 return tfp;
321 }
322
323 /*
324 * Must be called with tclass_lock held
325 */
326 static struct tclass_for_proc *
find_tfp_by_pname(const char * pname)327 find_tfp_by_pname(const char *pname)
328 {
329 struct tclass_for_proc *tfp;
330
331 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
332 if (strlcmp(tfp->tfp_pname, pname,
333 sizeof(tfp->tfp_pname)) == 0) {
334 break;
335 }
336 }
337 return tfp;
338 }
339
340 __private_extern__ void
set_tclass_for_curr_proc(struct socket * so)341 set_tclass_for_curr_proc(struct socket *so)
342 {
343 struct tclass_for_proc *tfp = NULL;
344 proc_t p = current_proc(); /* Not ref counted */
345 pid_t pid = proc_pid(p);
346 const char *__null_terminated pname = proc_best_name(p);
347
348 lck_mtx_lock(&tclass_lock);
349
350 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
351 if ((tfp->tfp_pid == pid) || (tfp->tfp_pid == -1 &&
352 strlcmp(tfp->tfp_pname, pname,
353 sizeof(tfp->tfp_pname)) == 0)) {
354 if (tfp->tfp_class != SO_TC_UNSPEC) {
355 so->so_traffic_class = (uint16_t)tfp->tfp_class;
356 }
357
358 if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
359 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
360 } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
361 so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
362 }
363 break;
364 }
365 }
366
367 lck_mtx_unlock(&tclass_lock);
368 }
369
370 /*
371 * Purge entries with PIDs of exited processes
372 */
373 int
purge_tclass_for_proc(void)374 purge_tclass_for_proc(void)
375 {
376 int error = 0;
377 struct tclass_for_proc *tfp, *tvar;
378
379 lck_mtx_lock(&tclass_lock);
380
381 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
382 proc_t p;
383
384 if (tfp->tfp_pid == -1) {
385 continue;
386 }
387 if ((p = proc_find(tfp->tfp_pid)) == NULL) {
388 tfp_count--;
389 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
390
391 kfree_type(struct tclass_for_proc, tfp);
392 } else {
393 proc_rele(p);
394 }
395 }
396
397 lck_mtx_unlock(&tclass_lock);
398
399 return error;
400 }
401
402 /*
403 * Remove one entry
404 * Must be called with tclass_lock held
405 */
406 static void
free_tclass_for_proc(struct tclass_for_proc * tfp)407 free_tclass_for_proc(struct tclass_for_proc *tfp)
408 {
409 if (tfp == NULL) {
410 return;
411 }
412 tfp_count--;
413 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
414 kfree_type(struct tclass_for_proc, tfp);
415 }
416
417 /*
418 * Remove all entries
419 */
420 int
flush_tclass_for_proc(void)421 flush_tclass_for_proc(void)
422 {
423 int error = 0;
424 struct tclass_for_proc *tfp, *tvar;
425
426 lck_mtx_lock(&tclass_lock);
427
428 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
429 free_tclass_for_proc(tfp);
430 }
431
432 lck_mtx_unlock(&tclass_lock);
433
434 return error;
435 }
436
437 /*
438 * Must be called with tclass_lock held
439 */
440 static struct tclass_for_proc *
alloc_tclass_for_proc(pid_t pid,const char * __sized_by (pnamelen)pname,size_t pnamelen)441 alloc_tclass_for_proc(pid_t pid, const char *__sized_by(pnamelen) pname, size_t pnamelen)
442 {
443 struct tclass_for_proc *tfp;
444
445 if (pid == -1 && pname == NULL) {
446 return NULL;
447 }
448
449 tfp = kalloc_type(struct tclass_for_proc, Z_NOWAIT | Z_ZERO);
450 if (tfp == NULL) {
451 return NULL;
452 }
453
454 tfp->tfp_pid = pid;
455 /*
456 * Add per pid entries before per proc name so we can find
457 * a specific instance of a process before the general name base entry.
458 */
459 if (pid != -1) {
460 TAILQ_INSERT_HEAD(&tfp_head, tfp, tfp_link);
461 } else {
462 if (pname != NULL) {
463 strbufcpy(tfp->tfp_pname, sizeof(tfp->tfp_pname),
464 pname, pnamelen);
465 } else {
466 tfp->tfp_pname[0] = '\0';
467 }
468 TAILQ_INSERT_TAIL(&tfp_head, tfp, tfp_link);
469 }
470
471 tfp_count++;
472
473 return tfp;
474 }
475
476 /*
477 * SO_TC_UNSPEC for tclass means to remove the entry
478 */
479 int
set_pid_tclass(struct so_tcdbg * so_tcdbg)480 set_pid_tclass(struct so_tcdbg *so_tcdbg)
481 {
482 int error = EINVAL;
483 proc_t p = NULL;
484 struct tclass_for_proc *tfp;
485 pid_t pid = so_tcdbg->so_tcdbg_pid;
486 int tclass = so_tcdbg->so_tcdbg_tclass;
487 int netsvctype = so_tcdbg->so_tcdbg_netsvctype;
488 uint8_t ecn_val = so_tcdbg->so_tcdbg_ecn_val;
489
490 p = proc_find(pid);
491 if (p == NULL) {
492 printf("%s proc_find(%d) failed\n", __func__, pid);
493 goto done;
494 }
495
496 /* Need a tfp */
497 lck_mtx_lock(&tclass_lock);
498
499 tfp = find_tfp_by_pid(pid);
500 if (tfp == NULL) {
501 tfp = alloc_tclass_for_proc(pid, NULL, 0);
502 if (tfp == NULL) {
503 error = ENOBUFS;
504 goto done_unlock;
505 }
506 }
507 tfp->tfp_class = tclass;
508 tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
509
510 if (tfp != NULL) {
511 struct fileproc *fp;
512 proc_fdlock(p);
513 fdt_foreach(fp, p) {
514 struct socket *so;
515
516 if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
517 continue;
518 }
519
520 so = (struct socket *)fp_get_data(fp);
521 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
522 continue;
523 }
524
525 socket_lock(so, 1);
526 if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
527 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
528 } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
529 so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
530 }
531
532 struct inpcb *inp = so ? sotoinpcb(so) : NULL;
533 struct tcpcb *tp = inp ? intotcpcb(inp) : NULL;
534
535 if (tp != NULL) {
536 if (ecn_val == IPTOS_ECN_ECT1 || ecn_val == IPTOS_ECN_ECT0) {
537 tp->ecn_flags |= (ecn_val == IPTOS_ECN_ECT1) ?
538 TE_FORCE_ECT1 : TE_FORCE_ECT0;
539 } else {
540 tp->ecn_flags &= ~(TE_FORCE_ECT1 | TE_FORCE_ECT0);
541 }
542 }
543 socket_unlock(so, 1);
544
545 if (netsvctype != _NET_SERVICE_TYPE_UNSPEC) {
546 error = sock_setsockopt(so, SOL_SOCKET,
547 SO_NET_SERVICE_TYPE, &netsvctype, sizeof(int));
548 }
549 if (tclass != SO_TC_UNSPEC) {
550 error = sock_setsockopt(so, SOL_SOCKET,
551 SO_TRAFFIC_CLASS, &tclass, sizeof(int));
552 }
553 }
554
555 proc_fdunlock(p);
556 }
557
558 error = 0;
559 done_unlock:
560 lck_mtx_unlock(&tclass_lock);
561 done:
562 if (p != NULL) {
563 proc_rele(p);
564 }
565
566 return error;
567 }
568
569 int
set_pname_tclass(struct so_tcdbg * so_tcdbg)570 set_pname_tclass(struct so_tcdbg *so_tcdbg)
571 {
572 int error = EINVAL;
573 struct tclass_for_proc *tfp;
574
575 lck_mtx_lock(&tclass_lock);
576
577 tfp = find_tfp_by_pname(__unsafe_null_terminated_from_indexable(so_tcdbg->so_tcdbg_pname));
578 if (tfp == NULL) {
579 tfp = alloc_tclass_for_proc(-1, so_tcdbg->so_tcdbg_pname,
580 sizeof(so_tcdbg->so_tcdbg_pname));
581 if (tfp == NULL) {
582 lck_mtx_unlock(&tclass_lock);
583 error = ENOBUFS;
584 goto done;
585 }
586 }
587 tfp->tfp_class = so_tcdbg->so_tcdbg_tclass;
588 tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
589
590 lck_mtx_unlock(&tclass_lock);
591
592 error = 0;
593 done:
594
595 return error;
596 }
597
598 static int
flush_pid_tclass(struct so_tcdbg * so_tcdbg)599 flush_pid_tclass(struct so_tcdbg *so_tcdbg)
600 {
601 pid_t pid = so_tcdbg->so_tcdbg_pid;
602 int tclass = so_tcdbg->so_tcdbg_tclass;
603 struct fileproc *fp;
604 proc_t p;
605 int error;
606
607 p = proc_find(pid);
608 if (p == PROC_NULL) {
609 printf("%s proc_find(%d) failed\n", __func__, pid);
610 return EINVAL;
611 }
612
613 proc_fdlock(p);
614
615 fdt_foreach(fp, p) {
616 struct socket *so;
617
618 if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
619 continue;
620 }
621
622 so = (struct socket *)fp_get_data(fp);
623 error = sock_setsockopt(so, SOL_SOCKET, SO_FLUSH, &tclass,
624 sizeof(tclass));
625 if (error != 0) {
626 printf("%s: setsockopt(SO_FLUSH) (so=0x%llx, fd=%d, "
627 "tclass=%d) failed %d\n", __func__,
628 (uint64_t)VM_KERNEL_ADDRPERM(so), fdt_foreach_fd(), tclass,
629 error);
630 }
631 }
632
633 proc_fdunlock(p);
634
635 proc_rele(p);
636 return 0;
637 }
638
639 int
get_pid_tclass(struct so_tcdbg * so_tcdbg)640 get_pid_tclass(struct so_tcdbg *so_tcdbg)
641 {
642 int error = EINVAL;
643 proc_t p = NULL;
644 struct tclass_for_proc *tfp;
645 pid_t pid = so_tcdbg->so_tcdbg_pid;
646
647 so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
648
649 p = proc_find(pid);
650 if (p == NULL) {
651 printf("%s proc_find(%d) failed\n", __func__, pid);
652 goto done;
653 }
654
655 /* Need a tfp */
656 lck_mtx_lock(&tclass_lock);
657
658 tfp = find_tfp_by_pid(pid);
659 if (tfp != NULL) {
660 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
661 so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
662 error = 0;
663 }
664 lck_mtx_unlock(&tclass_lock);
665 done:
666 if (p != NULL) {
667 proc_rele(p);
668 }
669
670 return error;
671 }
672
673 int
get_pname_tclass(struct so_tcdbg * so_tcdbg)674 get_pname_tclass(struct so_tcdbg *so_tcdbg)
675 {
676 int error = EINVAL;
677 struct tclass_for_proc *tfp;
678
679 so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
680
681 /* Need a tfp */
682 lck_mtx_lock(&tclass_lock);
683
684 tfp = find_tfp_by_pname(__unsafe_null_terminated_from_indexable(so_tcdbg->so_tcdbg_pname));
685 if (tfp != NULL) {
686 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
687 so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
688 error = 0;
689 }
690 lck_mtx_unlock(&tclass_lock);
691
692 return error;
693 }
694
695 static int
delete_tclass_for_pid_pname(struct so_tcdbg * so_tcdbg)696 delete_tclass_for_pid_pname(struct so_tcdbg *so_tcdbg)
697 {
698 int error = EINVAL;
699 pid_t pid = so_tcdbg->so_tcdbg_pid;
700 struct tclass_for_proc *tfp = NULL;
701
702 lck_mtx_lock(&tclass_lock);
703
704 if (pid != -1) {
705 tfp = find_tfp_by_pid(pid);
706 } else {
707 tfp = find_tfp_by_pname(__unsafe_null_terminated_from_indexable(so_tcdbg->so_tcdbg_pname));
708 }
709
710 if (tfp != NULL) {
711 free_tclass_for_proc(tfp);
712 error = 0;
713 }
714
715 lck_mtx_unlock(&tclass_lock);
716
717 return error;
718 }
719
720 /*
721 * Setting options requires privileges
722 */
723 __private_extern__ int
so_set_tcdbg(struct socket * so,struct so_tcdbg * so_tcdbg)724 so_set_tcdbg(struct socket *so, struct so_tcdbg *so_tcdbg)
725 {
726 int error = 0;
727
728 if ((so->so_state & SS_PRIV) == 0) {
729 return EPERM;
730 }
731
732 socket_unlock(so, 0);
733
734 switch (so_tcdbg->so_tcdbg_cmd) {
735 case SO_TCDBG_PID:
736 error = set_pid_tclass(so_tcdbg);
737 break;
738
739 case SO_TCDBG_PNAME:
740 error = set_pname_tclass(so_tcdbg);
741 break;
742
743 case SO_TCDBG_PURGE:
744 error = purge_tclass_for_proc();
745 break;
746
747 case SO_TCDBG_FLUSH:
748 error = flush_tclass_for_proc();
749 break;
750
751 case SO_TCDBG_DELETE:
752 error = delete_tclass_for_pid_pname(so_tcdbg);
753 break;
754
755 case SO_TCDBG_TCFLUSH_PID:
756 error = flush_pid_tclass(so_tcdbg);
757 break;
758
759 default:
760 error = EINVAL;
761 break;
762 }
763
764 socket_lock(so, 0);
765
766 return error;
767 }
768
769 /*
770 * Not required to be privileged to get
771 */
772 __private_extern__ int
sogetopt_tcdbg(struct socket * so,struct sockopt * sopt)773 sogetopt_tcdbg(struct socket *so, struct sockopt *sopt)
774 {
775 int error = 0;
776 struct so_tcdbg so_tcdbg;
777 void *buf = NULL;
778 size_t len = sopt->sopt_valsize;
779
780 error = sooptcopyin(sopt, &so_tcdbg, sizeof(struct so_tcdbg),
781 sizeof(struct so_tcdbg));
782 if (error != 0) {
783 return error;
784 }
785
786 sopt->sopt_valsize = len;
787
788 socket_unlock(so, 0);
789
790 switch (so_tcdbg.so_tcdbg_cmd) {
791 case SO_TCDBG_PID:
792 error = get_pid_tclass(&so_tcdbg);
793 break;
794
795 case SO_TCDBG_PNAME:
796 error = get_pname_tclass(&so_tcdbg);
797 break;
798
799 case SO_TCDBG_COUNT:
800 lck_mtx_lock(&tclass_lock);
801 so_tcdbg.so_tcdbg_count = tfp_count;
802 lck_mtx_unlock(&tclass_lock);
803 break;
804
805 case SO_TCDBG_LIST: {
806 struct tclass_for_proc *tfp;
807 int n, alloc_count;
808 struct so_tcdbg *ptr;
809
810 lck_mtx_lock(&tclass_lock);
811 if ((alloc_count = tfp_count) == 0) {
812 lck_mtx_unlock(&tclass_lock);
813 error = EINVAL;
814 break;
815 }
816 len = alloc_count * sizeof(struct so_tcdbg);
817 lck_mtx_unlock(&tclass_lock);
818
819 buf = kalloc_data(len, Z_WAITOK | Z_ZERO);
820 if (buf == NULL) {
821 error = ENOBUFS;
822 break;
823 }
824
825 lck_mtx_lock(&tclass_lock);
826 n = 0;
827 ptr = (struct so_tcdbg *)buf;
828 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
829 if (++n > alloc_count) {
830 break;
831 }
832 if (tfp->tfp_pid != -1) {
833 ptr->so_tcdbg_cmd = SO_TCDBG_PID;
834 ptr->so_tcdbg_pid = tfp->tfp_pid;
835 } else {
836 ptr->so_tcdbg_cmd = SO_TCDBG_PNAME;
837 ptr->so_tcdbg_pid = -1;
838 strbufcpy(ptr->so_tcdbg_pname,
839 tfp->tfp_pname);
840 }
841 ptr->so_tcdbg_tclass = tfp->tfp_class;
842 ptr->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
843 ptr++;
844 }
845
846 lck_mtx_unlock(&tclass_lock);
847 }
848 break;
849
850 default:
851 error = EINVAL;
852 break;
853 }
854
855 socket_lock(so, 0);
856
857 if (error == 0) {
858 if (buf == NULL) {
859 error = sooptcopyout(sopt, &so_tcdbg,
860 sizeof(struct so_tcdbg));
861 } else {
862 error = sooptcopyout(sopt, buf, len);
863 kfree_data(buf, len);
864 }
865 }
866 return error;
867 }
868
869 #endif /* (DEVELOPMENT || DEBUG) */
870
871 int
so_get_netsvc_marking_level(struct socket * so)872 so_get_netsvc_marking_level(struct socket *so)
873 {
874 int marking_level = NETSVC_MRKNG_UNKNOWN;
875 struct ifnet *ifp = NULL;
876
877 switch (SOCK_DOM(so)) {
878 case PF_INET: {
879 struct inpcb *inp = sotoinpcb(so);
880
881 if (inp != NULL) {
882 ifp = inp->inp_last_outifp;
883 }
884 break;
885 }
886 case PF_INET6: {
887 struct in6pcb *in6p = sotoin6pcb(so);
888
889 if (in6p != NULL) {
890 ifp = in6p->in6p_last_outifp;
891 }
892 break;
893 }
894 default:
895 break;
896 }
897 if (ifp != NULL) {
898 if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0) {
899 if ((so->so_flags1 & SOF1_QOSMARKING_ALLOWED)) {
900 marking_level = NETSVC_MRKNG_LVL_L3L2_ALL;
901 } else {
902 marking_level = NETSVC_MRKNG_LVL_L3L2_BK;
903 }
904 } else {
905 marking_level = NETSVC_MRKNG_LVL_L2;
906 }
907 }
908 return marking_level;
909 }
910
911 __private_extern__ int
so_set_traffic_class(struct socket * so,int optval)912 so_set_traffic_class(struct socket *so, int optval)
913 {
914 int error = 0;
915
916 if (optval < SO_TC_BE || optval > SO_TC_CTL) {
917 error = EINVAL;
918 } else {
919 switch (optval) {
920 case _SO_TC_BK:
921 optval = SO_TC_BK;
922 break;
923 case _SO_TC_VI:
924 optval = SO_TC_VI;
925 break;
926 case _SO_TC_VO:
927 optval = SO_TC_VO;
928 break;
929 default:
930 if (!SO_VALID_TC(optval)) {
931 error = EINVAL;
932 }
933 break;
934 }
935
936 if (error == 0) {
937 int oldval = so->so_traffic_class;
938
939 VERIFY(SO_VALID_TC(optval));
940 so->so_traffic_class = (uint16_t)optval;
941
942 if ((SOCK_DOM(so) == PF_INET ||
943 SOCK_DOM(so) == PF_INET6) &&
944 SOCK_TYPE(so) == SOCK_STREAM) {
945 set_tcp_stream_priority(so);
946 }
947
948 if ((SOCK_DOM(so) == PF_INET ||
949 SOCK_DOM(so) == PF_INET6) &&
950 optval != oldval && (optval == SO_TC_BK_SYS ||
951 oldval == SO_TC_BK_SYS)) {
952 /*
953 * If the app switches from BK_SYS to something
954 * else, resume the socket if it was suspended.
955 */
956 if (oldval == SO_TC_BK_SYS) {
957 inp_reset_fc_state(so->so_pcb);
958 }
959
960 SOTHROTTLELOG("throttle[%d]: so 0x%llx "
961 "[%d,%d] opportunistic %s\n", so->last_pid,
962 (uint64_t)VM_KERNEL_ADDRPERM(so),
963 SOCK_DOM(so), SOCK_TYPE(so),
964 (optval == SO_TC_BK_SYS) ? "ON" : "OFF");
965 }
966 }
967 }
968 return error;
969 }
970
971 __private_extern__ int
so_set_net_service_type(struct socket * so,int netsvctype)972 so_set_net_service_type(struct socket *so, int netsvctype)
973 {
974 int sotc;
975 int error;
976
977 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype)) {
978 return EINVAL;
979 }
980
981 sotc = sotc_by_netservicetype[netsvctype];
982 error = so_set_traffic_class(so, sotc);
983 if (error != 0) {
984 return error;
985 }
986 so->so_netsvctype = (int8_t)netsvctype;
987 so->so_flags1 |= SOF1_TC_NET_SERV_TYPE;
988
989 return 0;
990 }
991
992 __private_extern__ void
so_set_default_traffic_class(struct socket * so)993 so_set_default_traffic_class(struct socket *so)
994 {
995 so->so_traffic_class = SO_TC_BE;
996
997 if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6)) {
998 if (net_qos_policy_restricted == 0) {
999 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
1000 }
1001 #if (DEVELOPMENT || DEBUG)
1002 if (tfp_count > 0) {
1003 set_tclass_for_curr_proc(so);
1004 }
1005 #endif /* (DEVELOPMENT || DEBUG) */
1006 }
1007 }
1008
1009 __private_extern__ int
so_set_opportunistic(struct socket * so,int optval)1010 so_set_opportunistic(struct socket *so, int optval)
1011 {
1012 return so_set_traffic_class(so, (optval == 0) ?
1013 SO_TC_BE : SO_TC_BK_SYS);
1014 }
1015
1016 __private_extern__ int
so_get_opportunistic(struct socket * so)1017 so_get_opportunistic(struct socket *so)
1018 {
1019 return so->so_traffic_class == SO_TC_BK_SYS;
1020 }
1021
1022 __private_extern__ int
ip_tos_from_control(struct mbuf * control)1023 ip_tos_from_control(struct mbuf *control)
1024 {
1025 struct cmsghdr *cm;
1026 int tos = IPTOS_UNSPEC;
1027
1028 for (cm = M_FIRST_CMSGHDR(control);
1029 is_cmsg_valid(control, cm);
1030 cm = M_NXT_CMSGHDR(control, cm)) {
1031 if (cm->cmsg_len != CMSG_LEN(sizeof(int))) {
1032 continue;
1033 }
1034
1035 if ((cm->cmsg_level == IPPROTO_IP &&
1036 cm->cmsg_type == IP_TOS) ||
1037 (cm->cmsg_level == IPPROTO_IPV6 &&
1038 cm->cmsg_type == IPV6_TCLASS)) {
1039 tos = *(int *)(void *)CMSG_DATA(cm) & IPTOS_MASK;
1040 /* The first valid option wins */
1041 break;
1042 }
1043 }
1044
1045 return tos;
1046 }
1047
1048 /*
1049 * There is no traffic class for input packet
1050 */
1051 __private_extern__ void
so_recv_data_stat(struct socket * so,struct mbuf * m,size_t off)1052 so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off)
1053 {
1054 so->so_tc_stats[SO_STATS_DATA].rxpackets += 1;
1055 so->so_tc_stats[SO_STATS_DATA].rxbytes +=
1056 ((m->m_flags & M_PKTHDR) ? m->m_pkthdr.len : 0) + off;
1057 }
1058
1059 __private_extern__ void
so_inc_recv_data_stat(struct socket * so,size_t pkts,size_t bytes)1060 so_inc_recv_data_stat(struct socket *so, size_t pkts, size_t bytes)
1061 {
1062 so->so_tc_stats[SO_STATS_DATA].rxpackets += pkts;
1063 so->so_tc_stats[SO_STATS_DATA].rxbytes += bytes;
1064 }
1065
1066 static inline int
so_throttle_best_effort(struct socket * so,struct ifnet * ifp)1067 so_throttle_best_effort(struct socket *so, struct ifnet *ifp)
1068 {
1069 uint32_t uptime = (uint32_t)net_uptime();
1070 return soissrcbesteffort(so) &&
1071 net_io_policy_throttle_best_effort == 1 &&
1072 ifp->if_rt_sendts > 0 &&
1073 (int)(uptime - ifp->if_rt_sendts) <= TCP_BG_SWITCH_TIME;
1074 }
1075
1076 __private_extern__ void
set_tcp_stream_priority(struct socket * so)1077 set_tcp_stream_priority(struct socket *so)
1078 {
1079 struct inpcb *inp = sotoinpcb(so);
1080 struct tcpcb *tp = intotcpcb(inp);
1081 struct ifnet *outifp;
1082 u_char old_cc = tp->tcp_cc_index;
1083 int recvbg = IS_TCP_RECV_BG(so);
1084 bool is_local = false, fg_active = false;
1085 uint32_t uptime;
1086
1087 VERIFY((SOCK_CHECK_DOM(so, PF_INET) ||
1088 SOCK_CHECK_DOM(so, PF_INET6)) &&
1089 SOCK_CHECK_TYPE(so, SOCK_STREAM) &&
1090 SOCK_CHECK_PROTO(so, IPPROTO_TCP));
1091
1092 /* Return if the socket is in a terminal state */
1093 if (inp->inp_state == INPCB_STATE_DEAD) {
1094 return;
1095 }
1096
1097 outifp = inp->inp_last_outifp;
1098 uptime = (uint32_t)net_uptime();
1099
1100 /*
1101 * If the socket was marked as a background socket or if the
1102 * traffic class is set to background with traffic class socket
1103 * option then make both send and recv side of the stream to be
1104 * background. The variable sotcdb which can be set with sysctl
1105 * is used to disable these settings for testing.
1106 */
1107 if (outifp == NULL || (outifp->if_flags & IFF_LOOPBACK)) {
1108 is_local = true;
1109 }
1110
1111 /* Check if there has been recent foreground activity */
1112 if (outifp != NULL) {
1113 /*
1114 * If the traffic source is background, check if
1115 * there is recent foreground activity which should
1116 * continue to keep the traffic source as background.
1117 * Otherwise, we can switch the traffic source to
1118 * foreground.
1119 */
1120 if (soissrcbackground(so) && outifp->if_fg_sendts > 0 &&
1121 (int)(uptime - outifp->if_fg_sendts) <= TCP_BG_SWITCH_TIME) {
1122 fg_active = true;
1123 }
1124
1125 /*
1126 * The traffic source is best-effort -- check if
1127 * the policy to throttle best effort is enabled
1128 * and there was realtime activity on this
1129 * interface recently. If this is true, enable
1130 * algorithms that respond to increased latency
1131 * on best-effort traffic.
1132 */
1133 if (so_throttle_best_effort(so, outifp)) {
1134 fg_active = true;
1135 }
1136 }
1137
1138 /*
1139 * System initiated background traffic like cloud uploads should
1140 * always use background delay sensitive algorithms. This will
1141 * make the stream more responsive to other streams on the user's
1142 * network and it will minimize latency induced.
1143 */
1144 if (fg_active || IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
1145 /*
1146 * If the interface that the connection is using is
1147 * loopback, do not use background congestion
1148 * control algorithm.
1149 *
1150 * If there has been recent foreground activity or if there
1151 * was an indication that a real time foreground application
1152 * is going to use networking (net_io_policy_throttled),
1153 * switch the background and best effort streams to use background
1154 * congestion control algorithm.
1155 */
1156 if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0 || is_local) {
1157 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
1158 tcp_set_foreground_cc(so);
1159 }
1160 } else {
1161 if (old_cc != TCP_CC_ALGO_BACKGROUND_INDEX) {
1162 tcp_set_background_cc(so);
1163 }
1164 }
1165
1166 /* Set receive side background flags */
1167 if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0 || is_local) {
1168 tcp_clear_recv_bg(so);
1169 } else {
1170 tcp_set_recv_bg(so);
1171 }
1172 } else {
1173 /*
1174 * If there is no recent foreground activity, even the
1175 * background flows can use foreground congestion controller.
1176 */
1177 tcp_clear_recv_bg(so);
1178 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
1179 tcp_set_foreground_cc(so);
1180 }
1181 }
1182
1183 if (old_cc != tp->tcp_cc_index || recvbg != IS_TCP_RECV_BG(so)) {
1184 SOTHROTTLELOG("throttle[%d]: so 0x%llx [%d,%d] TCP %s send; "
1185 "%s recv\n", so->last_pid,
1186 (uint64_t)VM_KERNEL_ADDRPERM(so),
1187 SOCK_DOM(so), SOCK_TYPE(so),
1188 (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) ?
1189 "background" : "foreground",
1190 IS_TCP_RECV_BG(so) ? "background" : "foreground");
1191 }
1192 }
1193
1194 /*
1195 * Set traffic class to an IPv4 or IPv6 packet
1196 * - mark the mbuf
1197 * - set the DSCP code following the WMM mapping
1198 */
1199 __private_extern__ void
set_packet_service_class(struct mbuf * m,struct socket * so,int sotc,uint32_t flags)1200 set_packet_service_class(struct mbuf *m, struct socket *so,
1201 int sotc, uint32_t flags)
1202 {
1203 mbuf_svc_class_t msc = MBUF_SC_BE; /* Best effort by default */
1204 struct inpcb *inp = sotoinpcb(so); /* in6pcb and inpcb are the same */
1205
1206 if (!(m->m_flags & M_PKTHDR)) {
1207 return;
1208 }
1209
1210 /*
1211 * Here is the precedence:
1212 * 1) TRAFFIC_MGT_SO_BACKGROUND trumps all
1213 * 2) Traffic class passed via ancillary data to sendmsdg(2)
1214 * 3) Traffic class socket option last
1215 */
1216 if (sotc != SO_TC_UNSPEC) {
1217 VERIFY(SO_VALID_TC(sotc));
1218 msc = so_tc2msc(sotc);
1219 /* Assert because tc must have been valid */
1220 VERIFY(MBUF_VALID_SC(msc));
1221 }
1222
1223 /*
1224 * If TRAFFIC_MGT_SO_BACKGROUND is set or policy to throttle
1225 * best effort is set, depress the priority.
1226 */
1227 if (!IS_MBUF_SC_BACKGROUND(msc) && soisthrottled(so)) {
1228 msc = MBUF_SC_BK;
1229 }
1230
1231 if (IS_MBUF_SC_BESTEFFORT(msc) && inp->inp_last_outifp != NULL &&
1232 so_throttle_best_effort(so, inp->inp_last_outifp)) {
1233 msc = MBUF_SC_BK;
1234 }
1235
1236 if (soissrcbackground(so)) {
1237 m->m_pkthdr.pkt_flags |= PKTF_SO_BACKGROUND;
1238 }
1239
1240 if (soissrcrealtime(so) || IS_MBUF_SC_REALTIME(msc)) {
1241 m->m_pkthdr.pkt_flags |= PKTF_SO_REALTIME;
1242 }
1243 /*
1244 * Set the traffic class in the mbuf packet header svc field
1245 */
1246 if (sotcdb & SOTCDB_NO_MTC) {
1247 goto no_mbtc;
1248 }
1249
1250 /*
1251 * Elevate service class if the packet is a pure TCP ACK.
1252 * We can do this only when the flow is not a background
1253 * flow and the outgoing interface supports
1254 * transmit-start model.
1255 */
1256 if (!IS_MBUF_SC_BACKGROUND(msc) &&
1257 (flags & (PKT_SCF_TCP_ACK | PKT_SCF_TCP_SYN)) != 0) {
1258 msc = MBUF_SC_CTL;
1259 }
1260
1261 (void) m_set_service_class(m, msc);
1262
1263 /*
1264 * Set the privileged traffic auxiliary flag if applicable,
1265 * or clear it.
1266 */
1267 if (!(sotcdb & SOTCDB_NO_PRIVILEGED) && soisprivilegedtraffic(so) &&
1268 msc != MBUF_SC_UNSPEC) {
1269 m->m_pkthdr.pkt_flags |= PKTF_PRIO_PRIVILEGED;
1270 } else {
1271 m->m_pkthdr.pkt_flags &= ~PKTF_PRIO_PRIVILEGED;
1272 }
1273
1274 no_mbtc:
1275 /*
1276 * For TCP with background traffic class switch CC algo based on sysctl
1277 */
1278 if (so->so_type == SOCK_STREAM) {
1279 set_tcp_stream_priority(so);
1280 }
1281 }
1282
1283 __private_extern__ mbuf_svc_class_t
so_tc2msc(int tc)1284 so_tc2msc(int tc)
1285 {
1286 mbuf_svc_class_t msc;
1287
1288 switch (tc) {
1289 case SO_TC_BK_SYS:
1290 msc = MBUF_SC_BK_SYS;
1291 break;
1292 case SO_TC_BK:
1293 case _SO_TC_BK:
1294 msc = MBUF_SC_BK;
1295 break;
1296 case SO_TC_BE:
1297 msc = MBUF_SC_BE;
1298 break;
1299 case SO_TC_RD:
1300 msc = MBUF_SC_RD;
1301 break;
1302 case SO_TC_OAM:
1303 msc = MBUF_SC_OAM;
1304 break;
1305 case SO_TC_AV:
1306 msc = MBUF_SC_AV;
1307 break;
1308 case SO_TC_RV:
1309 msc = MBUF_SC_RV;
1310 break;
1311 case SO_TC_VI:
1312 case _SO_TC_VI:
1313 msc = MBUF_SC_VI;
1314 break;
1315 case SO_TC_NETSVC_SIG:
1316 msc = MBUF_SC_SIG;
1317 break;
1318 case SO_TC_VO:
1319 case _SO_TC_VO:
1320 msc = MBUF_SC_VO;
1321 break;
1322 case SO_TC_CTL:
1323 msc = MBUF_SC_CTL;
1324 break;
1325 case SO_TC_ALL:
1326 default:
1327 msc = MBUF_SC_UNSPEC;
1328 break;
1329 }
1330
1331 return msc;
1332 }
1333
1334 __private_extern__ int
so_svc2tc(mbuf_svc_class_t svc)1335 so_svc2tc(mbuf_svc_class_t svc)
1336 {
1337 switch (svc) {
1338 case MBUF_SC_BK_SYS:
1339 return SO_TC_BK_SYS;
1340 case MBUF_SC_BK:
1341 return SO_TC_BK;
1342 case MBUF_SC_BE:
1343 return SO_TC_BE;
1344 case MBUF_SC_RD:
1345 return SO_TC_RD;
1346 case MBUF_SC_OAM:
1347 return SO_TC_OAM;
1348 case MBUF_SC_AV:
1349 return SO_TC_AV;
1350 case MBUF_SC_RV:
1351 return SO_TC_RV;
1352 case MBUF_SC_VI:
1353 return SO_TC_VI;
1354 case MBUF_SC_SIG:
1355 return SO_TC_NETSVC_SIG;
1356 case MBUF_SC_VO:
1357 return SO_TC_VO;
1358 case MBUF_SC_CTL:
1359 return SO_TC_CTL;
1360 case MBUF_SC_UNSPEC:
1361 default:
1362 return SO_TC_BE;
1363 }
1364 }
1365
1366 static size_t
sotc_index(int sotc)1367 sotc_index(int sotc)
1368 {
1369 switch (sotc) {
1370 case SO_TC_BK_SYS:
1371 return SOTCIX_BK_SYS;
1372 case _SO_TC_BK:
1373 case SO_TC_BK:
1374 return SOTCIX_BK;
1375
1376 case SO_TC_BE:
1377 return SOTCIX_BE;
1378 case SO_TC_RD:
1379 return SOTCIX_RD;
1380 case SO_TC_OAM:
1381 return SOTCIX_OAM;
1382
1383 case SO_TC_AV:
1384 return SOTCIX_AV;
1385 case SO_TC_RV:
1386 return SOTCIX_RV;
1387 case _SO_TC_VI:
1388 case SO_TC_VI:
1389 return SOTCIX_VI;
1390
1391 case _SO_TC_VO:
1392 case SO_TC_VO:
1393 return SOTCIX_VO;
1394 case SO_TC_CTL:
1395 return SOTCIX_CTL;
1396
1397 default:
1398 break;
1399 }
1400 /*
1401 * Unknown traffic class value
1402 */
1403 return SIZE_T_MAX;
1404 }
1405
1406 uint8_t
fastlane_sc_to_dscp(uint32_t svc_class)1407 fastlane_sc_to_dscp(uint32_t svc_class)
1408 {
1409 uint8_t dscp = _DSCP_DF;
1410
1411 switch (svc_class) {
1412 case MBUF_SC_BK_SYS:
1413 case MBUF_SC_BK:
1414 dscp = _DSCP_AF11;
1415 break;
1416
1417 case MBUF_SC_BE:
1418 dscp = _DSCP_DF;
1419 break;
1420 case MBUF_SC_RD:
1421 dscp = _DSCP_AF21;
1422 break;
1423 case MBUF_SC_OAM:
1424 dscp = _DSCP_CS2;
1425 break;
1426
1427 case MBUF_SC_AV:
1428 dscp = _DSCP_AF31;
1429 break;
1430 case MBUF_SC_RV:
1431 dscp = _DSCP_CS4;
1432 break;
1433 case MBUF_SC_VI:
1434 dscp = _DSCP_AF41;
1435 break;
1436 case MBUF_SC_SIG:
1437 dscp = _DSCP_CS3;
1438 break;
1439
1440 case MBUF_SC_VO:
1441 dscp = _DSCP_EF;
1442 break;
1443 case MBUF_SC_CTL:
1444 dscp = _DSCP_DF;
1445 break;
1446 default:
1447 dscp = _DSCP_DF;
1448 break;
1449 }
1450
1451 return dscp;
1452 }
1453
1454 uint8_t
rfc4594_sc_to_dscp(uint32_t svc_class)1455 rfc4594_sc_to_dscp(uint32_t svc_class)
1456 {
1457 uint8_t dscp = _DSCP_DF;
1458
1459 switch (svc_class) {
1460 case MBUF_SC_BK_SYS: /* Low-Priority Data */
1461 case MBUF_SC_BK:
1462 dscp = _DSCP_CS1;
1463 break;
1464
1465 case MBUF_SC_BE: /* Standard */
1466 dscp = _DSCP_DF;
1467 break;
1468 case MBUF_SC_RD: /* Low-Latency Data */
1469 dscp = _DSCP_AF21;
1470 break;
1471
1472 /* SVC_CLASS Not Defined: High-Throughput Data */
1473
1474 case MBUF_SC_OAM: /* OAM */
1475 dscp = _DSCP_CS2;
1476 break;
1477
1478 /* SVC_CLASS Not Defined: Broadcast Video */
1479
1480 case MBUF_SC_AV: /* Multimedia Streaming */
1481 dscp = _DSCP_AF31;
1482 break;
1483 case MBUF_SC_RV: /* Real-Time Interactive */
1484 dscp = _DSCP_CS4;
1485 break;
1486 case MBUF_SC_VI: /* Multimedia Conferencing */
1487 dscp = _DSCP_AF41;
1488 break;
1489 case MBUF_SC_SIG: /* Signaling */
1490 dscp = _DSCP_CS5;
1491 break;
1492
1493 case MBUF_SC_VO: /* Telephony */
1494 dscp = _DSCP_EF;
1495 break;
1496 case MBUF_SC_CTL: /* Network Control*/
1497 dscp = _DSCP_CS6;
1498 break;
1499 default:
1500 dscp = _DSCP_DF;
1501 break;
1502 }
1503
1504 return dscp;
1505 }
1506
1507 mbuf_traffic_class_t
rfc4594_dscp_to_tc(uint8_t dscp)1508 rfc4594_dscp_to_tc(uint8_t dscp)
1509 {
1510 mbuf_traffic_class_t tc = MBUF_TC_BE;
1511
1512 switch (dscp) {
1513 case _DSCP_CS1:
1514 tc = MBUF_TC_BK;
1515 break;
1516 case _DSCP_DF:
1517 case _DSCP_AF21:
1518 case _DSCP_CS2:
1519 tc = MBUF_TC_BE;
1520 break;
1521 case _DSCP_AF31:
1522 case _DSCP_CS4:
1523 case _DSCP_AF41:
1524 case _DSCP_CS5:
1525 tc = MBUF_TC_VI;
1526 break;
1527 case _DSCP_EF:
1528 case _DSCP_CS6:
1529 tc = MBUF_TC_VO;
1530 break;
1531 default:
1532 tc = MBUF_TC_BE;
1533 break;
1534 }
1535
1536 return tc;
1537 }
1538
1539 /*
1540 * Pass NULL ifp for default map
1541 */
1542 static errno_t
set_netsvctype_dscp_map(struct net_qos_dscp_map * net_qos_dscp_map,const struct netsvctype_dscp_map * __counted_by (_NET_SERVICE_TYPE_COUNT)netsvctype_dscp_map)1543 set_netsvctype_dscp_map(struct net_qos_dscp_map *net_qos_dscp_map,
1544 const struct netsvctype_dscp_map *__counted_by(_NET_SERVICE_TYPE_COUNT) netsvctype_dscp_map)
1545 {
1546 size_t i;
1547 int netsvctype;
1548
1549 VERIFY(netsvctype_dscp_map != NULL);
1550 /*
1551 * Do not accept more that max number of distinct DSCPs
1552 */
1553 if (net_qos_dscp_map == NULL) {
1554 return EINVAL;
1555 }
1556
1557 /*
1558 * Validate input parameters
1559 */
1560 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1561 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype_dscp_map[i].netsvctype)) {
1562 return EINVAL;
1563 }
1564 if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
1565 return EINVAL;
1566 }
1567 }
1568
1569 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1570 netsvctype = netsvctype_dscp_map[i].netsvctype;
1571
1572 net_qos_dscp_map->netsvctype_to_dscp[netsvctype] =
1573 netsvctype_dscp_map[i].dscp;
1574 }
1575 for (netsvctype = 0; netsvctype < _NET_SERVICE_TYPE_COUNT; netsvctype++) {
1576 switch (netsvctype) {
1577 case NET_SERVICE_TYPE_BE:
1578 case NET_SERVICE_TYPE_BK:
1579 case NET_SERVICE_TYPE_VI:
1580 case NET_SERVICE_TYPE_VO:
1581 case NET_SERVICE_TYPE_RV:
1582 case NET_SERVICE_TYPE_AV:
1583 case NET_SERVICE_TYPE_OAM:
1584 case NET_SERVICE_TYPE_RD: {
1585 size_t sotcix;
1586
1587 sotcix = sotc_index(sotc_by_netservicetype[netsvctype]);
1588 if (sotcix != SIZE_T_MAX) {
1589 net_qos_dscp_map->sotc_to_dscp[sotcix] =
1590 netsvctype_dscp_map[netsvctype].dscp;
1591 }
1592 break;
1593 }
1594 case NET_SERVICE_TYPE_SIG:
1595 /* Signaling does not have its own traffic class */
1596 break;
1597 default:
1598 /* We should not be here */
1599 ASSERT(0);
1600 }
1601 }
1602 if (net_qos_dscp_map == &fastlane_net_qos_dscp_map) {
1603 /* Network control socket traffic class is always best effort for fastlane*/
1604 net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_DF;
1605 } else {
1606 net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_CS6;
1607 }
1608
1609 /* Background system socket traffic class DSCP same as background */
1610 net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK_SYS] =
1611 net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK];
1612
1613 return 0;
1614 }
1615
1616 static size_t
get_netsvctype_dscp_map(struct netsvctype_dscp_map * __counted_by (_NET_SERVICE_TYPE_COUNT)netsvctype_dscp_map)1617 get_netsvctype_dscp_map(struct netsvctype_dscp_map *__counted_by(_NET_SERVICE_TYPE_COUNT) netsvctype_dscp_map)
1618 {
1619 struct net_qos_dscp_map *net_qos_dscp_map;
1620 int i;
1621
1622 net_qos_dscp_map = &fastlane_net_qos_dscp_map;
1623
1624 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1625 netsvctype_dscp_map[i].netsvctype = i;
1626 netsvctype_dscp_map[i].dscp = net_qos_dscp_map->netsvctype_to_dscp[i];
1627 }
1628
1629 return i * sizeof(struct netsvctype_dscp_map);
1630 }
1631
1632 void
net_qos_map_init()1633 net_qos_map_init()
1634 {
1635 errno_t error;
1636
1637 error = set_netsvctype_dscp_map(&fastlane_net_qos_dscp_map,
1638 fastlane_netsvctype_dscp_map);
1639 ASSERT(error == 0);
1640
1641 error = set_netsvctype_dscp_map(&rfc4594_net_qos_dscp_map,
1642 rfc4594_netsvctype_dscp_map);
1643 ASSERT(error == 0);
1644
1645 #if (DEBUG || DEVELOPMENT)
1646 error = set_netsvctype_dscp_map(&custom_net_qos_dscp_map,
1647 rfc4594_netsvctype_dscp_map);
1648 ASSERT(error == 0);
1649
1650 #endif /* (DEBUG || DEVELOPMENT) */
1651
1652 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
1653 }
1654
1655 int
1656 sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS
1657 {
1658 #pragma unused(oidp, arg1, arg2)
1659 int error = 0;
1660
1661 if (req->oldptr == USER_ADDR_NULL) {
1662 req->oldidx =
1663 _NET_SERVICE_TYPE_COUNT * sizeof(struct netsvctype_dscp_map);
1664 } else if (req->oldlen > 0) {
1665 struct netsvctype_dscp_map netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {};
1666 size_t len;
1667
1668 len = get_netsvctype_dscp_map(netsvctype_dscp_map);
1669
1670 error = SYSCTL_OUT(req, netsvctype_dscp_map,
1671 MIN(len, req->oldlen));
1672 if (error != 0) {
1673 goto done;
1674 }
1675 }
1676
1677 if (req->newptr != USER_ADDR_NULL) {
1678 error = EPERM;
1679 }
1680 done:
1681 return error;
1682 }
1683
1684 __private_extern__ errno_t
set_packet_qos(struct mbuf * m,struct ifnet * ifp,boolean_t qos_allowed,int sotc,int netsvctype,uint8_t * dscp_inout)1685 set_packet_qos(struct mbuf *m, struct ifnet *ifp, boolean_t qos_allowed,
1686 int sotc, int netsvctype, uint8_t *dscp_inout)
1687 {
1688 if (ifp == NULL || dscp_inout == NULL) {
1689 return EINVAL;
1690 }
1691
1692 if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0 &&
1693 ifp->if_qosmarking_mode != IFRTYPE_QOSMARKING_MODE_NONE) {
1694 uint8_t dscp;
1695 const struct net_qos_dscp_map *net_qos_dscp_map = NULL;
1696
1697 switch (ifp->if_qosmarking_mode) {
1698 case IFRTYPE_QOSMARKING_FASTLANE:
1699 net_qos_dscp_map = &fastlane_net_qos_dscp_map;
1700 break;
1701 case IFRTYPE_QOSMARKING_RFC4594:
1702 net_qos_dscp_map = &rfc4594_net_qos_dscp_map;
1703 break;
1704 #if (DEBUG || DEVELOPMENT)
1705 case IFRTYPE_QOSMARKING_CUSTOM:
1706 net_qos_dscp_map = &custom_net_qos_dscp_map;
1707 break;
1708 #endif /* (DEBUG || DEVELOPMENT) */
1709 default:
1710 panic("invalid QoS marking type");
1711 /* NOTREACHED */
1712 }
1713
1714 /*
1715 * When on a Fastlane network, IP_TOS/IPV6_TCLASS are no-ops
1716 */
1717 dscp = _DSCP_DF;
1718
1719 /*
1720 * For DSCP use the network service type is specified, otherwise
1721 * use the socket traffic class
1722 *
1723 * When not whitelisted by the policy, set DSCP only for best
1724 * effort and background, and set the mbuf service class to
1725 * best effort as well so the packet will be queued and
1726 * scheduled at a lower priority.
1727 * We still want to prioritize control traffic on the interface
1728 * so we do not change the mbuf service class for SO_TC_CTL
1729 */
1730 if (IS_VALID_NET_SERVICE_TYPE(netsvctype) &&
1731 netsvctype != NET_SERVICE_TYPE_BE) {
1732 dscp = net_qos_dscp_map->netsvctype_to_dscp[netsvctype];
1733
1734 if (qos_allowed == FALSE &&
1735 netsvctype != NET_SERVICE_TYPE_BE &&
1736 netsvctype != NET_SERVICE_TYPE_BK) {
1737 dscp = _DSCP_DF;
1738 if (sotc != SO_TC_CTL) {
1739 m_set_service_class(m, MBUF_SC_BE);
1740 }
1741 }
1742 } else if (sotc != SO_TC_UNSPEC) {
1743 size_t sotcix = sotc_index(sotc);
1744 if (sotcix != SIZE_T_MAX) {
1745 dscp = net_qos_dscp_map->sotc_to_dscp[sotcix];
1746
1747 if (qos_allowed == FALSE && sotc != SO_TC_BE &&
1748 sotc != SO_TC_BK && sotc != SO_TC_BK_SYS &&
1749 sotc != SO_TC_CTL) {
1750 dscp = _DSCP_DF;
1751 if (sotc != SO_TC_CTL) {
1752 m_set_service_class(m, MBUF_SC_BE);
1753 }
1754 }
1755 }
1756 }
1757 if (net_qos_verbose != 0) {
1758 printf("%s qos_allowed %d sotc %u netsvctype %u dscp %u\n",
1759 __func__, qos_allowed, sotc, netsvctype, dscp);
1760 }
1761
1762 if (*dscp_inout != dscp) {
1763 *dscp_inout = dscp;
1764 }
1765 } else if (*dscp_inout != _DSCP_DF && IFNET_IS_WIFI_INFRA(ifp)) {
1766 mbuf_svc_class_t msc = m_get_service_class(m);
1767
1768 /*
1769 * For WiFi infra, when the mbuf service class is best effort
1770 * and the DSCP is not default, set the service class based
1771 * on DSCP
1772 */
1773 if (msc == MBUF_SC_BE) {
1774 msc = wifi_dscp_to_msc_array[*dscp_inout];
1775
1776 if (msc != MBUF_SC_BE) {
1777 m_set_service_class(m, msc);
1778
1779 if (net_qos_verbose != 0) {
1780 printf("%s set msc %u for dscp %u\n",
1781 __func__, msc, *dscp_inout);
1782 }
1783 }
1784 }
1785 }
1786
1787 return 0;
1788 }
1789
1790 static void
set_dscp_to_wifi_ac_map(const struct dcsp_msc_map * __indexable map,int clear)1791 set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *__indexable map, int clear)
1792 {
1793 int i;
1794
1795 if (clear) {
1796 bzero(wifi_dscp_to_msc_array, sizeof(wifi_dscp_to_msc_array));
1797 }
1798
1799 for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1800 const struct dcsp_msc_map *elem = map + i;
1801
1802 if (elem->dscp > _MAX_DSCP || elem->msc == MBUF_SC_UNSPEC) {
1803 break;
1804 }
1805 switch (elem->msc) {
1806 case MBUF_SC_BK_SYS:
1807 case MBUF_SC_BK:
1808 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BK;
1809 break;
1810 default:
1811 case MBUF_SC_BE:
1812 case MBUF_SC_RD:
1813 case MBUF_SC_OAM:
1814 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BE;
1815 break;
1816 case MBUF_SC_AV:
1817 case MBUF_SC_RV:
1818 case MBUF_SC_VI:
1819 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VI;
1820 break;
1821 case MBUF_SC_VO:
1822 case MBUF_SC_CTL:
1823 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VO;
1824 break;
1825 }
1826 }
1827 }
1828
1829 static errno_t
dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map * __counted_by (count)netsvctype_dscp_map,size_t count,struct dcsp_msc_map * __counted_by (DSCP_ARRAY_SIZE)dcsp_msc_map)1830 dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *__counted_by(count) netsvctype_dscp_map,
1831 size_t count, struct dcsp_msc_map *__counted_by(DSCP_ARRAY_SIZE) dcsp_msc_map)
1832 {
1833 errno_t error = 0;
1834 uint32_t i;
1835
1836 /*
1837 * Validate input parameters
1838 */
1839 for (i = 0; i < count; i++) {
1840 if (!SO_VALID_TC(netsvctype_dscp_map[i].netsvctype)) {
1841 error = EINVAL;
1842 goto done;
1843 }
1844 if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
1845 error = EINVAL;
1846 goto done;
1847 }
1848 }
1849
1850 bzero(dcsp_msc_map, DSCP_ARRAY_SIZE * sizeof(struct dcsp_msc_map));
1851
1852 for (i = 0; i < count; i++) {
1853 dcsp_msc_map[i].dscp = netsvctype_dscp_map[i].dscp;
1854 dcsp_msc_map[i].msc = so_tc2msc(netsvctype_dscp_map[i].netsvctype);
1855 }
1856 done:
1857 return error;
1858 }
1859
1860 int
1861 sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
1862 {
1863 #pragma unused(oidp, arg1, arg2)
1864 int error = 0;
1865 size_t len = DSCP_ARRAY_SIZE * sizeof(struct netsvctype_dscp_map);
1866 struct netsvctype_dscp_map netsvctype_dscp_map[DSCP_ARRAY_SIZE] = {};
1867 struct dcsp_msc_map dcsp_msc_map[DSCP_ARRAY_SIZE];
1868 size_t count;
1869
1870 if (req->oldptr == USER_ADDR_NULL) {
1871 req->oldidx = len;
1872 } else if (req->oldlen > 0) {
1873 uint8_t i;
1874
1875 for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1876 netsvctype_dscp_map[i].dscp = i;
1877 netsvctype_dscp_map[i].netsvctype =
1878 so_svc2tc(wifi_dscp_to_msc_array[i]);
1879 }
1880 error = SYSCTL_OUT(req, netsvctype_dscp_map,
1881 MIN(len, req->oldlen));
1882 if (error != 0) {
1883 goto done;
1884 }
1885 }
1886
1887 if (req->newptr == USER_ADDR_NULL) {
1888 goto done;
1889 }
1890
1891 error = proc_suser(current_proc());
1892 if (error != 0) {
1893 goto done;
1894 }
1895
1896 /*
1897 * Check input length
1898 */
1899 if (req->newlen > len) {
1900 error = EINVAL;
1901 goto done;
1902 }
1903 /*
1904 * Cap the number of entries to copy from input buffer
1905 */
1906 if (len > req->newlen) {
1907 len = req->newlen;
1908 }
1909 error = SYSCTL_IN(req, netsvctype_dscp_map, len);
1910 if (error != 0) {
1911 goto done;
1912 }
1913 count = len / sizeof(struct netsvctype_dscp_map);
1914 bzero(dcsp_msc_map, sizeof(dcsp_msc_map));
1915 error = dscp_msc_map_from_netsvctype_dscp_map(netsvctype_dscp_map, count,
1916 dcsp_msc_map);
1917 if (error != 0) {
1918 goto done;
1919 }
1920 set_dscp_to_wifi_ac_map(dcsp_msc_map, 0);
1921 done:
1922 return error;
1923 }
1924
1925 int
1926 sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
1927 {
1928 #pragma unused(oidp, arg1, arg2)
1929 int error = 0;
1930 int val = 0;
1931
1932 error = sysctl_handle_int(oidp, &val, 0, req);
1933 if (error || !req->newptr) {
1934 return error;
1935 }
1936 if (req->newptr == USER_ADDR_NULL) {
1937 return 0;
1938 }
1939 error = proc_suser(current_proc());
1940 if (error != 0) {
1941 return error;
1942 }
1943
1944 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
1945
1946 return 0;
1947 }
1948
1949 /*
1950 * Returns whether a large upload or download transfer should be marked as
1951 * BK service type for network activity. This is a system level
1952 * hint/suggestion to classify application traffic based on statistics
1953 * collected from the current network attachment
1954 *
1955 * Returns 1 for BK and 0 for default
1956 */
1957
1958 int
net_qos_guideline(struct proc * p,struct net_qos_guideline_args * arg,int * retval)1959 net_qos_guideline(struct proc *p, struct net_qos_guideline_args *arg,
1960 int *retval)
1961 {
1962 #pragma unused(p)
1963 #define RETURN_USE_BK 1
1964 #define RETURN_USE_DEFAULT 0
1965 struct net_qos_param qos_arg;
1966 struct ifnet *ipv4_primary, *ipv6_primary;
1967 int err = 0;
1968
1969 if (arg->param == USER_ADDR_NULL || retval == NULL ||
1970 arg->param_len != sizeof(qos_arg)) {
1971 return EINVAL;
1972 }
1973 err = copyin(arg->param, (caddr_t) &qos_arg, sizeof(qos_arg));
1974 if (err != 0) {
1975 return err;
1976 }
1977
1978 *retval = RETURN_USE_DEFAULT;
1979 ipv4_primary = ifindex2ifnet[get_primary_ifscope(AF_INET)];
1980 ipv6_primary = ifindex2ifnet[get_primary_ifscope(AF_INET6)];
1981
1982 /*
1983 * If either of the interfaces is in Low Internet mode, enable
1984 * background delay based algorithms on this transfer
1985 */
1986 if (qos_arg.nq_uplink) {
1987 if ((ipv4_primary != NULL &&
1988 (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_UL)) ||
1989 (ipv6_primary != NULL &&
1990 (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_UL))) {
1991 *retval = RETURN_USE_BK;
1992 return 0;
1993 }
1994 } else {
1995 if ((ipv4_primary != NULL &&
1996 (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_DL)) ||
1997 (ipv6_primary != NULL &&
1998 (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_DL))) {
1999 *retval = RETURN_USE_BK;
2000 return 0;
2001 }
2002 }
2003
2004 /*
2005 * Some times IPv4 and IPv6 primary interfaces can be different.
2006 * In this case, if either of them is non-cellular, we should mark
2007 * the transfer as BK as it can potentially get used based on
2008 * the host name resolution
2009 */
2010 if (ipv4_primary != NULL && IFNET_IS_EXPENSIVE(ipv4_primary) &&
2011 ipv6_primary != NULL && IFNET_IS_EXPENSIVE(ipv6_primary)) {
2012 if (qos_arg.nq_use_expensive) {
2013 return 0;
2014 } else {
2015 *retval = RETURN_USE_BK;
2016 return 0;
2017 }
2018 }
2019 if (ipv4_primary != NULL && IFNET_IS_CONSTRAINED(ipv4_primary) &&
2020 ipv6_primary != NULL && IFNET_IS_CONSTRAINED(ipv6_primary)) {
2021 if (qos_arg.nq_use_constrained) {
2022 return 0;
2023 } else {
2024 *retval = RETURN_USE_BK;
2025 return 0;
2026 }
2027 }
2028 if (qos_arg.nq_transfer_size >= 5 * 1024 * 1024) {
2029 *retval = RETURN_USE_BK;
2030 return 0;
2031 }
2032
2033
2034 #undef RETURN_USE_BK
2035 #undef RETURN_USE_DEFAULT
2036 return 0;
2037 }
2038
2039 #if (DEBUG || DEVELOPMENT)
/*
 * Customizable QoS mapping table
 * By default it uses the mapping table for RFC 4594
 *
 * Notes:
 *	BK_SYS is the same as BK
 *	CTL cannot be changed and is always _DSCP_CS6
 */
SYSCTL_NODE(_net_qos, OID_AUTO, custom,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");

SYSCTL_NODE(_net_qos_custom, OID_AUTO, netsvctype_to_dscp,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");

/*
 * One integer leaf per network service type.  All leaves share the same
 * handler, which receives the NET_SERVICE_TYPE_* constant as arg2 and
 * uses it to select the entry of the custom map.
 */
static int sysctl_net_qos_custom_netsvctype_to_dscp SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, be,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_BE, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, bk,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_BK, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, sig,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_SIG, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, vi,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_VI, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, vo,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_VO, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, rv,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_RV, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, av,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_AV, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, oam,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_OAM, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, rd,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, NET_SERVICE_TYPE_RD, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");

/*
 * Writing to this leaf reloads the custom map from the RFC 4594 table.
 */
static int sysctl_net_qos_custom_reset SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_net_qos_custom, OID_AUTO, reset,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, 0, sysctl_net_qos_custom_reset, "I", "");
2087
2088 int
2089 sysctl_net_qos_custom_netsvctype_to_dscp SYSCTL_HANDLER_ARGS
2090 {
2091 #pragma unused(arg1)
2092 int error = 0;
2093
2094 switch (arg2) {
2095 case NET_SERVICE_TYPE_BE:
2096 case NET_SERVICE_TYPE_BK:
2097 case NET_SERVICE_TYPE_SIG:
2098 case NET_SERVICE_TYPE_VI:
2099 case NET_SERVICE_TYPE_VO:
2100 case NET_SERVICE_TYPE_RV:
2101 case NET_SERVICE_TYPE_AV:
2102 case NET_SERVICE_TYPE_OAM:
2103 case NET_SERVICE_TYPE_RD:
2104 break;
2105 default:
2106 os_log(OS_LOG_DEFAULT, "%s: unexpected netsvctype %d",
2107 __func__, arg2);
2108 return EINVAL;
2109 }
2110
2111 int val = custom_net_qos_dscp_map.netsvctype_to_dscp[arg2];
2112 error = sysctl_handle_int(oidp, &val, 0, req);
2113 if (error != 0 || req->newptr == USER_ADDR_NULL) {
2114 return error;
2115 }
2116 if (req->newptr == USER_ADDR_NULL) {
2117 return 0;
2118 }
2119 error = proc_suser(current_proc());
2120 if (error != 0) {
2121 return error;
2122 }
2123 if (val < 0 || val > _MAX_DSCP) {
2124 os_log(OS_LOG_DEFAULT, "%s: unexpected DSCP %d",
2125 __func__, val);
2126 return EINVAL;
2127 }
2128
2129 struct netsvctype_dscp_map netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {};
2130
2131 for (int i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
2132 netsvctype_dscp_map[i].netsvctype = i;
2133 netsvctype_dscp_map[i].dscp = custom_net_qos_dscp_map.netsvctype_to_dscp[i];
2134 }
2135 netsvctype_dscp_map[arg2].dscp = (uint8_t) val;
2136
2137 error = set_netsvctype_dscp_map(&custom_net_qos_dscp_map,
2138 netsvctype_dscp_map);
2139
2140 return 0;
2141 }
2142
2143 int
2144 sysctl_net_qos_custom_reset SYSCTL_HANDLER_ARGS
2145 {
2146 #pragma unused(arg1, arg2)
2147 int error = 0;
2148 int val = 0;
2149
2150 error = sysctl_handle_int(oidp, &val, 0, req);
2151 if (error || !req->newptr) {
2152 return error;
2153 }
2154 if (req->newptr == USER_ADDR_NULL) {
2155 return 0;
2156 }
2157 error = proc_suser(current_proc());
2158 if (error != 0) {
2159 return error;
2160 }
2161
2162 error = set_netsvctype_dscp_map(&custom_net_qos_dscp_map,
2163 rfc4594_netsvctype_dscp_map);
2164
2165 return error;
2166 }
2167
2168 uint8_t
custom_sc_to_dscp(uint32_t svc_class)2169 custom_sc_to_dscp(uint32_t svc_class)
2170 {
2171 uint8_t dscp = _DSCP_DF;
2172
2173 switch (svc_class) {
2174 case MBUF_SC_BK_SYS:
2175 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_BK_SYS];
2176 break;
2177 case MBUF_SC_BK:
2178 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_BK];
2179 break;
2180
2181 case MBUF_SC_BE:
2182 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_BE];
2183 break;
2184 case MBUF_SC_RD:
2185 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_RD];
2186 break;
2187 case MBUF_SC_OAM:
2188 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_OAM];
2189 break;
2190
2191 case MBUF_SC_AV:
2192 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_AV];
2193 break;
2194 case MBUF_SC_RV:
2195 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_RV];
2196 break;
2197 case MBUF_SC_VI:
2198 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_VI];
2199 break;
2200 case MBUF_SC_SIG:
2201 dscp = custom_net_qos_dscp_map.netsvctype_to_dscp[NET_SERVICE_TYPE_SIG];
2202 break;
2203
2204 case MBUF_SC_VO:
2205 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_VO];
2206 break;
2207 case MBUF_SC_CTL:
2208 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_CTL];
2209 break;
2210 default:
2211 break;
2212 }
2213 return dscp;
2214 }
2215 #endif /* (DEBUG || DEVELOPMENT) */
2216