xref: /xnu-8020.101.4/libkern/c++/OSUnserializeXML.y (revision e7776783b89a353188416a9a346c6cdb4928faad)
1 /*
2  * Copyright (c) 1999-2019 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * HISTORY
31  *
32  * OSUnserializeXML.y created by rsulack on Tue Oct 12 1999
33  */
34 
35 // parser for unserializing OSContainer objects serialized to XML
36 //
37 // to build :
38 //	bison -p OSUnserializeXML OSUnserializeXML.y
39 //	head -50 OSUnserializeXML.y > OSUnserializeXML.cpp
40 //	sed -e "s/#include <stdio.h>//" < OSUnserializeXML.tab.c >> OSUnserializeXML.cpp
41 //
42 //	when changing code check in both OSUnserializeXML.y and OSUnserializeXML.cpp
43 //
44 //
45 //
46 //
47 //
48 //		 DO NOT EDIT OSUnserializeXML.cpp!
49 //
50 //			this means you!
51 //
52 //
53 //
54 //
55 //
56 //
57 
58 
59 %pure_parser
60 
61 %{
62 #include <string.h>
63 #include <libkern/c++/OSMetaClass.h>
64 #include <libkern/c++/OSContainers.h>
65 #include <libkern/c++/OSLib.h>
66 
// Cap on the number of objects one parse may build; the grammar actions
// fail with "maximum object count" once parsedObjectCount exceeds it.
67 #define MAX_OBJECTS              131071
// Cap on the number of IDREF look-ups per parse; exceeding it fails with
// "maximum object reference count".
68 #define MAX_REFED_OBJECTS        65535
69 
// Semantic value of every grammar symbol: a pointer to a parser-stack node.
70 #define YYSTYPE object_t *
// Bison hooks naming the extra argument handed to the parser and to yylex();
// STATE casts that argument back to parser_state_t *.
71 #define YYPARSE_PARAM   state
72 #define YYLEX_PARAM     (parser_state_t *)state
74 // this is the internal struct used to hold objects on parser stack
75 // it represents objects both before and after they have been created
76 typedef struct object {
77 	struct object   *next;
78 	struct object   *free;
79 	struct object   *elements;
80 	OSObject        *object;
81 	OSSymbol        *key;                   // for dictionary
82 	int             size;
83 	void            *data;                  // for data
84 	char            *string;                // for string & symbol
85 	long long       number;                 // for number
86 	int             idref;
87 } object_t;
88 
89 // this code is reentrant, this structure contains all
90 // state information for the parsing of a single buffer
// Per-parse context; passed explicitly to the lexer, the grammar actions
// (through STATE) and every helper in this file.
91 typedef struct parser_state {
92 	const char      *parseBuffer;           // start of text to be parsed
93 	int             parseBufferIndex;       // current index into text
94 	int             lineNumber;             // current line number
95 	object_t        *objects;               // internal objects in use
96 	object_t        *freeObjects;           // internal objects that are free
97 	OSDictionary    *tags;                  // used to remember "ID" tags
98 	OSString        **errorString;          // parse error with line
99 	OSObject        *parsedObject;          // resultant object of parsed text
	// incremented for every object the grammar builds; checked against MAX_OBJECTS
100 	int             parsedObjectCount;
	// incremented for every resolved IDREF; checked against MAX_REFED_OBJECTS
101 	int             retrievedObjectCount;
102 } parser_state_t;
103 
// Typed view of the untyped `state` argument that is in scope inside the
// parser and its actions.
104 #define STATE           ((parser_state_t *)state)
105 
// Route yyerror() to the reentrant error reporter, which needs the state.
106 #undef yyerror
107 #define yyerror(s)      OSUnserializeerror(STATE, (s))
108 static int              OSUnserializeerror(parser_state_t *state, const char *s);
109 
// Lexer: stores the token's node in *lvalp and returns the token kind.
110 static int              yylex(YYSTYPE *lvalp, parser_state_t *state);
111 
// Node pool management and "ID"/"IDREF" bookkeeping.
112 static object_t         *newObject(parser_state_t *state);
113 static void             freeObject(parser_state_t *state, object_t *o);
114 static void             rememberObject(parser_state_t *state, int tag, OSObject *o);
115 static object_t         *retrieveObject(parser_state_t *state, int tag);
116 static void             cleanupObjects(parser_state_t *state);
117 
// Builders called from the grammar actions; each returns the node whose
// ->object the action then checks for NULL.
118 static object_t         *buildDictionary(parser_state_t *state, object_t *o);
119 static object_t         *buildArray(parser_state_t *state, object_t *o);
120 static object_t         *buildSet(parser_state_t *state, object_t *o);
121 static object_t         *buildString(parser_state_t *state, object_t *o);
122 static object_t         *buildSymbol(parser_state_t *state, object_t *o);
123 static object_t         *buildData(parser_state_t *state, object_t *o);
124 static object_t         *buildNumber(parser_state_t *state, object_t *o);
125 static object_t         *buildBoolean(parser_state_t *state, object_t *o);
126 
127 __BEGIN_DECLS
128 #include <kern/kalloc.h>
129 __END_DECLS
130 
131 #define malloc(size) malloc_impl(size)
132 static inline void *
malloc_impl(size_t size)133 malloc_impl(size_t size)
134 {
135 	if (size == 0) {
136 		return NULL;
137 	}
138 	return kheap_alloc_tag_bt(KHEAP_DEFAULT, size,
139 	           (zalloc_flags_t) (Z_WAITOK | Z_ZERO),
140 	           VM_KERN_MEMORY_LIBKERN);
141 }
142 
143 #define free(addr) free_impl(addr)
144 static inline void
free_impl(void * addr)145 free_impl(void *addr)
146 {
147 	kheap_free_addr(KHEAP_DEFAULT, addr);
148 }
149 static inline void
safe_free(void * addr,size_t size)150 safe_free(void *addr, size_t size)
151 {
152   if(addr) {
153     assert(size != 0);
154     kheap_free(KHEAP_DEFAULT, addr, size);
155   }
156 }
157 
158 #define realloc(addr, osize, nsize) realloc_impl(addr, osize, nsize)
159 static inline void *
realloc_impl(void * addr,size_t osize,size_t nsize)160 realloc_impl(void *addr, size_t osize, size_t nsize)
161 {
162 	if (!addr) {
163 		return malloc(nsize);
164 	}
165 	if (nsize == osize) {
166 		return addr;
167 	}
168 	void *nmem = malloc(nsize);
169 	if (!nmem) {
170 		safe_free(addr, osize);
171 		return NULL;
172 	}
173 	(void)memcpy(nmem, addr, (nsize > osize) ? osize : nsize);
174 	safe_free(addr, osize);
175 
176 	return nmem;
177 }
178 
179 %}
/*
 * Token kinds produced by yylex().  ARRAY, DICTIONARY and SET are returned
 * for the empty-element forms of the containers; non-empty containers are
 * delimited by the literal tokens '(' ')' '{' '}' '[' ']'.  SYNTAX_ERROR is
 * returned for malformed or unknown tags.
 */
180 %token ARRAY
181 %token BOOLEAN
182 %token DATA
183 %token DICTIONARY
184 %token IDREF
185 %token KEY
186 %token NUMBER
187 %token SET
188 %token STRING
189 %token SYNTAX_ERROR
190 %% /* Grammar rules and actions follow */
191 
/*
 * Start symbol.  Empty input and the lexer's SYNTAX_ERROR token both
 * report an error and abort; a complete object is moved into
 * STATE->parsedObject and the parse is accepted.
 */
192 input:	  /* empty */		{ yyerror("unexpected end of buffer");
193 				  YYERROR;
194 				}
195 	| object		{ STATE->parsedObject = $1->object;
				  // ownership moved to parsedObject; clear the node so
				  // cleanupObjects() does not release it
196 				  $1->object = 0;
197 				  freeObject(STATE, $1);
198 				  YYACCEPT;
199 				}
200 	| SYNTAX_ERROR		{ yyerror("syntax error");
201 				  YYERROR;
202 				}
203 	;
204 
/*
 * Any value.  Each alternative runs the matching build*() helper, fails
 * the parse when no OSObject was produced, and then charges the object
 * against the MAX_OBJECTS budget.  The idref alternative instead looks up
 * a previously remembered object and is additionally limited by
 * MAX_REFED_OBJECTS.
 */
205 object:	  dict			{ $$ = buildDictionary(STATE, $1);
206 
207 				  if (!yyval->object) {
208 				    yyerror("buildDictionary");
209 				    YYERROR;
210 				  }
211 				  STATE->parsedObjectCount++;
212 				  if (STATE->parsedObjectCount > MAX_OBJECTS) {
213 				    yyerror("maximum object count");
214 				    YYERROR;
215 				  }
216 				}
217 	| array			{ $$ = buildArray(STATE, $1);
218 
219 				  if (!yyval->object) {
220 				    yyerror("buildArray");
221 				    YYERROR;
222 				  }
223 				  STATE->parsedObjectCount++;
224 				  if (STATE->parsedObjectCount > MAX_OBJECTS) {
225 				    yyerror("maximum object count");
226 				    YYERROR;
227 				  }
228 				}
229 	| set			{ $$ = buildSet(STATE, $1);
230 
231 				  if (!yyval->object) {
232 				    yyerror("buildSet");
233 				    YYERROR;
234 				  }
235 				  STATE->parsedObjectCount++;
236 				  if (STATE->parsedObjectCount > MAX_OBJECTS) {
237 				    yyerror("maximum object count");
238 				    YYERROR;
239 				  }
240 				}
241 	| string		{ $$ = buildString(STATE, $1);
242 
243 				  if (!yyval->object) {
244 				    yyerror("buildString");
245 				    YYERROR;
246 				  }
247 				  STATE->parsedObjectCount++;
248 				  if (STATE->parsedObjectCount > MAX_OBJECTS) {
249 				    yyerror("maximum object count");
250 				    YYERROR;
251 				  }
252 				}
253 	| data			{ $$ = buildData(STATE, $1);
254 
255 				  if (!yyval->object) {
256 				    yyerror("buildData");
257 				    YYERROR;
258 				  }
259 				  STATE->parsedObjectCount++;
260 				  if (STATE->parsedObjectCount > MAX_OBJECTS) {
261 				    yyerror("maximum object count");
262 				    YYERROR;
263 				  }
264 				}
265 	| number		{ $$ = buildNumber(STATE, $1);
266 
267 				  if (!yyval->object) {
268 				    yyerror("buildNumber");
269 				    YYERROR;
270 				  }
271 				  STATE->parsedObjectCount++;
272 				  if (STATE->parsedObjectCount > MAX_OBJECTS) {
273 				    yyerror("maximum object count");
274 				    YYERROR;
275 				  }
276 				}
277 	| boolean		{ $$ = buildBoolean(STATE, $1);
278 
279 				  if (!yyval->object) {
280 				    yyerror("buildBoolean");
281 				    YYERROR;
282 				  }
283 				  STATE->parsedObjectCount++;
284 				  if (STATE->parsedObjectCount > MAX_OBJECTS) {
285 				    yyerror("maximum object count");
286 				    YYERROR;
287 				  }
288 				}
				  // IDREF: reuse an object remembered under the same id
289 	| idref			{ $$ = retrieveObject(STATE, $1->idref);
290 				  if ($$) {
291 				    STATE->retrievedObjectCount++;
				    // the tags dictionary keeps its own reference;
				    // take one for this use of the object
292 				    $$->object->retain();
293 				    if (STATE->retrievedObjectCount > MAX_REFED_OBJECTS) {
294 				      yyerror("maximum object reference count");
295 				      YYERROR;
296 				    }
297 				  } else {
				    // nothing has been remembered under this id yet
298 				    yyerror("forward reference detected");
299 				    YYERROR;
300 				  }
301 				  freeObject(STATE, $1);
302 
303 				  STATE->parsedObjectCount++;
304 				  if (STATE->parsedObjectCount > MAX_OBJECTS) {
305 				    yyerror("maximum object count");
306 				    YYERROR;
307 				  }
308 				}
309 	;
310 
311 //------------------------------------------------------------------------------
312 
/*
 * Dictionary header.  The node of the opening '{' token becomes the
 * header and receives the (still reversed) list of pairs in ->elements;
 * DICTIONARY is the empty-element form coming straight from the lexer.
 */
313 dict:	  '{' '}'		{ $$ = $1;
314 				  $$->elements = NULL;
315 				}
316 	| '{' pairs '}'		{ $$ = $1;
317 				  $$->elements = $2;
318 				}
319 	| DICTIONARY
320 	;
321 
/*
 * List of key/value pairs, linked newest-first through ->next.  Each new
 * pair is compared (by key pointer) with all earlier pairs and a repeated
 * key fails the parse.
 */
322 pairs:	  pair
323 	| pairs pair		{ $$ = $2;
324 				  $$->next = $1;
325 
326 				  object_t *o;
327 				  o = $$->next;
328 				  while (o) {
329 				    if (o->key == $$->key) {
330 				      yyerror("duplicate dictionary key");
331 				      YYERROR;
332 				    }
333 				    o = o->next;
334 				  }
335 				}
336 	;
337 
/*
 * One key/value pair, folded into the key's node: the symbol built for the
 * key moves to ->key and the value's object moves to ->object.  The value
 * node is emptied and recycled.
 */
338 pair:	  key object		{ $$ = $1;
339 				  $$->key = (OSSymbol *)$$->object;
340 				  $$->object = $2->object;
341 				  $$->next = NULL;
342 				  $2->object = 0;
343 				  freeObject(STATE, $2);
344 				}
345 	;
346 
/* Dictionary key; keys are not charged against the object budget. */
347 key:	  KEY			{ $$ = buildSymbol(STATE, $1);
348 
349 //				  STATE->parsedObjectCount++;
350 //				  if (STATE->parsedObjectCount > MAX_OBJECTS) {
351 //				    yyerror("maximum object count");
352 //				    YYERROR;
353 //				  }
354 				}
355 	;
356 
357 //------------------------------------------------------------------------------
358 
/*
 * Array header: the node of the opening '(' token receives the element
 * list; ARRAY is the empty-element form coming straight from the lexer.
 */
359 array:	  '(' ')'		{ $$ = $1;
360 				  $$->elements = NULL;
361 				}
362 	| '(' elements ')'	{ $$ = $1;
363 				  $$->elements = $2;
364 				}
365 	| ARRAY
366 	;
367 
/* Set header: same shape as array, delimited by '[' and ']'. */
368 set:	  '[' ']'		{ $$ = $1;
369 				  $$->elements = NULL;
370 				}
371 	| '[' elements ']'	{ $$ = $1;
372 				  $$->elements = $2;
373 				}
374 	| SET
375 	;
376 
/*
 * Element list shared by arrays and sets.  Each new element is pushed in
 * front of the previous ones, so the list is linked newest-first.
 */
377 elements: object		{ $$ = $1;
378 				  $$->next = NULL;
379 				}
380 	| elements object	{ $$ = $2;
381 				  $$->next = $1;
382 				}
383 	;
384 
385 //------------------------------------------------------------------------------
386 
/*
 * Scalar wrappers: each non-terminal simply forwards the node that the
 * lexer attached to the corresponding token (default action).
 */
387 boolean:  BOOLEAN
388 	;
389 
390 data:	  DATA
391 	;
392 
393 idref:	  IDREF
394 	;
395 
396 number:	  NUMBER
397 	;
398 
399 string:	  STRING
400 	;
401 
402 %%
403 
404 int
405 OSUnserializeerror(parser_state_t * state, const char *s)  /* Called by yyparse on errors */
406 {
407 	if (state->errorString) {
408 		char tempString[128];
409 		snprintf(tempString, 128, "OSUnserializeXML: %s near line %d\n", s, state->lineNumber);
410 		*(state->errorString) = OSString::withCString(tempString);
411 	}
412 
413 	return 0;
414 }
415 
// Capacity (including the terminating NUL) of a tag name, an attribute
// name and an attribute value, and the size of the attribute arrays.
415 #define TAG_MAX_LENGTH          32
416 #define TAG_MAX_ATTRIBUTES      32
// Results of getTag(): malformed, <tag>, </tag>, <tag/>, and
// declaration / comment / processing instruction to be skipped.
417 #define TAG_BAD                 0
418 #define TAG_START               1
419 #define TAG_END                 2
420 #define TAG_EMPTY               3
421 #define TAG_IGNORE              4
422 
// Cursor helpers; they expect a parser_state_t *state in scope.
// nextChar() advances the cursor first and then reads.
423 #define currentChar()   (state->parseBuffer[state->parseBufferIndex])
424 #define nextChar()      (state->parseBuffer[++state->parseBufferIndex])
425 #define prevChar()      (state->parseBuffer[state->parseBufferIndex - 1])
426 
// Character classes.  Note that newline is not a "space", that
// isAlphaDigit() only accepts lower-case a-f, and that '-' counts as
// alphanumeric (it may appear in tag and attribute names).
427 #define isSpace(c)      ((c) == ' ' || (c) == '\t')
428 #define isAlpha(c)      (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))
429 #define isDigit(c)      ((c) >= '0' && (c) <= '9')
430 #define isAlphaDigit(c) ((c) >= 'a' && (c) <= 'f')
431 #define isHexDigit(c)   (isDigit(c) || isAlphaDigit(c))
432 #define isAlphaNumeric(c) (isAlpha(c) || isDigit(c) || ((c) == '-'))
434 
/*
 * Scans one markup construct starting at the current character, which
 * must be '<'.
 *
 * tag:            receives the NUL-terminated element name.
 * attributeCount: receives the number of attributes found.
 * attributes:     receives the attribute names.
 * values:         receives the matching double-quoted attribute values.
 *
 * Returns TAG_START, TAG_END or TAG_EMPTY for an element tag (cursor left
 * just past the closing '>'), TAG_IGNORE for a "<!...>" declaration, a
 * "<!-- -->" comment or a "<? ?>" processing instruction, and TAG_BAD for
 * anything malformed or exceeding the length/count limits.
 */
435 static int
getTag(parser_state_t * state,char tag[TAG_MAX_LENGTH],int * attributeCount,char attributes[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH],char values[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH])436 getTag(parser_state_t *state,
437     char tag[TAG_MAX_LENGTH],
438     int *attributeCount,
439     char attributes[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH],
440     char values[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH] )
441 {
442 	int length = 0;
443 	int c = currentChar();
444 	int tagType = TAG_START;
445 
446 	*attributeCount = 0;
447 
448 	if (c != '<') {
449 		return TAG_BAD;
450 	}
451 	c = nextChar();         // skip '<'
452 
453 
454 	// <!TAG   declarations     >
455 	// <!--     comments      -->
456 	if (c == '!') {
457 		c = nextChar();
		// "<!--" opens a comment; the second '-' is only consumed when the
		// first one matched
458 		bool isComment = (c == '-') && ((c = nextChar()) != 0) && (c == '-');
459 		if (!isComment && !isAlpha(c)) {
460 			return TAG_BAD;                      // <!1, <!-A, <!eos
461 		}
462 		while (c && (c = nextChar()) != 0) {
463 			if (c == '\n') {
464 				state->lineNumber++;
465 			}
466 			if (isComment) {
				// inside a comment only "--" is interesting, and it must
				// be followed directly by '>'
467 				if (c != '-') {
468 					continue;
469 				}
470 				c = nextChar();
471 				if (c != '-') {
472 					continue;
473 				}
474 				c = nextChar();
475 			}
476 			if (c == '>') {
477 				(void)nextChar();
478 				return TAG_IGNORE;
479 			}
480 			if (isComment) {
				// "--" not followed by '>' makes the comment invalid
481 				break;
482 			}
483 		}
484 		return TAG_BAD;
485 	} else
486 	// <? Processing Instructions  ?>
487 	if (c == '?') {
488 		while ((c = nextChar()) != 0) {
489 			if (c == '\n') {
490 				state->lineNumber++;
491 			}
492 			if (c != '?') {
493 				continue;
494 			}
495 			c = nextChar();
			// buffer ended right after '?': reported as ignorable, the
			// caller then sees the end of the buffer
496 			if (!c) {
497 				return TAG_IGNORE;
498 			}
499 			if (c == '>') {
500 				(void)nextChar();
501 				return TAG_IGNORE;
502 			}
503 		}
504 		return TAG_BAD;
505 	} else
506 	// </ end tag >
507 	if (c == '/') {
508 		c = nextChar();         // skip '/'
509 		tagType = TAG_END;
510 	}
	// element names must start with a letter
511 	if (!isAlpha(c)) {
512 		return TAG_BAD;
513 	}
514 
515 	/* find end of tag while copying it */
516 	while (isAlphaNumeric(c)) {
517 		tag[length++] = c;
518 		c = nextChar();
		// keeps room for the terminating NUL written below
519 		if (length >= (TAG_MAX_LENGTH - 1)) {
520 			return TAG_BAD;
521 		}
522 	}
523 
524 	tag[length] = 0;
525 
526 //	printf("tag %s, type %d\n", tag, tagType);
527 
528 	// look for attributes of the form attribute = "value" ...
529 	while ((c != '>') && (c != '/')) {
530 		while (isSpace(c)) {
531 			c = nextChar();
532 		}
533 
534 		length = 0;
535 		while (isAlphaNumeric(c)) {
536 			attributes[*attributeCount][length++] = c;
537 			if (length >= (TAG_MAX_LENGTH - 1)) {
538 				return TAG_BAD;
539 			}
540 			c = nextChar();
541 		}
542 		attributes[*attributeCount][length] = 0;
543 
544 		while (isSpace(c)) {
545 			c = nextChar();
546 		}
547 
		// anything other than '=' here (including end of buffer) is an error
548 		if (c != '=') {
549 			return TAG_BAD;
550 		}
551 		c = nextChar();
552 
553 		while (isSpace(c)) {
554 			c = nextChar();
555 		}
556 
		// only double-quoted values are accepted
557 		if (c != '"') {
558 			return TAG_BAD;
559 		}
560 		c = nextChar();
561 		length = 0;
562 		while (c != '"') {
563 			values[*attributeCount][length++] = c;
564 			if (length >= (TAG_MAX_LENGTH - 1)) {
565 				return TAG_BAD;
566 			}
567 			c = nextChar();
			// unterminated value
568 			if (!c) {
569 				return TAG_BAD;
570 			}
571 		}
572 		values[*attributeCount][length] = 0;
573 
574 		c = nextChar(); // skip closing quote
575 
576 //		printf("	attribute '%s' = '%s', nextchar = '%c'\n",
577 //		       attributes[*attributeCount], values[*attributeCount], c);
578 
579 		(*attributeCount)++;
580 		if (*attributeCount >= TAG_MAX_ATTRIBUTES) {
581 			return TAG_BAD;
582 		}
583 	}
584 
	// "/>" closes an empty element
585 	if (c == '/') {
586 		c = nextChar();         // skip '/'
587 		tagType = TAG_EMPTY;
588 	}
589 	if (c != '>') {
590 		return TAG_BAD;
591 	}
592 	c = nextChar();         // skip '>'
593 
594 	return tagType;
595 }
596 
597 static char *
getString(parser_state_t * state,int * alloc_lengthp)598 getString(parser_state_t *state, int *alloc_lengthp)
599 {
600 	int c = currentChar();
601 	int start, length, i, j;
602 	char * tempString;
603 
604 	start = state->parseBufferIndex;
605 	/* find end of string */
606 
607 	while (c != 0) {
608 		if (c == '\n') {
609 			state->lineNumber++;
610 		}
611 		if (c == '<') {
612 			break;
613 		}
614 		c = nextChar();
615 	}
616 
617 	if (c != '<') {
618 		return 0;
619 	}
620 
621 	length = state->parseBufferIndex - start;
622 
623 	/* copy to null terminated buffer */
624 	tempString = (char *)malloc(length + 1);
625 	if (tempString == NULL) {
626 		printf("OSUnserializeXML: can't alloc temp memory\n");
627 		goto error;
628 	}
629 	if (alloc_lengthp != NULL) {
630 		*alloc_lengthp = length + 1;
631 	}
632 
633 	// copy out string in tempString
634 	// "&amp;" -> '&', "&lt;" -> '<', "&gt;" -> '>'
635 
636 	i = j = 0;
637 	while (i < length) {
638 		c = state->parseBuffer[start + i++];
639 		if (c != '&') {
640 			tempString[j++] = c;
641 		} else {
642 			if ((i + 3) > length) {
643 				goto error;
644 			}
645 			c = state->parseBuffer[start + i++];
646 			if (c == 'l') {
647 				if (state->parseBuffer[start + i++] != 't') {
648 					goto error;
649 				}
650 				if (state->parseBuffer[start + i++] != ';') {
651 					goto error;
652 				}
653 				tempString[j++] = '<';
654 				continue;
655 			}
656 			if (c == 'g') {
657 				if (state->parseBuffer[start + i++] != 't') {
658 					goto error;
659 				}
660 				if (state->parseBuffer[start + i++] != ';') {
661 					goto error;
662 				}
663 				tempString[j++] = '>';
664 				continue;
665 			}
666 			if ((i + 3) > length) {
667 				goto error;
668 			}
669 			if (c == 'a') {
670 				if (state->parseBuffer[start + i++] != 'm') {
671 					goto error;
672 				}
673 				if (state->parseBuffer[start + i++] != 'p') {
674 					goto error;
675 				}
676 				if (state->parseBuffer[start + i++] != ';') {
677 					goto error;
678 				}
679 				tempString[j++] = '&';
680 				continue;
681 			}
682 			goto error;
683 		}
684 	}
685 	tempString[j] = 0;
686 
687 //	printf("string %s\n", tempString);
688 
689 	return tempString;
690 
691 error:
692 	if (tempString) {
693 		safe_free(tempString, length + 1);
694 		if (alloc_lengthp != NULL) {
695 			*alloc_lengthp = 0;
696 		}
697 	}
698 	return 0;
699 }
700 
701 static long long
getNumber(parser_state_t * state)702 getNumber(parser_state_t *state)
703 {
704 	unsigned long long n = 0;
705 	int base = 10;
706 	bool negate = false;
707 	int c = currentChar();
708 
709 	if (c == '0') {
710 		c = nextChar();
711 		if (c == 'x') {
712 			base = 16;
713 			c = nextChar();
714 		}
715 	}
716 	if (base == 10) {
717 		if (c == '-') {
718 			negate = true;
719 			c = nextChar();
720 		}
721 		while (isDigit(c)) {
722 			n = (n * base + c - '0');
723 			c = nextChar();
724 		}
725 		if (negate) {
726 			n = (unsigned long long)((long long)n * (long long)-1);
727 		}
728 	} else {
729 		while (isHexDigit(c)) {
730 			if (isDigit(c)) {
731 				n = (n * base + c - '0');
732 			} else {
733 				n = (n * base + 0xa + c - 'a');
734 			}
735 			c = nextChar();
736 		}
737 	}
738 //	printf("number 0x%x\n", (unsigned long)n);
739 	return n;
740 }
741 
742 // taken from CFXMLParsing/CFPropertyList.c
743 
// Base64 decode table indexed by a 7-bit character code: the 6-bit value
// of the character, or -1 for characters that are skipped.  '=' (padding)
// maps to 0 so that it still counts as one of the four input characters.
744 static const signed char __CFPLDataDecodeTable[128] = {
745 	/* 000 */ -1, -1, -1, -1, -1, -1, -1, -1,
746 	/* 010 */ -1, -1, -1, -1, -1, -1, -1, -1,
747 	/* 020 */ -1, -1, -1, -1, -1, -1, -1, -1,
748 	/* 030 */ -1, -1, -1, -1, -1, -1, -1, -1,
749 	/* ' ' */ -1, -1, -1, -1, -1, -1, -1, -1,
750 	/* '(' */ -1, -1, -1, 62, -1, -1, -1, 63,
751 	/* '0' */ 52, 53, 54, 55, 56, 57, 58, 59,
752 	/* '8' */ 60, 61, -1, -1, -1, 0, -1, -1,
753 	/* '@' */ -1, 0, 1, 2, 3, 4, 5, 6,
754 	/* 'H' */ 7, 8, 9, 10, 11, 12, 13, 14,
755 	/* 'P' */ 15, 16, 17, 18, 19, 20, 21, 22,
756 	/* 'X' */ 23, 24, 25, -1, -1, -1, -1, -1,
757 	/* '`' */ -1, 26, 27, 28, 29, 30, 31, 32,
758 	/* 'h' */ 33, 34, 35, 36, 37, 38, 39, 40,
759 	/* 'p' */ 41, 42, 43, 44, 45, 46, 47, 48,
760 	/* 'x' */ 49, 50, 51, -1, -1, -1, -1, -1
761 };
762 
// Initial size and growth step of the buffers used by the <data> decoders.
763 #define DATA_ALLOC_SIZE 4096
764 
765 static void *
getCFEncodedData(parser_state_t * state,unsigned int * size)766 getCFEncodedData(parser_state_t *state, unsigned int *size)
767 {
768 	int numeq = 0, cntr = 0;
769 	unsigned int acc = 0;
770 	int tmpbufpos = 0;
771 	size_t tmpbuflen = DATA_ALLOC_SIZE;
772 	unsigned char *tmpbuf = (unsigned char *)malloc(tmpbuflen);
773 
774 	int c = currentChar();
775 	*size = 0;
776 
777 	while (c != '<') {
778 		c &= 0x7f;
779 		if (c == 0) {
780 			safe_free(tmpbuf, tmpbuflen);
781 			return 0;
782 		}
783 		if (c == '=') {
784 			numeq++;
785 		} else {
786 			numeq = 0;
787 		}
788 		if (c == '\n') {
789 			state->lineNumber++;
790 		}
791 		if (__CFPLDataDecodeTable[c] < 0) {
792 			c = nextChar();
793 			continue;
794 		}
795 		cntr++;
796 		acc <<= 6;
797 		acc += __CFPLDataDecodeTable[c];
798 		if (0 == (cntr & 0x3)) {
799 			if (tmpbuflen <= tmpbufpos + 2) {
800 				size_t oldsize = tmpbuflen;
801 				tmpbuflen += DATA_ALLOC_SIZE;
802 				tmpbuf = (unsigned char *)realloc(tmpbuf, oldsize, tmpbuflen);
803 			}
804 			tmpbuf[tmpbufpos++] = (acc >> 16) & 0xff;
805 			if (numeq < 2) {
806 				tmpbuf[tmpbufpos++] = (acc >> 8) & 0xff;
807 			}
808 			if (numeq < 1) {
809 				tmpbuf[tmpbufpos++] = acc & 0xff;
810 			}
811 		}
812 		c = nextChar();
813 	}
814 	*size = tmpbufpos;
815 	if (*size == 0) {
816 		safe_free(tmpbuf, tmpbuflen);
817 		return 0;
818 	}
819 	return tmpbuf;
820 }
821 
822 static void *
getHexData(parser_state_t * state,unsigned int * size)823 getHexData(parser_state_t *state, unsigned int *size)
824 {
825 	int c;
826 	unsigned char *d, *start, *lastStart;
827 
828 	size_t buflen = DATA_ALLOC_SIZE;
829 	start = lastStart = d = (unsigned char *)malloc(buflen);
830 	c = currentChar();
831 
832 	while (c != '<') {
833 		if (isSpace(c)) {
834 			while ((c = nextChar()) != 0 && isSpace(c)) {
835 			}
836 		}
837 		;
838 		if (c == '\n') {
839 			state->lineNumber++;
840 			c = nextChar();
841 			continue;
842 		}
843 
844 		// get high nibble
845 		if (isDigit(c)) {
846 			*d = (c - '0') << 4;
847 		} else if (isAlphaDigit(c)) {
848 			*d =  (0xa + (c - 'a')) << 4;
849 		} else {
850 			goto error;
851 		}
852 
853 		// get low nibble
854 		c = nextChar();
855 		if (isDigit(c)) {
856 			*d |= c - '0';
857 		} else if (isAlphaDigit(c)) {
858 			*d |= 0xa + (c - 'a');
859 		} else {
860 			goto error;
861 		}
862 
863 		d++;
864 		if ((d - lastStart) >= DATA_ALLOC_SIZE) {
865 			int oldsize = d - start;
866 			assert(oldsize == buflen);
867 			buflen += DATA_ALLOC_SIZE;
868 			start = (unsigned char *)realloc(start, oldsize, buflen);
869 			d = lastStart = start + oldsize;
870 		}
871 		c = nextChar();
872 	}
873 
874 	*size = d - start;
875 	return start;
876 
877 error:
878 
879 	*size = 0;
880 	safe_free(start, buflen);
881 	return 0;
882 }
883 
/*
 * Lexer called by the generated parser.
 *
 * Skips blanks and newlines, reads the next tag with getTag(), allocates
 * a node for it (stored in *lvalp) and, for scalar elements, also consumes
 * the element's text and its closing tag.
 *
 * Returns 0 at the end of the buffer, SYNTAX_ERROR for anything malformed
 * or unknown, one of the %token kinds for scalars, empty containers and
 * IDREFs, or a literal '(' ')' '{' '}' '[' ']' for container start/end
 * tags.
 */
884 static int
yylex(YYSTYPE * lvalp,parser_state_t * state)885 yylex(YYSTYPE *lvalp, parser_state_t *state)
886 {
887 	int c, i;
888 	int tagType;
889 	char tag[TAG_MAX_LENGTH];
890 	int attributeCount;
891 	char attributes[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH];
892 	char values[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH];
893 	object_t *object;
894 	int alloc_length;
895 
896 top:
897 	c = currentChar();
898 
899 	/* skip white space  */
900 	if (isSpace(c)) {
901 		while ((c = nextChar()) != 0 && isSpace(c)) {
902 		}
903 	}
904 	;
905 
906 	/* keep track of line number, don't return \n's */
907 	if (c == '\n') {
908 		STATE->lineNumber++;
909 		(void)nextChar();
910 		goto top;
911 	}
912 
913 	// end of the buffer?
914 	if (!c) {
915 		return 0;
916 	}
917 
918 	tagType = getTag(STATE, tag, &attributeCount, attributes, values);
919 	if (tagType == TAG_BAD) {
920 		return SYNTAX_ERROR;
921 	}
	// declarations, comments and processing instructions produce no token
922 	if (tagType == TAG_IGNORE) {
923 		goto top;
924 	}
925 
926 	// handle allocation and check for "ID" and "IDREF" tags up front
927 	*lvalp = object = newObject(STATE);
	// -1 means "no ID attribute"
928 	object->idref = -1;
929 	for (i = 0; i < attributeCount; i++) {
930 		if (attributes[i][0] == 'I' && attributes[i][1] == 'D') {
931 			// check for idref's, note: we ignore the tag, for
932 			// this to work correctly, all idrefs must be unique
933 			// across the whole serialization
934 			if (attributes[i][2] == 'R' && attributes[i][3] == 'E' &&
935 			    attributes[i][4] == 'F' && !attributes[i][5]) {
				// a reference must be an empty element: <tag IDREF="n"/>
936 				if (tagType != TAG_EMPTY) {
937 					return SYNTAX_ERROR;
938 				}
939 				object->idref = strtol(values[i], NULL, 0);
940 				return IDREF;
941 			}
942 			// check for id's
943 			if (!attributes[i][2]) {
944 				object->idref = strtol(values[i], NULL, 0);
945 			} else {
				// any other attribute starting with "ID" is rejected
946 				return SYNTAX_ERROR;
947 			}
948 		}
949 	}
950 
	// dispatch on the first letter, then confirm the full tag name
951 	switch (*tag) {
952 	case 'a':
953 		if (!strcmp(tag, "array")) {
954 			if (tagType == TAG_EMPTY) {
955 				object->elements = NULL;
956 				return ARRAY;
957 			}
958 			return (tagType == TAG_START) ? '(' : ')';
959 		}
960 		break;
961 	case 'd':
962 		if (!strcmp(tag, "dict")) {
963 			if (tagType == TAG_EMPTY) {
964 				object->elements = NULL;
965 				return DICTIONARY;
966 			}
967 			return (tagType == TAG_START) ? '{' : '}';
968 		}
969 		if (!strcmp(tag, "data")) {
970 			unsigned int size;
971 			if (tagType == TAG_EMPTY) {
972 				object->data = NULL;
973 				object->size = 0;
974 				return DATA;
975 			}
976 
977 			bool isHexFormat = false;
978 			for (i = 0; i < attributeCount; i++) {
979 				if (!strcmp(attributes[i], "format") && !strcmp(values[i], "hex")) {
980 					isHexFormat = true;
981 					break;
982 				}
983 			}
984 			// CF encoded is the default form
985 			if (isHexFormat) {
986 				object->data = getHexData(STATE, &size);
987 			} else {
988 				object->data = getCFEncodedData(STATE, &size);
989 			}
990 			object->size = size;
			// the payload must be followed directly by </data>
991 			if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END) || strcmp(tag, "data")) {
992 				return SYNTAX_ERROR;
993 			}
994 			return DATA;
995 		}
996 		break;
997 	case 'f':
998 		if (!strcmp(tag, "false")) {
			// only the empty form <false/> is accepted
999 			if (tagType == TAG_EMPTY) {
1000 				object->number = 0;
1001 				return BOOLEAN;
1002 			}
1003 		}
1004 		break;
1005 	case 'i':
1006 		if (!strcmp(tag, "integer")) {
1007 			object->size = 64;      // default
1008 			for (i = 0; i < attributeCount; i++) {
1009 				if (!strcmp(attributes[i], "size")) {
1010 					object->size = strtoul(values[i], NULL, 0);
1011 				}
1012 			}
1013 			if (tagType == TAG_EMPTY) {
1014 				object->number = 0;
1015 				return NUMBER;
1016 			}
1017 			object->number = getNumber(STATE);
1018 			if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END) || strcmp(tag, "integer")) {
1019 				return SYNTAX_ERROR;
1020 			}
1021 			return NUMBER;
1022 		}
1023 		break;
1024 	case 'k':
1025 		if (!strcmp(tag, "key")) {
			// a key must have text, so <key/> is an error
1026 			if (tagType == TAG_EMPTY) {
1027 				return SYNTAX_ERROR;
1028 			}
1029 			object->string = getString(STATE, &alloc_length);
1030 			if (!object->string) {
1031 				return SYNTAX_ERROR;
1032 			}
1033 			object->string_alloc_length = alloc_length;
1034 			if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END)
1035 			    || strcmp(tag, "key")) {
1036 				return SYNTAX_ERROR;
1037 			}
1038 			return KEY;
1039 		}
1040 		break;
1041 	case 'p':
		// <plist> and </plist> wrappers are skipped; the node is recycled
1042 		if (!strcmp(tag, "plist")) {
1043 			freeObject(STATE, object);
1044 			goto top;
1045 		}
1046 		break;
1047 	case 's':
1048 		if (!strcmp(tag, "string")) {
1049 			if (tagType == TAG_EMPTY) {
				// <string/> becomes an empty, one-byte string
1050 				object->string = (char *)malloc(1);
1051 				object->string[0] = 0;
1052 				object->string_alloc_length = 1;
1053 				return STRING;
1054 			}
1055 			object->string = getString(STATE, &alloc_length);
1056 			if (!object->string) {
1057 				return SYNTAX_ERROR;
1058 			}
1059 			object->string_alloc_length = alloc_length;
1060 			if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END)
1061 			    || strcmp(tag, "string")) {
1062 				return SYNTAX_ERROR;
1063 			}
1064 			return STRING;
1065 		}
1066 		if (!strcmp(tag, "set")) {
1067 			if (tagType == TAG_EMPTY) {
1068 				object->elements = NULL;
1069 				return SET;;
1070 			}
1071 			if (tagType == TAG_START) {
1072 				return '[';
1073 			} else {
1074 				return ']';
1075 			}
1076 		}
1077 		break;
1078 	case 't':
1079 		if (!strcmp(tag, "true")) {
			// only the empty form <true/> is accepted
1080 			if (tagType == TAG_EMPTY) {
1081 				object->number = 1;
1082 				return BOOLEAN;
1083 			}
1084 		}
1085 		break;
1086 	}
1087 
	// unknown tag, or a known tag in a form that is not accepted
1088 	return SYNTAX_ERROR;
1089 }
1090 
1091 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
1092 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
1093 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
1094 
1095 // "java" like allocation: if this code hits a syntax error in
1096 // the middle of the parsed string we just bail with pointers hanging
1097 // all over the place; this code helps keep it all together
1098 
1099 //static int object_count = 0;
1100 
1101 object_t *
newObject(parser_state_t * state)1102 newObject(parser_state_t *state)
1103 {
1104 	object_t *o;
1105 
1106 	if (state->freeObjects) {
1107 		o = state->freeObjects;
1108 		state->freeObjects = state->freeObjects->next;
1109 	} else {
1110 		o = (object_t *)malloc(sizeof(object_t));
1111 //		object_count++;
1112 		o->free = state->objects;
1113 		state->objects = o;
1114 	}
1115 
1116 	return o;
1117 }
1118 
1119 void
freeObject(parser_state_t * state,object_t * o)1120 freeObject(parser_state_t * state, object_t *o)
1121 {
1122 	o->next = state->freeObjects;
1123 	state->freeObjects = o;
1124 }
1125 
1126 void
cleanupObjects(parser_state_t * state)1127 cleanupObjects(parser_state_t *state)
1128 {
1129 	object_t *t, *o = state->objects;
1130 
1131 	while (o) {
1132 		if (o->object) {
1133 //			printf("OSUnserializeXML: releasing object o=%x object=%x\n", (int)o, (int)o->object);
1134 			o->object->release();
1135 		}
1136 		if (o->data) {
1137 //			printf("OSUnserializeXML: freeing   object o=%x data=%x\n", (int)o, (int)o->data);
1138 			free(o->data);
1139 		}
1140 		if (o->key) {
1141 //			printf("OSUnserializeXML: releasing object o=%x key=%x\n", (int)o, (int)o->key);
1142 			o->key->release();
1143 		}
1144 		if (o->string) {
1145 //			printf("OSUnserializeXML: freeing   object o=%x string=%x\n", (int)o, (int)o->string);
1146 			free(o->string);
1147 		}
1148 
1149 		t = o;
1150 		o = o->free;
1151 		safe_free(t, sizeof(object_t));
1152 //		object_count--;
1153 	}
1154 //	printf("object_count = %d\n", object_count);
1155 }
1156 
1157 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
1158 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
1159 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
1160 
1161 static void
rememberObject(parser_state_t * state,int tag,OSObject * o)1162 rememberObject(parser_state_t *state, int tag, OSObject *o)
1163 {
1164 	char key[16];
1165 	snprintf(key, 16, "%u", tag);
1166 
1167 //	printf("remember key %s\n", key);
1168 
1169 	state->tags->setObject(key, o);
1170 }
1171 
1172 static object_t *
retrieveObject(parser_state_t * state,int tag)1173 retrieveObject(parser_state_t *state, int tag)
1174 {
1175 	OSObject *ref;
1176 	object_t *o;
1177 	char key[16];
1178 	snprintf(key, 16, "%u", tag);
1179 
1180 //	printf("retrieve key '%s'\n", key);
1181 
1182 	ref = state->tags->getObject(key);
1183 	if (!ref) {
1184 		return 0;
1185 	}
1186 
1187 	o = newObject(state);
1188 	o->object = ref;
1189 	return o;
1190 }
1191 
1192 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
1193 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
1194 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
1195 
1196 object_t *
buildDictionary(parser_state_t * state,object_t * header)1197 buildDictionary(parser_state_t *state, object_t * header)
1198 {
1199 	object_t *o, *t;
1200 	int count = 0;
1201 	OSDictionary *dict;
1202 
1203 	// get count and reverse order
1204 	o = header->elements;
1205 	header->elements = 0;
1206 	while (o) {
1207 		count++;
1208 		t = o;
1209 		o = o->next;
1210 
1211 		t->next = header->elements;
1212 		header->elements = t;
1213 	}
1214 
1215 	dict = OSDictionary::withCapacity(count);
1216 	if (header->idref >= 0) {
1217 		rememberObject(state, header->idref, dict);
1218 	}
1219 
1220 	o = header->elements;
1221 	while (o) {
1222 		dict->setObject(o->key, o->object);
1223 
1224 		o->key->release();
1225 		o->object->release();
1226 		o->key = 0;
1227 		o->object = 0;
1228 
1229 		t = o;
1230 		o = o->next;
1231 		freeObject(state, t);
1232 	}
1233 	o = header;
1234 	o->object = dict;
1235 	return o;
1236 };
1237 
1238 object_t *
buildArray(parser_state_t * state,object_t * header)1239 buildArray(parser_state_t *state, object_t * header)
1240 {
1241 	object_t *o, *t;
1242 	int count = 0;
1243 	OSArray *array;
1244 
1245 	// get count and reverse order
1246 	o = header->elements;
1247 	header->elements = 0;
1248 	while (o) {
1249 		count++;
1250 		t = o;
1251 		o = o->next;
1252 
1253 		t->next = header->elements;
1254 		header->elements = t;
1255 	}
1256 
1257 	array = OSArray::withCapacity(count);
1258 	if (header->idref >= 0) {
1259 		rememberObject(state, header->idref, array);
1260 	}
1261 
1262 	o = header->elements;
1263 	while (o) {
1264 		array->setObject(o->object);
1265 
1266 		o->object->release();
1267 		o->object = 0;
1268 
1269 		t = o;
1270 		o = o->next;
1271 		freeObject(state, t);
1272 	}
1273 	o = header;
1274 	o->object = array;
1275 	return o;
1276 };
1277 
1278 object_t *
buildSet(parser_state_t * state,object_t * header)1279 buildSet(parser_state_t *state, object_t *header)
1280 {
1281 	object_t *o = buildArray(state, header);
1282 
1283 	OSArray *array = (OSArray *)o->object;
1284 	OSSet *set = OSSet::withArray(array, array->getCapacity());
1285 
1286 	// write over the reference created in buildArray
1287 	if (header->idref >= 0) {
1288 		rememberObject(state, header->idref, set);
1289 	}
1290 
1291 	array->release();
1292 	o->object = set;
1293 	return o;
1294 };
1295 
1296 object_t *
buildString(parser_state_t * state,object_t * o)1297 buildString(parser_state_t *state, object_t *o)
1298 {
1299 	OSString *string;
1300 
1301 	string = OSString::withCString(o->string);
1302 	if (o->idref >= 0) {
1303 		rememberObject(state, o->idref, string);
1304 	}
1305 
1306 	free(o->string);
1307 	o->string = 0;
1308 	o->object = string;
1309 
1310 	return o;
1311 };
1312 
1313 object_t *
buildSymbol(parser_state_t * state,object_t * o)1314 buildSymbol(parser_state_t *state, object_t *o)
1315 {
1316 	OSSymbol *symbol;
1317 
1318 	symbol = const_cast < OSSymbol * > (OSSymbol::withCString(o->string));
1319 	if (o->idref >= 0) {
1320 		rememberObject(state, o->idref, symbol);
1321 	}
1322 
1323 	safe_free(o->string, strlen(o->string) + 1);
1324 	o->string = 0;
1325 	o->object = symbol;
1326 
1327 	return o;
1328 };
1329 
1330 object_t *
buildData(parser_state_t * state,object_t * o)1331 buildData(parser_state_t *state, object_t *o)
1332 {
1333 	OSData *data;
1334 
1335 	if (o->size) {
1336 		data = OSData::withBytes(o->data, o->size);
1337 	} else {
1338 		data = OSData::withCapacity(0);
1339 	}
1340 	if (o->idref >= 0) {
1341 		rememberObject(state, o->idref, data);
1342 	}
1343 
1344 	if (o->size) {
1345 		free(o->data);
1346 	}
1347 	o->data = 0;
1348 	o->object = data;
1349 	return o;
1350 };
1351 
1352 object_t *
buildNumber(parser_state_t * state,object_t * o)1353 buildNumber(parser_state_t *state, object_t *o)
1354 {
1355 	OSNumber *number = OSNumber::withNumber(o->number, o->size);
1356 
1357 	if (o->idref >= 0) {
1358 		rememberObject(state, o->idref, number);
1359 	}
1360 
1361 	o->object = number;
1362 	return o;
1363 };
1364 
1365 object_t *
buildBoolean(parser_state_t * state __unused,object_t * o)1366 buildBoolean(parser_state_t *state __unused, object_t *o)
1367 {
1368 	o->object = ((o->number == 0) ? kOSBooleanFalse : kOSBooleanTrue);
1369 	o->object->retain();
1370 	return o;
1371 };
1372 
1373 OSObject*
OSUnserializeXML(const char * buffer,OSString ** errorString)1374 OSUnserializeXML(const char *buffer, OSString **errorString)
1375 {
1376 	OSObject *object;
1377 
1378 	if (!buffer) {
1379 		return 0;
1380 	}
1381 	parser_state_t *state = (parser_state_t *)malloc(sizeof(parser_state_t));
1382 	if (!state) {
1383 		return 0;
1384 	}
1385 
1386 	// just in case
1387 	if (errorString) {
1388 		*errorString = NULL;
1389 	}
1390 
1391 	state->parseBuffer = buffer;
1392 	state->parseBufferIndex = 0;
1393 	state->lineNumber = 1;
1394 	state->objects = 0;
1395 	state->freeObjects = 0;
1396 	state->tags = OSDictionary::withCapacity(128);
1397 	state->errorString = errorString;
1398 	state->parsedObject = 0;
1399 	state->parsedObjectCount = 0;
1400 	state->retrievedObjectCount = 0;
1401 
1402 	(void)yyparse((void *)state);
1403 
1404 	object = state->parsedObject;
1405 
1406 	cleanupObjects(state);
1407 	state->tags->release();
1408 	safe_free(state, sizeof(parser_state_t));
1409 
1410 	return object;
1411 }
1412 
1413 #include <libkern/OSSerializeBinary.h>
1414 
1415 OSObject*
OSUnserializeXML(const char * buffer,size_t bufferSize,OSString ** errorString)1416 OSUnserializeXML(const char *buffer, size_t bufferSize, OSString **errorString)
1417 {
1418 	if (!buffer) {
1419 		return 0;
1420 	}
1421 	if (bufferSize < sizeof(kOSSerializeBinarySignature)) {
1422 		return 0;
1423 	}
1424 
1425 	if (!strcmp(kOSSerializeBinarySignature, buffer)
1426 	    || (kOSSerializeIndexedBinarySignature == (uint8_t)buffer[0])) {
1427 		return OSUnserializeBinary(buffer, bufferSize, errorString);
1428 	}
1429 
1430 	// XML must be null terminated
1431 	if (buffer[bufferSize - 1]) {
1432 		return 0;
1433 	}
1434 
1435 	return OSUnserializeXML(buffer, errorString);
1436 }
1437 
1438 
1439 //
1440 //
1441 //
1442 //
1443 //
1444 //		 DO NOT EDIT OSUnserializeXML.cpp!
1445 //
1446 //			this means you!
1447 //
1448 //
1449 //
1450 //
1451 //
1452