1 : /*
2 : * OpenBIOS - free your system!
3 : * ( FCode tokenizer )
4 : *
5 : * scanner.c - simple scanner for forth files.
6 : *
7 : * This program is part of a free implementation of the IEEE 1275-1994
8 : * Standard for Boot (Initialization Configuration) Firmware.
9 : *
10 : * Copyright (C) 2001-2005 by Stefan Reinauer <stepan@openbios.org>
11 : *
12 : * This program is free software; you can redistribute it and/or modify
13 : * it under the terms of the GNU General Public License as published by
14 : * the Free Software Foundation; version 2 of the License.
15 : *
16 : * This program is distributed in the hope that it will be useful,
17 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 : * GNU General Public License for more details.
20 : *
21 : * You should have received a copy of the GNU General Public License
22 : * along with this program; if not, write to the Free Software
23 : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA, 02110-1301 USA
24 : *
25 : */
26 :
27 : /* **************************************************************************
28 : * Modifications made in 2005 by IBM Corporation
29 : * (C) Copyright 2005 IBM Corporation. All Rights Reserved.
30 : * Modifications Author: David L. Paktor dlpaktor@us.ibm.com
31 : **************************************************************************** */
32 :
33 : #include <stdio.h>
34 : #include <stdlib.h>
35 : #include <unistd.h>
36 : #ifdef __GLIBC__
37 : #define __USE_XOPEN_EXTENDED
38 : #endif
39 : #include <string.h>
40 : #include <time.h>
41 : #include <ctype.h>
42 :
43 : #include "macros.h"
44 : #include "stack.h"
45 : #include "stream.h"
46 : #include "emit.h"
47 : #include "toke.h"
48 : #include "dictionary.h"
49 : #include "vocabfuncts.h"
50 : #include "scanner.h"
51 : #include "errhandler.h"
52 : #include "tokzesc.h"
53 : #include "conditl.h"
54 : #include "flowcontrol.h"
55 : #include "usersymbols.h"
56 : #include "clflags.h"
57 : #include "devnode.h"
58 : #include "tracesyms.h"
59 : #include "nextfcode.h"
60 :
61 : #include "parselocals.h"
62 :
63 : /* **************************************************************************
64 : *
65 : * Some VERY IMPORTANT global variables follow
66 : *
67 : **************************************************************************** */
68 :
69 : u8 *statbuf=NULL; /* The word just read from the input stream */
70 : u8 base=0x0a; /* The numeric-interpretation base */
71 :
72 : /* pci data */
73 : bool pci_is_last_image=TRUE;
74 : u16 pci_image_rev=0x0001; /* Vendor's Image, NOT PCI Data Structure Rev */
75 : u16 pci_vpd=0x0000;
76 :
77 :
78 : /* Having to do with the state of the tokenization */
79 : bool offs16 = TRUE; /* We are using 16-bit branch- (etc) -offsets */
80 : bool in_tokz_esc = FALSE; /* TRUE if in "Tokenizer Escape" mode */
81 : bool incolon = FALSE; /* TRUE if inside a colon definition */
82 : bool haveend = FALSE; /* TRUE if the "end" code was read. */
83 : int do_loop_depth = 0; /* How deep we are inside DO ... LOOP variants */
84 :
85 : /* Used for error-checking of IBM-style Locals */
86 : int lastcolon; /* Location in output stream of latest colon-definition. */
87 :
88 : /* Used for error reporting */
89 : char *last_colon_defname = NULL; /* Name of last colon-definition */
90 : char *last_colon_filename = NULL; /* File where last colon-def'n made */
91 : unsigned int last_colon_lineno; /* Line number of last colon-def'n */
92 : bool report_multiline = TRUE; /* False to suspend multiline warning */
93 : unsigned int last_colon_abs_token_no;
94 :
95 : /* **************************************************************************
96 : * Local variables
97 : **************************************************************************** */
98 : static u16 last_colon_fcode; /* FCode-number assigned to last colon-def'n */
99 : /* Used for RECURSE */
100 :
101 : static bool do_not_overload = TRUE ; /* False to suspend dup-name-test */
102 : static bool got_until_eof = FALSE ; /* TRUE to signal "unterminated" */
103 :
104 : static unsigned int last_colon_do_depth = 0;
105 :
106 : /* State of headered-ness for name-creation */
107 : typedef enum headeredness_t {
108 : FLAG_HEADERLESS ,
109 : FLAG_EXTERNAL ,
110 : FLAG_HEADERS } headeredness ;
111 : static headeredness hdr_flag = FLAG_HEADERLESS ; /* Init'l default state */
112 :
113 : /* Local variables having to do with: */
114 : /* ... the state of the tokenization */
115 : static bool is_instance = FALSE; /* Is "instance" is in effect? */
116 : static char *instance_filename = NULL; /* File where "instance" invoked */
117 : static unsigned int instance_lineno; /* Line number of "instance" */
118 : static bool fcode_started = FALSE ; /* Only 1 fcode_starter per block. */
119 : static bool first_fc_starter = TRUE; /* Only once per tokenization... */
120 :
121 : /* ... with the state of the input stream, */
122 : static bool need_to_pop_source;
123 :
124 : /* ... with the use of the return stack, */
125 : static int ret_stk_depth = 0; /* Return-Stack-Usage-Depth counter */
126 :
127 : /* ... and with control of error-messaging. */
128 : /* Should a warning about a dangling "instance"
129 : * be issued at the next device-node change?
130 : */
131 : static bool dev_change_instance_warning = TRUE;
132 :
133 : /* Has a gap developed between "instance" and its application? */
134 : static bool instance_definer_gap = FALSE;
135 :
136 : /* Shared phrases */
137 : static char *in_tkz_esc_mode = "in Tokenizer-Escape mode.\n";
138 :
139 :
140 : /* **************************************************************************
141 : *
142 : * Function name: skip_ws
143 : * Synopsis: Advance the PC past all whitespace.
144 : * Protect against pointer over-runs
145 : *
146 : * Inputs:
147 : * Parameters: NONE
148 : * Global Variables:
149 : * pc Input-source Scanning pointer
150 : * end End of input-source buffer
151 : *
152 : * Outputs:
153 : * Returned Value: TRUE if PC reached END before non-blank char
154 : * Global Variables:
155 : * pc Advanced to first non-blank char, or to END
156 : * lineno Incremented if encountered new-line along the way
157 : *
158 : * Error Detection:
159 : * Return a TRUE if End of input-source buffer reached before
160 : * non-blank character. Not necessarily an error; allow
161 : * calling routine to decide...
162 : *
163 : **************************************************************************** */
164 :
165 : static bool skip_ws(void)
166 126801 : {
167 126801 : bool retval = TRUE;
168 : char ch_tmp;
169 :
170 269989 : for ( ; pc < end; pc++ )
171 : {
172 268373 : ch_tmp = *pc;
173 268373 : if ( (ch_tmp != '\t') && (ch_tmp != ' ') && (ch_tmp != '\n' ) )
174 : {
175 125185 : retval = FALSE;
176 125185 : break;
177 : }
178 143188 : if ( ch_tmp == '\n') lineno++;
179 : }
180 126801 : return ( retval );
181 : }
182 :
183 : /* **************************************************************************
184 : *
185 : * Function name: skip_until
186 : * Synopsis: Advance the PC to the given character.
187 : * Do not copy anything into statbuf.
188 : * Protect against pointer over-runs
189 : *
190 : * Inputs:
191 : * Parameters:
192 : * lim_ch Limiting Character
193 : * Global Variables:
194 : * pc Input-source Scanning pointer
195 : * end End of input-source buffer
196 : *
197 : * Outputs:
198 : * Returned Value: TRUE if PC reached END before finding LIM_CH
199 : * Global Variables:
200 : * pc Advanced to first occurrence of LIM_CH, or to END
201 : * lineno Incremented if encountered new-line along the way
202 : *
203 : * Error Detection:
204 : * Return a TRUE if End of input-source buffer reached before
205 : * desired character. Not necessarily an error; allow calling
206 : * routine to decide...
207 : *
208 : **************************************************************************** */
209 :
210 : bool skip_until( char lim_ch)
211 4661 : {
212 4661 : bool retval = TRUE;
213 : char ch_tmp;
214 :
215 205731 : for ( ; pc < end; pc++ )
216 : {
217 205723 : ch_tmp = *pc;
218 205723 : if ( ch_tmp == lim_ch )
219 : {
220 4653 : retval = FALSE;
221 4653 : break;
222 : }
223 201070 : if ( ch_tmp == '\n') lineno++;
224 : }
225 4661 : return ( retval );
226 : }
227 :
228 :
229 : /* **************************************************************************
230 : *
231 : * Function name: get_until
232 : * Synopsis: Return, in statbuf, the string from PC to the first
233 : * occurrence of the given delimiter-character..
234 : *
235 : * Inputs:
236 : * Parameters:
237 : * needle The given delimiter-character
238 : * Global Variables:
239 : * pc Input-source Scanning Pointer
240 : *
241 : * Outputs:
242 : * Returned Value: Length of the string obtained
243 : * Global Variables:
244 : * statbuf The string obtained from the input stream;
245 : * does not include the delimiter-character.
246 : * pc Bumped past the delimiter-character, unless
247 : * it's a new-line, in which case leave it
248 : * to be handled by get_word()
249 : * Local Static Variables:
250 : * got_until_eof Pass this as a signal that the end of the
251 : * buffer was reached before the delimiter;
252 : * Testing whether PC has reached END is
253 : * not a sufficient indication.
254 : *
255 : * Error Detection:
256 : * If string overflows statbuf allocation, ERROR, and keep
257 : * only the first GET_BUF_MAX-1 characters of the string.
258 : * Otherwise, if delimiter not found before eof, keep string.
259 : * Protection against PC pointer-over-run past END is
260 : * provided by skip_until() . Reaching END will be
261 : * handled by calling routine; pass indication along
262 : * via Local Static Variable.
263 : *
264 : * Process Explanation:
265 : * Skip the delimiter-character from further input, unless it's a
266 : * new-line which will be skipped anyway. Let skip_until()
267 : * and get_word() handle incrementing line-number counters.
268 : * If skip_until() indicated reaching end-of-file, don't bump PC
269 : *
270 : * Revision History:
271 : * Updated Thu, 14 Jul 2005 by David L. Paktor
272 : * More robust testing for when PC exceeds END
273 : * Involved replacing firstchar()
274 : *
275 : **************************************************************************** */
276 :
277 : static signed long get_until(char needle)
278 518 : {
279 : u8 *safe;
280 518 : unsigned long len = 0;
281 :
282 518 : safe=pc;
283 :
284 518 : got_until_eof = skip_until(needle);
285 :
286 518 : len = pc - safe;
287 518 : if (len >= GET_BUF_MAX )
288 : {
289 2 : tokenization_error( TKERROR,
290 : "get_until buffer overflow. Max is %d.\n", GET_BUF_MAX-1 );
291 2 : len = GET_BUF_MAX-1;
292 : }
293 :
294 518 : memcpy(statbuf, safe, len);
295 518 : statbuf[len]=0;
296 :
297 518 : if ( INVERSE(got_until_eof) )
298 : {
299 514 : if ( needle != '\n' ) pc++;
300 : }
301 518 : return len;
302 : }
303 :
304 :
305 : /* **************************************************************************
306 : *
307 : * We are going to use a fairly sophisticated mechanism to
308 : * make a smooth transition between processing the body
309 : * of a Macro, a User-defined Symbol or an FLOADed file
310 : * and the resumption of processing the source file, so
311 : * that the end-of-file will only be seen at the end of
312 : * the primary input file (the one from the command-line).
313 : * This mechanism will be tied in with the get_word() routine
314 : *
315 : * We are going to define a private data-structure in which
316 : * we will save the state of the current source file,
317 : * and from which, of course, we will recover it. Its
318 : * fields will be:
319 : * A pointer to the next structure in the list.
320 : * The saved values of START END and PC
321 : * The saved values of INAME and LINENO
322 : * A flag indicating that get-word should "pause"
323 : * before popping the source-stream because
324 : * the input file will be changing.
325 : * A place from which to save and recover the state of
326 : * whether we're testing for "Multi-line" strings;
327 : * to prevent undeserved "Multi-line" warnings
328 : * during Macro processing.
329 : * A pointer to a "resumption" routine, to call
330 : * when resuming processing the source file;
331 : * the routine takes a pointer parameter
332 : * and has no return value. The pointer
333 : * may be NULL if no routine is needed.
334 : * The pointer to pass as the parameter to the
335 : * resumption routine.
336 : *
337 : **************************************************************************** */
338 :
339 : typedef struct source_state
340 : {
341 : struct source_state *next;
342 : u8 *old_start;
343 : u8 *old_pc;
344 : u8 *old_end;
345 : char *old_iname;
346 : unsigned int old_lineno;
347 : bool pause_before_pop;
348 : bool sav_rep_multlin;
349 : void (*resump_func)();
350 : _PTR resump_param;
351 : } source_state_t ;
352 :
353 : static source_state_t *saved_source = NULL;
354 :
355 :
356 : /* **************************************************************************
357 : *
358 : * Function name: push_source
359 : * Synopsis: Save the state of the current source file, in the
360 : * source_state data-structure LIFO linked-list.
361 : *
362 : * Inputs:
363 : * Parameters:
364 : * res_func Pointer to routine to call when resuming
365 : * processing the saved source file.
366 : * res_parm Parameter to pass to res_func.
367 : * Either or both pointers may be NULL.
368 : * file_chg TRUE if input file is going to change.
369 : * Global Variables:
370 : * start Points to current input buffer
371 : * end Points to end of current input buffer
372 : * pc Input point in current buffer
373 : * iname Name of current source file
374 : * lineno Line number in current source file
375 : * report_multiline Whether we're testing for "Multi-line"
376 : * Local Static Variables:
377 : * saved_source Pointer to the source_state data-structure
378 : *
379 : * Outputs:
380 : * Returned Value: NONE
381 : * Local Static Variables:
382 : * saved_source Points to new source_state entry
383 : * Memory Allocated
384 : * for the new source_state entry
385 : * When Freed?
386 : * When resuming processing the source file, by drop_source().
387 : *
388 : * Process Explanation:
389 : * The calling routine will establish the new input buffer via
390 : * a call to init_inbuf() or the like.
391 : *
392 : **************************************************************************** */
393 :
394 : void push_source( void (*res_func)(), _PTR res_parm, bool file_chg )
395 1458 : {
396 : source_state_t *new_sav_src;
397 :
398 1458 : new_sav_src = safe_malloc( sizeof(source_state_t), "pushing Source state");
399 :
400 1458 : new_sav_src->next = saved_source;
401 1458 : new_sav_src->old_start = start;
402 1458 : new_sav_src->old_pc = pc;
403 1458 : new_sav_src->old_end = end;
404 1458 : new_sav_src->old_iname = iname;
405 1458 : new_sav_src->old_lineno = lineno;
406 1458 : new_sav_src->pause_before_pop = file_chg;
407 1458 : new_sav_src->sav_rep_multlin = report_multiline;
408 1458 : new_sav_src->resump_func = res_func;
409 1458 : new_sav_src->resump_param = res_parm;
410 :
411 1458 : saved_source = new_sav_src;
412 1458 : }
413 :
414 : /* **************************************************************************
415 : *
416 : * Function name: drop_source
417 : * Synopsis: Remove last saved state of source processing
418 : * from the source_state LIFO linked-list,
419 : * without (or after) restoring.
420 : *
421 : * Inputs:
422 : * Parameters: NONE
423 : * Local Static Variables:
424 : * saved_source Pointer to the source_state data-structure
425 : *
426 : * Outputs:
427 : * Returned Value: NONE
428 : * Local Static Variables:
429 : * saved_source Points to previous source_state entry
430 : * Memory Freed
431 : * Saved source_state entry that was just "dropped"
432 : *
433 : * Error Detection:
434 : * None. Called only when linked-list is known not to be at end.
435 : *
436 : **************************************************************************** */
437 :
438 : static void drop_source( void)
439 1458 : {
440 1458 : source_state_t *former_sav_src = saved_source;
441 :
442 1458 : saved_source = saved_source->next ;
443 1458 : free( former_sav_src);
444 1458 : }
445 :
446 : /* **************************************************************************
447 : *
448 : * Function name: pop_source
449 : * Synopsis: Restore the state of source processing as it was
450 : * last saved in the source_state linked-list.
451 : *
452 : * Inputs:
453 : * Parameters: NONE
454 : * Local Static Variables:
455 : * saved_source Pointer to the source_state data-structure
456 : * need_to_pop_source If TRUE, don't check before popping.
457 : *
458 : * Outputs:
459 : * Returned Value: TRUE if reached end of linked-list
460 : * Global Variables:
461 : * start Points to restored input buffer
462 : * end Points to end of restored input buffer
463 : * pc Input point in restored buffer
464 : * iname Name of restored source file
465 : * lineno Line number in restored source file
466 : * report_multiline Restored to saved value.
467 : * Local Static Variables:
468 : * saved_source Points to previous source_state entry
469 : * need_to_pop_source TRUE if postponed popping till next time
470 : * Memory Freed
471 : * Saved source-state entry that was just "popped"
472 : *
473 : * Process Explanation:
474 : * First check the need_to_pop_source flag.
475 : * If it is set, we will clear it and go ahead and pop.
476 : * If it is not set, we will check the pause_before_pop field
477 : * of the top entry in the source_state linked-list.
478 : * If the pause_before_pop field is set, we will set the
479 : * need_to_pop_source flag and return.
480 : * If it is not, we will go ahead and pop.
481 : * If we are going to go ahead and pop, we will call the
482 : * "Resume-Processing" routine (if it's not NULL) before
483 : * we restore the saved source state.
484 : *
485 : **************************************************************************** */
486 :
487 : static bool pop_source( void )
488 1859 : {
489 1859 : bool retval = TRUE;
490 :
491 1859 : if ( saved_source != NULL )
492 : {
493 1688 : retval = FALSE;
494 1688 : if ( need_to_pop_source )
495 : {
496 244 : need_to_pop_source = FALSE;
497 : }else{
498 1444 : if ( saved_source->pause_before_pop )
499 : {
500 244 : need_to_pop_source = TRUE;
501 244 : return( retval);
502 : }
503 : }
504 :
505 1444 : if ( saved_source->resump_func != NULL )
506 : {
507 610 : saved_source->resump_func( saved_source->resump_param);
508 : }
509 1444 : report_multiline = saved_source->sav_rep_multlin;
510 1444 : lineno = saved_source->old_lineno ;
511 1444 : iname = saved_source->old_iname ;
512 1444 : end = saved_source->old_end ;
513 1444 : pc = saved_source->old_pc ;
514 1444 : start = saved_source->old_start ;
515 :
516 1444 : drop_source();
517 : }
518 1615 : return( retval);
519 : }
520 :
521 :
522 : /* **************************************************************************
523 : *
524 : * Function name: get_word
525 : * Synopsis: Gather the next "word" (aka Forth Token) from the
526 : * input stream.
527 : * A Forth Token is, of course, a string of characters
528 : * delimited by white-space (blank, tab or new-line).
529 : * Do not increment line-number counters here; leave
530 : * the delimiter after the word unconsumed.
531 : *
532 : * Inputs:
533 : * Parameters: NONE
534 : * Global Variables:
535 : * pc Input-stream Scanning Pointer
536 : * Local Static Variables:
537 : * need_to_pop_source If TRUE, pop_source() as first step
538 : *
539 : * Outputs:
540 : * Returned Value: Length of "word" gotten;
541 : * 0 if reached end of file.
542 : * -1 if reached end of primary input
543 : * (I.e., end of all source)
544 : * Global Variables:
545 : * statbuf Copy of "gotten" word
546 : * pc Advanced to end of "gotten" word,
547 : * (i.e., the next word is "consumed")
548 : * unless returning zero.
549 : * abs_token_no Incremented, if valid "word" (token)
550 : * was gotten.
551 : *
552 : * Process Explanation:
553 : * Skip whitespace to the start of the token,
554 : * then skip printable characters to the end of the token.
555 : * That part's easy, but what about when skipping whitespace
556 : * brings you to the end of the input stream?
557 : * First, look at the need_to_pop_source flag. If it's set,
558 : * we came to the end of the input stream the last time
559 : * through. Now we need to pop_source() first.
560 : * Next, we start skipping whitespace; this detects when we've
561 : * reached the end of the input stream. If we have,
562 : * then we need to pop_source() again.
563 : * If pop_source() returned a TRUE, we've reached the end
564 : * of the primary input file. Return -1.
565 : * If pop_source() turned the need_to_pop_source flag
566 : * to TRUE again, then we need to "pause" until the
567 : * next time through; return zero.
568 : * Otherwise, we proceed with collecting the token as described.
569 : *
570 : * Revision History:
571 : * Updated Thu, 23 Feb 2006 by David L. Paktor
572 : * Tied this routine in with a more sophisticated mechanism that
573 : * makes a smooth transition between processing the body of
574 : * a Macro, a User-defined Symbol or an FLOADed file, and
575 : * the resumption of processing the source file, so that the
576 : * end-of-file will only be seen at the end of the primary
577 : * input file (the one that came from the command-line)
578 : * Updated Fri, 24 Feb 2006 by David L. Paktor
579 : * This is trickier than I thought. Added a global indicator
580 : * of whether a file-boundary was crossed while getting
581 : * the word; previously, that was indicated by a return
582 : * value of zero, which now means something else...
583 : * The flag, closed_stream , will be cleared every time this
584 : * routine is entered, and set whenever close_stream() is
585 : * entered.
586 : * Updated Tue, 28 Feb 2006 at 10:13 PST by David L. Paktor
587 : * Trickier still. On crossing a file-boundary, must not
588 : * consume the first word in the resumed file, for one
589 : * call; instead, return zero. Consume it on the next
590 : * call. The closed_stream flag is now irrelevant and
591 : * has gone away.
592 : *
593 : **************************************************************************** */
594 :
595 : signed long get_word( void)
596 125435 : {
597 : size_t len;
598 : u8 *str;
599 : bool keep_skipping;
600 : bool pop_result;
601 :
602 125435 : if ( need_to_pop_source )
603 : {
604 244 : pop_result = pop_source();
605 : }
606 :
607 : do {
608 126635 : keep_skipping = skip_ws();
609 126635 : if ( keep_skipping )
610 : {
611 1615 : pop_result = pop_source();
612 1615 : if ( pop_result || need_to_pop_source )
613 : {
614 415 : statbuf[0] = 0;
615 415 : if ( pop_result )
616 : {
617 171 : return -1;
618 : }
619 244 : return 0;
620 : }
621 : }
622 126220 : } while ( keep_skipping );
623 :
624 125020 : str=pc;
625 733184 : while ( (str < end) && *str && *str!='\n' && *str!='\t' && *str!=' ')
626 483144 : str++;
627 :
628 125020 : len=(size_t)(str-pc);
629 125020 : if (len >= GET_BUF_MAX )
630 : {
631 0 : tokenization_error ( FATAL,
632 : "get_word buffer overflow. Max is %d.", GET_BUF_MAX-1 );
633 : }
634 :
635 125020 : memcpy(statbuf, pc, len);
636 125020 : statbuf[len]=0;
637 :
638 : #ifdef DEBUG_SCANNER
639 : printf("%s:%d: debug: read token '%s', length=%ld\n",
640 : iname, lineno, statbuf, len);
641 : #endif
642 125020 : pc+=len;
643 125020 : abs_token_no++;
644 125020 : return len;
645 : }
646 :
647 :
648 : /* **************************************************************************
649 : *
650 : * Function name: get_word_in_line
651 : * Synopsis: Get the next word on the same line as the current
652 : * line of input. If the end of line was reached
653 : * before a word was found, print an error message
654 : * and return an indication.
655 : *
656 : * Inputs:
657 : * Parameters:
658 : * func_nam Name of the function expecting the same-line
659 : * input; for use in the Error Message.
660 : * If NULL, do not issue Error Message
661 : * Global Variables:
662 : * pc Input character pointer. Saved for comparison
663 : * lineno Current input line number. Saved for comparison
664 : *
665 : * Outputs:
666 : * Returned Value: TRUE = success. Word was acquired on same line.
667 : * Global Variables:
668 : * statbuf Advanced to the next word in the input stream.
669 : * pc Advanced if no error; restored otherwise.
670 : *
671 : * Error Detection:
672 : * If no next word is gotten (i.e., we're at end-of-file), or if
673 : * one is gotten but not on the same line, the routine will
674 : * return FALSE; if func_nam is not NULL, an ERROR Message
675 : * will be issued.
676 : * Also, the values of PC LINENO and ABS_TOKEN_NO will be reset
677 : * to the positions they had when this routine was entered.
678 : *
679 : **************************************************************************** */
680 :
681 : bool get_word_in_line( char *func_nam)
682 1733 : {
683 : signed long wlen;
684 1733 : bool retval = TRUE;
685 1733 : u8 *save_pc = pc;
686 1733 : unsigned int save_lineno = lineno;
687 1733 : unsigned int save_abs_token_no = abs_token_no;
688 :
689 : /* Copy of function name, for error message */
690 : char func_cpy[FUNC_CPY_BUF_SIZE+1];
691 :
692 : /* Do this first, in the likely event that func_nam was statbuf */
693 1733 : if ( func_nam != NULL )
694 : {
695 1620 : strncpy( func_cpy, func_nam, FUNC_CPY_BUF_SIZE);
696 1620 : func_cpy[FUNC_CPY_BUF_SIZE] = 0; /* Guarantee a null terminator */
697 : }
698 :
699 1733 : wlen = get_word();
700 1733 : if ( ( lineno != save_lineno ) || ( wlen <= 0 ) )
701 : {
702 16 : abs_token_no = save_abs_token_no;
703 16 : lineno = save_lineno;
704 16 : pc = save_pc;
705 16 : retval = FALSE;
706 16 : if ( func_nam != NULL )
707 : {
708 15 : tokenization_error ( TKERROR,
709 : "Operator %s expects its target on the same line\n",
710 : strupr(func_cpy));
711 : }
712 : }
713 1733 : return ( retval );
714 : }
715 :
716 :
717 : /* **************************************************************************
718 : *
719 : * Function name: get_rest_of_line
720 : * Synopsis: Get all the remaining text on the same line as
721 : * the current line of input. If there is no text
722 : * (not counting whitespace) before the end of line,
723 : * return an indication.
724 : *
725 : * Inputs:
726 : * Parameters: NONE
727 : * Global Variables:
728 : * pc Input character pointer. Saved for restoration
729 : * lineno Current input line number. Saved for comparison
730 : *
731 : * Outputs:
732 : * Returned Value: TRUE = success. Text was acquired on same line.
733 : * Global Variables:
734 : * statbuf Contains the text found in the input stream.
735 : * pc Advanced to end of line or of whitespace, if
736 : * no error; restored otherwise.
737 : * lineno Preserved if no error; otherwise, restored.
738 : * abs_token_no Restored if error; otherwise, advanced as normal.
739 : *
740 : * Error Detection:
741 : * Routine will return FALSE if no text is gotten on the same line.
742 : *
743 : **************************************************************************** */
744 :
745 : bool get_rest_of_line( void)
746 112 : {
747 112 : bool retval = FALSE;
748 112 : u8 *save_pc = pc;
749 112 : unsigned int save_lineno = lineno;
750 112 : unsigned int save_abs_token_no = abs_token_no;
751 :
752 112 : if ( INVERSE( skip_ws() ) )
753 : {
754 112 : if ( lineno == save_lineno )
755 : {
756 111 : signed long wlen = get_until('\n');
757 111 : if ( wlen > 0 ) retval = TRUE;
758 : }else{
759 1 : abs_token_no = save_abs_token_no;
760 1 : lineno = save_lineno;
761 1 : pc = save_pc;
762 : }
763 : }
764 112 : return( retval);
765 : }
766 :
767 :
768 : /* **************************************************************************
769 : *
770 : * Function name: warn_unterm
771 : * Synopsis: Message for "Unterminated ..." something
772 : * Show saved line-number, where the "something" started,
773 : * and the definition, if any, in which it occurred.
774 : *
775 : * Inputs:
776 : * Parameters:
777 : * severity Type of error/warning message to display
778 : * usually either WARNING or TKERROR
779 : * something String to print after "Unterminated"
780 : * saved_lineno Line-Number where the "something" started
781 : * Global Variables:
782 : * lineno Saved, then restored.
783 : * last_colon_defname Used only if unterm_is_colon is TRUE;
784 : * Local Static Variables:
785 : * unterm_is_colon See 07 Mar 2006 entry under Rev'n History
786 : *
787 : * Outputs:
788 : * Returned Value: NONE
789 : * Global Variables:
790 : * lineno Saved, then restored.
791 : * Local Static Variables:
792 : * unterm_is_colon Reset to FALSE
793 : * Printout:
794 : * Warning or Error message
795 : *
796 : * Revision History:
797 : * Updated Mon, 06 Mar 2006 by David L. Paktor
798 : * Added call to in_last_colon()
799 : * Updated Tue, 07 Mar 2006 by David L. Paktor
800 : * Call to in_last_colon() works okay in most cases except for
801 : * when the "something" is a Colon Definition; there, it
802 : * results in the phrase: ... Definition in definition of ...
803 : * which is awkward. To eliminate that, I am introducing
804 : * a Local Static Variable flag called unterm_is_colon
805 : * which will be set only in the appropriate place and
806 : * re-cleared here. It's a retro-fit, of course; it could
807 : * have been a parameter had the need for it occurred when
808 : * this routine was first constructed...
809 : *
810 : **************************************************************************** */
811 :
812 : static bool unterm_is_colon = FALSE;
813 : void warn_unterm( int severity, char *something, unsigned int saved_lineno)
814 25 : {
815 25 : unsigned int tmp = lineno;
816 25 : lineno = saved_lineno;
817 25 : if ( unterm_is_colon )
818 : {
819 3 : tokenization_error( severity, "Unterminated %s of %s\n",
820 : something, strupr( last_colon_defname) );
821 3 : unterm_is_colon = FALSE;
822 : }else{
823 22 : tokenization_error( severity, "Unterminated %s", something);
824 22 : in_last_colon();
825 : }
826 25 : lineno = tmp;
827 25 : }
828 :
829 : /* **************************************************************************
830 : *
831 : * Function name: warn_if_multiline
832 : * Synopsis: Test for "Multi-line ..." something and issue WARNING
833 : * Show saved line-number, where the "something" started
834 : *
835 : * Inputs:
836 : * Parameters:
837 : * something String to print after "Multi-line"
838 : * start_lineno Line-Number where the "something" started
839 : * Global Variables:
840 : * lineno Line-Number where we are now
841 : * iname Input file name, to satisfy ...where_started()
842 : * (Not crossing any actual file boundary.)
843 : * report_multiline TRUE = go ahead with the message
844 : *
845 : * Outputs:
846 : * Returned Value: NONE
847 : * Global Variables:
848 : * report_multiline Restored to TRUE.
849 : *
850 : * Error Detection:
851 : * Only issue message if the current lineno doesn't equal
852 : * the start_lineno
853 : *
854 : * Process Explanation:
855 : * The directive "multi-line" allows the user to specify that
856 : * the next "Multi-line ..." something is intentional, and
857 : * will cause its warning to be suppressed. It remains in
858 : * effect until it's "used"; afterwards, it's reset.
859 : *
860 : **************************************************************************** */
861 :
862 : void warn_if_multiline( char *something, unsigned int start_lineno )
863 36922 : {
864 36922 : if ( report_multiline && ( start_lineno != lineno ) )
865 : {
866 130 : tokenization_error( WARNING, "Multi-line %s, started", something);
867 130 : where_started( iname, start_lineno);
868 : }
869 36922 : report_multiline = TRUE;
870 36922 : }
871 :
872 :
873 : /* **************************************************************************
874 : *
875 : * Function name: string_remark
876 : * Synopsis: Suspend string parsing past end of line and
877 : * whitespace at start of the new line.
878 : *
879 : * Inputs:
880 : * Parameters:
881 : * errmsg_txt Text to be used for error-message.
882 : * Global Variables:
883 : * pc Input-source Scanning pointer
884 : *
885 : * Outputs:
886 : * Returned Value: NONE
887 : * Global Variables:
888 : * pc Will point to first non-blank in new line
889 : *
890 : * Error Detection:
891 : * The return value of the skip_until() or skip_ws() routine
892 : * will indicate if PC goes past END. Issue a WARNING.
893 : * The calling routine will handle things from there.
894 : *
895 : **************************************************************************** */
896 :
897 : static void string_remark(char *errmsg_txt)
898 54 : {
899 54 : unsigned int sav_lineno = lineno;
900 54 : bool eof = skip_until('\n');
901 54 : if ( ! eof )
902 : {
903 54 : eof = skip_ws();
904 : }
905 54 : if ( eof )
906 : {
907 1 : warn_unterm(WARNING, errmsg_txt, sav_lineno);
908 : }
909 :
910 54 : }
911 :
912 :
913 : /* Convert the given string to a number in the supplied base */
914 : /* Allow -- and ignore -- embedded periods. */
915 : /* The endptr param represents a pointer that will be updated
916 : * with the address of the first non-numeric character encountered,
917 : * (unless it is a NULL, in which case it is ignored).
918 : */
919 : /* There is no test for a completely invalid string;
920 : * the calling routine is responsible for ascertaining
921 : * the validity of the string being passed.
922 : */
923 : static long parse_number(u8 *start, u8 **endptr, int lbase)
924 11827 : {
925 11827 : long val = 0;
926 11827 : bool negative = FALSE ;
927 : int curr;
928 11827 : u8 *nptr=start;
929 :
930 11827 : curr = *nptr;
931 11827 : if (curr == '-')
932 : {
933 0 : negative = TRUE ;
934 0 : nptr++;
935 : }
936 :
937 37570 : for (curr = *nptr; (curr = *nptr); nptr++) {
938 26467 : if ( curr == '.' )
939 8 : continue;
940 26459 : if ( curr >= '0' && curr <= '9')
941 24765 : curr -= '0';
942 1694 : else if (curr >= 'a' && curr <= 'f')
943 1023 : curr += 10 - 'a';
944 671 : else if (curr >= 'A' && curr <= 'F')
945 2 : curr += 10 - 'A';
946 : else
947 669 : break;
948 :
949 25790 : if (curr >= lbase)
950 55 : break;
951 :
952 25735 : val *= lbase;
953 25735 : val += curr;
954 : }
955 :
956 : #ifdef DEBUG_SCANNER
957 : if (curr)
958 : printf( "%s:%d: warning: couldn't parse number '%s' (%d/%d)\n",
959 : iname, lineno, start,curr,lbase);
960 : #endif
961 :
962 11827 : if (endptr)
963 11548 : *endptr=nptr;
964 :
965 11827 : if (negative)
966 : {
967 0 : val = -val;
968 : }
969 11827 : return val;
970 : }
971 :
972 : /* **************************************************************************
973 : *
974 : * Function name: add_byte_to_string
975 : * Synopsis: Add the given byte (or character) to the string
976 : * being accumulated in statbuf, but protect
977 : * against a buffer overflow.
978 : *
979 : * Inputs:
980 : * Parameters:
981 : * nu_byte The given character to be added
982 : * walk Pointer to pointer to the position
983 : * in statbuf where the character
984 : * is to be placed
985 : * Global Variables:
986 : * statbuf Buffer where the string is accumulated
987 : * Macros:
988 : * GET_BUF_MAX Size of the buffer
989 : *
990 : * Outputs:
991 : * Returned Value: NONE
992 : * Supplied Pointers:
993 : * **walk Given character is placed here
994 : * *walk Incremented in any case
995 : *
996 : * Error Detection:
997 : * If walk has reached end of string buffer, do not place
998 : * the character, but continue to increment walk .
999 : * Calling routine will detect overflow.
1000 : *
1001 : **************************************************************************** */
1002 :
1003 : static void add_byte_to_string( u8 nu_byte, u8 **walk )
1004 1478035 : {
1005 1478035 : if ( *walk - statbuf < GET_BUF_MAX )
1006 : {
1007 1475587 : **walk = nu_byte;
1008 : }
1009 1478035 : (*walk)++;
1010 1478035 : }
1011 :
1012 : /* **************************************************************************
1013 : *
1014 : * Function name: c_string_escape
1015 : * Synopsis: Process C-style escape syntax in strings
1016 : *
1017 : * Inputs:
1018 : * Parameters:
1019 : * walk Pointer to pointer to area into
1020 : * which to put acquired values
1021 : * Global Variables:
1022 : * pc Input-source Scanning pointer
1023 : *
1024 : * Outputs:
1025 : * Returned Value: NONE
1026 : * Global Variables:
1027 : * pc Point to last character processed.
1028 : * Supplied Pointers:
1029 : * *walk Advanced by number of bytes acquired
1030 : *
1031 : * Error Detection:
1032 : * WARNING conditions. See under "Process Explanation" below.
1033 : *
1034 : * Process Explanation:
1035 : * Start with PC pointing to the first character to process
1036 : * i.e., after the backslash.
1037 : * We recognize newline, tab and numbers
1038 : * A digit-string in the current base can be converted to a number.
1039 : * The first non-numeric character ends the numeric sequence
1040 : * and gets swallowed up.
1041 : * If the number exceeds the size of a byte, use the truncated
1042 : * value and issue a WARNING.
1043 : * If the first character in the "digit"-string was non-numeric,
1044 : * use the character literally and issue a WARNING.
1045 : * If the character that ended the numeric sequence is a quote,
1046 : * it might be the end of the string, or the start of a
1047 : * special-character or even of an "( ... ) hex-sequence,
1048 : * so don't swallow it up.
1049 : *
1050 : * Still to be done:
1051 : * Better protection against PC pointer-over-run past END.
1052 : * Currently, this works, but it's held together by threads:
1053 : * Because init_stream forces a null-byte at the end of
1054 : * the input buffer, parse_number() exits immediately upon
1055 : * encountering it. This situation could be covered more
1056 : * robustly...
1057 : *
1058 : **************************************************************************** */
1059 :
1060 : static void c_string_escape( u8 **walk)
1061 159 : {
1062 159 : char c = *pc;
1063 : u8 val;
1064 : /* We will come out of this "switch" statement
1065 : * with a value for val and a decision
1066 : * as to whether to write it.
1067 : */
1068 159 : bool write_val = TRUE;
1069 :
1070 159 : switch (c)
1071 : {
1072 : case 'n':
1073 : /* newline */
1074 17 : val = '\n';
1075 17 : break;
1076 : case 't':
1077 : /* tab */
1078 27 : val = '\t';
1079 27 : break;
1080 : default:
1081 :
1082 : /* Digit-string? Convert it to a number, using the current base.
1083 : * The first non-numeric character ends the numeric sequence
1084 : * and gets swallowed up.
1085 : * If the number exceeds the size of a byte, use the truncated
1086 : * value and issue a WARNING.
1087 : * If the first character in the "digit"-string was non-numeric,
1088 : * use the character literally and issue a WARNING.
1089 : */
1090 :
1091 : /*
1092 : * If the sequence ender is a quote, it might be the end of
1093 : * the string, or the start of a special-character or even
1094 : * of an "( ... ) hex-sequence, so don't swallow it up.
1095 : */
1096 : {
1097 : long lval;
1098 115 : u8 *sav_pc = pc;
1099 115 : lval=parse_number(pc, &pc, base);
1100 115 : val = (u8)lval;
1101 : #ifdef DEBUG_SCANNER
1102 : if (verbose)
1103 : printf( "%s:%d: debug: escape code "
1104 : "0x%x\n",iname, lineno, val);
1105 : #endif
1106 115 : if ( lval > 0x0ff )
1107 : {
1108 14 : tokenization_error ( WARNING,
1109 : "Numeric String after \\ overflows byte. "
1110 : "Using 0x%02x.\n", val);
1111 : }
1112 :
1113 115 : if ( pc == sav_pc )
1114 : {
1115 : /* NOTE: Here, PC hasn't been advanced past its
1116 : * saved value, so we can count on C remaining
1117 : * unchanged since the start of the routine.
1118 : */
1119 : /* Don't use the null-byte at the end of the buffer */
1120 40 : if ( ( pc >= end )
1121 : /* or a sequence-ending quote. */
1122 : || ( c == '"' ) )
1123 : {
1124 2 : write_val = FALSE;
1125 : }else{
1126 : /* In the WARNING message, print the character
1127 : * if it's printable or show it in hex
1128 : * if it's not.
1129 : */
1130 38 : if ( (c > 0x20 ) && ( c <= 0x7e) )
1131 : {
1132 20 : tokenization_error ( WARNING,
1133 : "Unrecognized character, %c, "
1134 : "after \\ in string. "
1135 : "Using it literally.\n", c);
1136 : }else{
1137 18 : tokenization_error ( WARNING,
1138 : "Unrecognized character, 0x%02x, "
1139 : "after \\ in string. "
1140 : "Using it literally.\n", c);
1141 : }
1142 38 : val = c;
1143 : }
1144 : }
1145 : /* NOTE: Here, however, PC may have been advanced... */
1146 : /* Don't swallow the sequence-ender if it's a quote. */
1147 115 : if ( *pc == '"' )
1148 : {
1149 16 : pc--;
1150 : }
1151 :
1152 : } /* End of the "default" clause */
1153 : } /* End of the "switch" statement */
1154 :
1155 159 : if ( write_val ) add_byte_to_string( val, walk );
1156 :
1157 159 : }
1158 :
1159 : /* **************************************************************************
1160 : *
1161 : * Function name: get_sequence
1162 : * Synopsis: Process the Hex-Number option in strings
1163 : * Protect against PC pointer-over-run past END.
1164 : *
1165 : * Inputs:
1166 : * Parameters:
1167 : * **walk Pointer to pointer to area into which
1168 : * to put acquired values
1169 : * Global Variables:
1170 : * pc Input-source Scanning pointer
1171 : * end End of input-source buffer
1172 : *
1173 : * Outputs:
1174 : * Returned Value: TRUE = "Normal Completion" (I.e., not EOF)
1175 : * Global Variables:
1176 : * pc Points at terminating close-paren, or END
1177 : * lineno Input File Line-Number Counter, may be incr'd
1178 : * Supplied Pointers:
1179 : * *walk Advanced by number of values acquired
1180 : *
1181 : * Error Detection:
1182 : * End-of-file encountered before end of hex-sequence:
1183 : * Issue a Warning, conclude processing, return FALSE.
1184 : *
1185 : * Process Explanation:
1186 : * SETUP and RULES:
1187 : * Start with PC pointing to the first character
1188 : * after the '(' (Open-Paren)
1189 : * Bytes are gathered from digits in pairs, except
1190 : * when separated they are treated singly.
1191 : * Allow a backslash in the middle of the sequence
1192 : * to skip to the end of the line and past the
1193 : * whitespace at the start of the next line,
1194 : * i.e., it acts as a comment-escape.
1195 : *
1196 : * INITIALIZE:
1197 : * PV_indx = 0
1198 : * Set return-indicator to "Abnormal Completion"
1199 : * Ready_to_Parse = FALSE
1200 : * Stuff NULL into PVAL[2]
1201 : * WHILE PC is less than END
1202 : * Pick up character at PC into Next_Ch
1203 : * IF Next_Ch is close-paren :
1204 : * Set return-indicator to "Normal Completion".
1205 : * Done! Break out of loop.
1206 : * ENDIF
1207 : * IF comment-escape behavior (controlled by means of a
1208 : * command-line switch) is allowed
1209 : * IF Next_Ch is backslash :
1210 : * Skip to end-of line, skip whitespace.
1211 : * If that makes PC reach END : WARNING message.
1212 : * (Don't need to break out of loop;
1213 : * normal test will terminate.)
1214 : * CONTINUE Loop.
1215 : * (Don't increment PC; PC is already at right place).
1216 : * ENDIF
1217 : * ENDIF
1218 : * IF Next_Ch is a valid Hex-Digit character :
1219 : * Stuff it into PVAL[PV_indx]
1220 : * IF (PV_indx is 0) :
1221 : * Increment PV_indx
1222 : * ELSE
1223 : * Set Ready_to_Parse to TRUE
1224 : * ENDIF
1225 : * ELSE
1226 : * IF Next_Ch is a New-Line, increment Line Number counter
1227 : * IF (PV_indx is 1) :
1228 : * Stuff NULL into PVAL[1]
1229 : * Set Ready_to_Parse to TRUE
1230 : * ENDIF
1231 : * ENDIF
1232 : * IF Ready_to_Parse
1233 : * Parse PVAL
1234 : * Stuff into WALK
1235 : * Reset PV_indx to zero
1236 : * Reset Ready_to_Parse to FALSE
1237 : * ENDIF
1238 : * Increment PC
1239 : * REPEAT
1240 : * Return with Normal/Abnormal completion indicator
1241 : *
1242 : **************************************************************************** */
1243 :
1244 : static bool get_sequence(u8 **walk)
1245 33 : {
1246 33 : int pv_indx = 0;
1247 33 : bool retval = FALSE; /* "Abnormal Completion" indicator */
1248 33 : bool ready_to_parse = FALSE;
1249 : char next_ch;
1250 : char pval[3];
1251 :
1252 : #ifdef DEBUG_SCANNER
1253 : printf("%s:%d: debug: hex field:", iname, lineno);
1254 : #endif
1255 33 : pval[2]=0;
1256 :
1257 888 : while ( pc < end )
1258 : {
1259 854 : next_ch = *pc;
1260 854 : if ( next_ch == ')' )
1261 : {
1262 32 : retval = TRUE;
1263 32 : break;
1264 : }
1265 822 : if ( hex_remark_escape )
1266 : {
1267 685 : if ( next_ch == '\\' )
1268 : {
1269 34 : string_remark("string hex-sequence remark");
1270 34 : continue;
1271 : }
1272 : }
1273 788 : if ( isxdigit(next_ch) )
1274 : {
1275 469 : pval[pv_indx] = next_ch;
1276 469 : if ( pv_indx == 0 )
1277 : {
1278 279 : pv_indx++;
1279 : }else{
1280 |