Context Navigation

source: mainline/uspace/app/sbi/src/lex.c

Visit:

Last change on this file was 1433ecda, checked in by Jiri Svoboda <jiri@…>, 7 years ago
Fix cstyle: make ccheck-fix and commit only files where all the changes are good.
Property mode set to `100644`
File size: 16.4 KB

Rev	Line
[09ababb7]	1	/*
[051b3db8]	2	* Copyright (c) 2011 Jiri Svoboda
[09ababb7]	3	* All rights reserved.
	4	*
	5	* Redistribution and use in source and binary forms, with or without
	6	* modification, are permitted provided that the following conditions
	7	* are met:
	8	*
	9	* - Redistributions of source code must retain the above copyright
	10	* notice, this list of conditions and the following disclaimer.
	11	* - Redistributions in binary form must reproduce the above copyright
	12	* notice, this list of conditions and the following disclaimer in the
	13	* documentation and/or other materials provided with the distribution.
	14	* - The name of the author may not be used to endorse or promote products
	15	* derived from this software without specific prior written permission.
	16	*
	17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	27	*/
	28
	29	/** @file Lexer (lexical analyzer).
	30	*
	31	* Consumes a text file and produces a sequence of lexical elements (lems).
	32	*/
	33
	34	#include <stdio.h>
	35	#include <stdlib.h>
[23de644]	36	#include "bigint.h"
[051bc69a]	37	#include "cspan.h"
[09ababb7]	38	#include "mytypes.h"
	39	#include "input.h"
[94d484a]	40	#include "os/os.h"
[09ababb7]	41	#include "strtab.h"
	42
	43	#include "lex.h"
	44
	45	#define TAB_WIDTH 8
	46
/** Selects which kind of quoted literal lex_char_string_core() reads. */
typedef enum {
	/** Character literal, delimited by single quotes */
	cs_chr,
	/** String literal, delimited by double quotes */
	cs_str
} chr_str_t;
	51
[37f527b]	52	static void lex_touch(lex_t *lex);
	53	static bool_t lex_read_try(lex_t *lex);
[94d484a]	54
	55	static void lex_skip_comment(lex_t *lex);
[09ababb7]	56	static void lex_skip_ws(lex_t *lex);
	57	static bool_t is_wstart(char c);
	58	static bool_t is_wcont(char c);
	59	static bool_t is_digit(char c);
	60	static void lex_word(lex_t *lex);
[074444f]	61	static void lex_char(lex_t *lex);
[09ababb7]	62	static void lex_number(lex_t *lex);
	63	static void lex_string(lex_t *lex);
[051b3db8]	64	static void lex_char_string_core(lex_t *lex, chr_str_t cs);
[09ababb7]	65	static int digit_value(char c);
	66
	67	/* Note: This imposes an implementation limit on identifier length. */
	68	#define IBUF_SIZE 128
	69	static char ident_buf[IBUF_SIZE + 1];
	70
	71	/* XXX This imposes an implementation limit on string literal length. */
	72	#define SLBUF_SIZE 128
	73	static char strlit_buf[SLBUF_SIZE + 1];
	74
	75	/** Lclass-string pair */
	76	struct lc_name {
	77	lclass_t lclass;
	78	const char *name;
	79	};
	80
	81	/** Keyword names. Used both for printing and recognition. */
	82	static struct lc_name keywords[] = {
[051bc69a]	83	{ lc_and, "and" },
[37f527b]	84	{ lc_as, "as" },
[074444f]	85	{ lc_bool, "bool" },
[051bc69a]	86	{ lc_break, "break" },
[37f527b]	87	{ lc_builtin, "builtin" },
[051bc69a]	88	{ lc_char, "char" },
[09ababb7]	89	{ lc_class, "class" },
[38aaacc2]	90	{ lc_deleg, "deleg" },
[09ababb7]	91	{ lc_do, "do" },
[051bc69a]	92	{ lc_elif, "elif" },
[09ababb7]	93	{ lc_else, "else" },
	94	{ lc_end, "end" },
[051bc69a]	95	{ lc_enum, "enum" },
[09ababb7]	96	{ lc_except, "except" },
[074444f]	97	{ lc_false, "false" },
[09ababb7]	98	{ lc_finally, "finally" },
	99	{ lc_for, "for" },
	100	{ lc_fun, "fun" },
	101	{ lc_get, "get" },
	102	{ lc_if, "if" },
	103	{ lc_in, "in" },
	104	{ lc_int, "int" },
	105	{ lc_interface, "interface" },
	106	{ lc_is, "is" },
[fa36f29]	107	{ lc_new, "new" },
[051bc69a]	108	{ lc_not, "not" },
[fa36f29]	109	{ lc_nil, "nil" },
[051bc69a]	110	{ lc_or, "or" },
[09ababb7]	111	{ lc_override, "override" },
[94d484a]	112	{ lc_packed, "packed" },
[09ababb7]	113	{ lc_private, "private" },
	114	{ lc_prop, "prop" },
	115	{ lc_protected, "protected" },
	116	{ lc_public, "public" },
	117	{ lc_raise, "raise" },
[37f527b]	118	{ lc_resource, "resource" },
[09ababb7]	119	{ lc_return, "return" },
[fa36f29]	120	{ lc_self, "self" },
[09ababb7]	121	{ lc_set, "set" },
	122	{ lc_static, "static" },
	123	{ lc_string, "string" },
	124	{ lc_struct, "struct" },
[051b3db8]	125	{ lc_switch, "switch" },
[09ababb7]	126	{ lc_then, "then" },
	127	{ lc_this, "this" },
[074444f]	128	{ lc_true, "true" },
[09ababb7]	129	{ lc_var, "var" },
	130	{ lc_with, "with" },
[051b3db8]	131	{ lc_when, "when" },
[09ababb7]	132	{ lc_while, "while" },
	133	{ lc_yield, "yield" },
	134
	135	{ 0, NULL }
	136	};
	137
	138	/** Other simple lclasses. Only used for printing. */
	139	static struct lc_name simple_lc[] = {
	140	{ lc_invalid, "INVALID" },
	141	{ lc_eof, "EOF" },
	142
	143	/* Operators */
	144	{ lc_period, "." },
	145	{ lc_slash, "/" },
	146	{ lc_lparen, "(" },
	147	{ lc_rparen, ")" },
	148	{ lc_lsbr, "[" },
	149	{ lc_rsbr, "]" },
	150	{ lc_equal, "==" },
	151	{ lc_notequal, "!=" },
	152	{ lc_lt, "<" },
	153	{ lc_gt, ">" },
	154	{ lc_lt_equal, "<=" },
	155	{ lc_gt_equal, ">=" },
	156	{ lc_assign, "=" },
	157	{ lc_plus, "+" },
[23de644]	158	{ lc_minus, "-" },
	159	{ lc_mult, "*" },
[09ababb7]	160	{ lc_increase, "+=" },
	161
	162	/* Punctuators */
	163	{ lc_comma, "," },
	164	{ lc_colon, ":" },
	165	{ lc_scolon, ";" },
	166
	167	{ 0, NULL },
	168	};
	169
[1ebc1a62]	170	/** Print lclass value.
	171	*
	172	* Prints lclass (lexical element class) value in human-readable form
	173	* (for debugging).
	174	*
	175	* @param lclass Lclass value for display.
	176	*/
[09ababb7]	177	void lclass_print(lclass_t lclass)
	178	{
	179	struct lc_name *dp;
	180
	181	dp = keywords;
	182	while (dp->name != NULL) {
	183	if (dp->lclass == lclass) {
	184	printf("%s", dp->name);
	185	return;
	186	}
	187	++dp;
	188	}
	189
	190	dp = simple_lc;
	191	while (dp->name != NULL) {
	192	if (dp->lclass == lclass) {
	193	printf("%s", dp->name);
	194	return;
	195	}
	196	++dp;
	197	}
	198
	199	switch (lclass) {
	200	case lc_ident:
	201	printf("ident");
	202	break;
	203	case lc_lit_int:
	204	printf("int_literal");
	205	break;
	206	case lc_lit_string:
	207	printf("string_literal");
	208	break;
	209	default:
	210	printf("<unknown?>");
	211	break;
	212	}
	213	}
	214
[1ebc1a62]	215	/** Print lexical element.
	216	*
	217	* Prints lexical element in human-readable form (for debugging).
	218	*
	219	* @param lem Lexical element for display.
	220	*/
[09ababb7]	221	void lem_print(lem_t *lem)
	222	{
	223	lclass_print(lem->lclass);
	224
	225	switch (lem->lclass) {
	226	case lc_ident:
[38aaacc2]	227	printf("('%s')", strtab_get_str(lem->u.ident.sid));
[09ababb7]	228	break;
	229	case lc_lit_int:
[23de644]	230	printf("(");
	231	bigint_print(&lem->u.lit_int.value);
	232	printf(")");
[09ababb7]	233	break;
	234	case lc_lit_string:
	235	printf("(\"%s\")", lem->u.lit_string.value);
	236	default:
	237	break;
	238	}
	239	}
	240
[1ebc1a62]	241	/** Print lem coordinates.
	242	*
	243	* Print the coordinates (line number, column number) of a lexical element.
	244	*
	245	* @param lem Lexical element for coordinate printing.
	246	*/
[09ababb7]	247	void lem_print_coords(lem_t *lem)
	248	{
[051bc69a]	249	cspan_print(lem->cspan);
[09ababb7]	250	}
	251
[1ebc1a62]	252	/** Initialize lexer instance.
	253	*
	254	* @param lex Lexer object to initialize.
	255	* @param input Input to associate with lexer.
	256	*/
[09ababb7]	257	void lex_init(lex_t lex, struct input input)
	258	{
[b7fd2a0]	259	errno_t rc;
[09ababb7]	260
	261	lex->input = input;
	262
	263	rc = input_get_line(lex->input, &lex->inbuf);
	264	if (rc != EOK) {
	265	printf("Error reading input.\n");
	266	exit(1);
	267	}
	268
	269	lex->ibp = lex->inbuf;
	270	lex->col_adj = 0;
[051bc69a]	271	lex->prev_valid = b_false;
[37f527b]	272	lex->current_valid = b_true;
[09ababb7]	273	}
	274
[37f527b]	275	/** Advance to next lexical element.
	276	*
[1ebc1a62]	277	* The new element is read in lazily then it is actually accessed.
	278	*
	279	* @param lex Lexer object.
[37f527b]	280	*/
[09ababb7]	281	void lex_next(lex_t *lex)
[37f527b]	282	{
	283	/* Make sure the current lem has already been read in. */
	284	lex_touch(lex);
	285
	286	/* Force a new lem to be read on next access. */
	287	lex->current_valid = b_false;
	288	}
	289
	290	/** Get current lem.
	291	*
	292	* The returned pointer is invalidated by next call to lex_next()
[1ebc1a62]	293	*
	294	* @param lex Lexer object.
[23de644]	295	* @return Pointer to current lem. Owned by @a lex and only valid
[051bc69a]	296	* until next call to lex_xxx().
[37f527b]	297	*/
	298	lem_t lex_get_current(lex_t lex)
	299	{
	300	lex_touch(lex);
	301	return &lex->current;
	302	}
	303
[051bc69a]	304	/** Get previous lem if valid.
	305	*
	306	* The returned pointer is invalidated by next call to lex_next()
	307	*
	308	* @param lex Lexer object.
	309	* @return Pointer to previous lem. Owned by @a lex and only valid
	310	* until next call to lex_xxx().
	311	*/
	312	lem_t lex_peek_prev(lex_t lex)
	313	{
	314	if (lex->current_valid == b_false) {
	315	/*
	316	* This means the head is advanced but next lem was not read.
	317	* Thus the previous lem is still in @a current.
	318	*/
	319	return &lex->current;
	320	}
	321
	322	if (lex->prev_valid != b_true) {
	323	/* Looks like we are still at the first lem. */
	324	return NULL;
	325	}
	326
	327	/*
	328	* Current lem has been read in. Thus the previous lem was moved to
	329	* @a previous.
	330	*/
	331	return &lex->prev;
	332	}
	333
[1ebc1a62]	334	/** Read in the current lexical element (unless already read in).
	335	*
	336	* @param lex Lexer object.
	337	*/
[37f527b]	338	static void lex_touch(lex_t *lex)
[94d484a]	339	{
	340	bool_t got_lem;
	341
[37f527b]	342	if (lex->current_valid == b_true)
	343	return;
	344
[051bc69a]	345	/* Copy previous lem */
	346	lex->prev = lex->current;
	347	lex->prev_valid = b_true;
	348
[94d484a]	349	do {
[37f527b]	350	got_lem = lex_read_try(lex);
[94d484a]	351	} while (got_lem == b_false);
[37f527b]	352
	353	lex->current_valid = b_true;
[94d484a]	354	}
	355
	356	/** Try reading next lexical element.
	357	*
[1ebc1a62]	358	* Attemps to read the next lexical element. In some cases (such as a comment)
	359	* this function will need to give it another try and returns @c b_false
	360	* in such case.
	361	*
	362	* @param lex Lexer object.
	363	* @return @c b_true on success or @c b_false if it needs
	364	* restarting. On success the lem is stored to
	365	* the current lem in @a lex.
[94d484a]	366	*/
[37f527b]	367	static bool_t lex_read_try(lex_t *lex)
[09ababb7]	368	{
[051bc69a]	369	char bp, lsp;
	370	int line0, col0;
[09ababb7]	371
	372	lex_skip_ws(lex);
	373
	374	/*
	375	* Record lem coordinates. Line number we already have. For column
	376	* number we start with position in the input buffer. This works
	377	* for all characters except tab. Thus we keep track of tabs
	378	* separately using col_adj.
	379	*/
[051bc69a]	380	line0 = input_get_line_no(lex->input);
	381	col0 = 1 + lex->col_adj + (lex->ibp - lex->inbuf);
	382
	383	lex->current.cspan = cspan_new(lex->input, line0, col0, line0, col0);
[09ababb7]	384
[051bc69a]	385	lsp = lex->ibp;
[09ababb7]	386	bp = lex->ibp;
	387
	388	if (bp[0] == '\0') {
	389	/* End of input */
	390	lex->current.lclass = lc_eof;
[051bc69a]	391	goto finish;
[09ababb7]	392	}
	393
	394	if (is_wstart(bp[0])) {
	395	lex_word(lex);
[051bc69a]	396	goto finish;
[09ababb7]	397	}
	398
[074444f]	399	if (bp[0] == '\'') {
	400	lex_char(lex);
[051bc69a]	401	goto finish;
[074444f]	402	}
	403
[09ababb7]	404	if (is_digit(bp[0])) {
	405	lex_number(lex);
[051bc69a]	406	goto finish;
[09ababb7]	407	}
	408
	409	if (bp[0] == '"') {
	410	lex_string(lex);
[051bc69a]	411	goto finish;
[94d484a]	412	}
	413
	414	if (bp[0] == '-' && bp[1] == '-') {
	415	lex_skip_comment(lex);
[051bc69a]	416
	417	/* Compute ending column number */
	418	lex->current.cspan->col1 = col0 + (lex->ibp - lsp) - 1;
	419
	420	/* Try again */
[94d484a]	421	return b_false;
[09ababb7]	422	}
	423
	424	switch (bp[0]) {
[1433ecda]	425	case ',':
	426	lex->current.lclass = lc_comma;
	427	++bp;
	428	break;
	429	case ':':
	430	lex->current.lclass = lc_colon;
	431	++bp;
	432	break;
	433	case ';':
	434	lex->current.lclass = lc_scolon;
	435	++bp;
	436	break;
[09ababb7]	437
[1433ecda]	438	case '.':
	439	lex->current.lclass = lc_period;
	440	++bp;
	441	break;
	442	case '/':
	443	lex->current.lclass = lc_slash;
	444	++bp;
	445	break;
	446	case '(':
	447	lex->current.lclass = lc_lparen;
	448	++bp;
	449	break;
	450	case ')':
	451	lex->current.lclass = lc_rparen;
	452	++bp;
	453	break;
	454	case '[':
	455	lex->current.lclass = lc_lsbr;
	456	++bp;
	457	break;
	458	case ']':
	459	lex->current.lclass = lc_rsbr;
	460	++bp;
	461	break;
[09ababb7]	462
	463	case '=':
	464	if (bp[1] == '=') {
[1433ecda]	465	lex->current.lclass = lc_equal;
	466	bp += 2;
	467	break;
[09ababb7]	468	}
[1433ecda]	469	lex->current.lclass = lc_assign;
	470	++bp;
	471	break;
[09ababb7]	472
	473	case '!':
	474	if (bp[1] == '=') {
[1433ecda]	475	lex->current.lclass = lc_notequal;
	476	bp += 2;
	477	break;
[09ababb7]	478	}
	479	goto invalid;
	480
	481	case '+':
	482	if (bp[1] == '=') {
[1433ecda]	483	lex->current.lclass = lc_increase;
	484	bp += 2;
	485	break;
[09ababb7]	486	}
[1433ecda]	487	lex->current.lclass = lc_plus;
	488	++bp;
	489	break;
[09ababb7]	490
[23de644]	491	case '-':
[1433ecda]	492	lex->current.lclass = lc_minus;
	493	++bp;
	494	break;
[23de644]	495
	496	case '*':
[1433ecda]	497	lex->current.lclass = lc_mult;
	498	++bp;
	499	break;
[23de644]	500
[09ababb7]	501	case '<':
	502	if (bp[1] == '=') {
[1433ecda]	503	lex->current.lclass = lc_lt_equal;
	504	bp += 2;
	505	break;
[09ababb7]	506	}
[1433ecda]	507	lex->current.lclass = lc_lt;
	508	++bp;
	509	break;
[09ababb7]	510
	511	case '>':
	512	if (bp[1] == '=') {
[1433ecda]	513	lex->current.lclass = lc_gt_equal;
	514	bp += 2;
	515	break;
[09ababb7]	516	}
[1433ecda]	517	lex->current.lclass = lc_gt;
	518	++bp;
	519	break;
[09ababb7]	520
	521	default:
	522	goto invalid;
	523	}
	524
	525	lex->ibp = bp;
[051bc69a]	526
	527	finish:
	528	/* Compute ending column number */
	529	lex->current.cspan->col1 = col0 + (lex->ibp - lsp) - 1;
[94d484a]	530	return b_true;
[09ababb7]	531
	532	invalid:
	533	lex->current.lclass = lc_invalid;
	534	++bp;
	535	lex->ibp = bp;
[94d484a]	536
	537	return b_true;
[09ababb7]	538	}
	539
[1ebc1a62]	540	/** Lex a word (identifier or keyword).
	541	*
	542	* Read in a word. This may later turn out to be a keyword or a regular
	543	* identifier. It is stored in the current lem in @a lex.
	544	*
	545	* @param lex Lexer object.
	546	*/
[09ababb7]	547	static void lex_word(lex_t *lex)
	548	{
	549	struct lc_name *dp;
	550	char *bp;
	551	int idx;
	552
	553	bp = lex->ibp;
	554	ident_buf[0] = bp[0];
	555	idx = 1;
	556
	557	while (is_wcont(bp[idx])) {
	558	if (idx >= IBUF_SIZE) {
	559	printf("Error: Identifier too long.\n");
	560	exit(1);
	561	}
	562
	563	ident_buf[idx] = bp[idx];
	564	++idx;
	565	}
	566
	567	lex->ibp = bp + idx;
	568
	569	ident_buf[idx] = '\0';
	570
	571	dp = keywords;
	572	while (dp->name != NULL) {
[94d484a]	573	if (os_str_cmp(ident_buf, dp->name) == 0) {
[09ababb7]	574	/* Match */
	575	lex->current.lclass = dp->lclass;
	576	return;
	577	}
	578	++dp;
	579	}
	580
	581	/* No matching keyword -- it must be an identifier. */
	582	lex->current.lclass = lc_ident;
	583	lex->current.u.ident.sid = strtab_get_sid(ident_buf);
	584	}
	585
[074444f]	586	/** Lex a character literal.
	587	*
	588	* Reads in a character literal and stores it in the current lem in @a lex.
	589	*
	590	* @param lex Lexer object.
	591	*/
	592	static void lex_char(lex_t *lex)
	593	{
	594	size_t len;
	595	int char_val;
	596
[051b3db8]	597	lex_char_string_core(lex, cs_chr);
[074444f]	598
	599	len = os_str_length(strlit_buf);
	600	if (len != 1) {
	601	printf("Character literal should contain one character, "
	602	"but contains %u characters instead.\n", (unsigned) len);
	603	exit(1);
	604	}
	605
	606	os_str_get_char(strlit_buf, 0, &char_val);
	607	lex->current.lclass = lc_lit_char;
	608	bigint_init(&lex->current.u.lit_char.value, char_val);
	609	}
	610
[1ebc1a62]	611	/** Lex a numeric literal.
	612	*
	613	* Reads in a numeric literal and stores it in the current lem in @a lex.
	614	*
	615	* @param lex Lexer object.
	616	*/
[09ababb7]	617	static void lex_number(lex_t *lex)
	618	{
	619	char *bp;
[23de644]	620	bigint_t value;
	621	bigint_t dgval;
	622	bigint_t base;
	623	bigint_t tprod;
[09ababb7]	624
	625	bp = lex->ibp;
[23de644]	626
	627	bigint_init(&value, 0);
	628	bigint_init(&base, 10);
[09ababb7]	629
	630	while (is_digit(*bp)) {
[23de644]	631	bigint_mul(&value, &base, &tprod);
	632	bigint_init(&dgval, digit_value(*bp));
	633
	634	bigint_destroy(&value);
	635	bigint_add(&tprod, &dgval, &value);
	636	bigint_destroy(&tprod);
	637	bigint_destroy(&dgval);
	638
[09ababb7]	639	++bp;
	640	}
	641
[23de644]	642	bigint_destroy(&base);
	643
[09ababb7]	644	lex->ibp = bp;
	645
	646	lex->current.lclass = lc_lit_int;
[23de644]	647	bigint_shallow_copy(&value, &lex->current.u.lit_int.value);
[09ababb7]	648	}
	649
[1ebc1a62]	650	/** Lex a string literal.
	651	*
	652	* Reads in a string literal and stores it in the current lem in @a lex.
	653	*
	654	* @param lex Lexer object.
	655	*/
[09ababb7]	656	static void lex_string(lex_t *lex)
[051b3db8]	657	{
	658	lex_char_string_core(lex, cs_str);
	659
	660	lex->current.lclass = lc_lit_string;
	661	lex->current.u.lit_string.value = os_str_dup(strlit_buf);
	662	}
	663
	664	static void lex_char_string_core(lex_t *lex, chr_str_t cs)
[09ababb7]	665	{
	666	char *bp;
[051b3db8]	667	int sidx, didx;
	668	char term;
	669	const char descr, cap_descr;
	670	char spchar;
	671
	672	/* Make compiler happy */
	673	term = '\0';
	674	descr = NULL;
	675	cap_descr = NULL;
	676
	677	switch (cs) {
	678	case cs_chr:
	679	term = '\'';
	680	descr = "character";
	681	cap_descr = "Character";
	682	break;
	683	case cs_str:
	684	term = '"';
	685	descr = "string";
	686	cap_descr = "String";
	687	break;
	688	}
[09ababb7]	689
	690	bp = lex->ibp + 1;
[051b3db8]	691	sidx = didx = 0;
[09ababb7]	692
[051b3db8]	693	while (bp[sidx] != term) {
	694	if (didx >= SLBUF_SIZE) {
	695	printf("Error: %s literal too long.\n", cap_descr);
[09ababb7]	696	exit(1);
	697	}
	698
[051b3db8]	699	if (bp[sidx] == '\0') {
	700	printf("Error: Unterminated %s literal.\n", descr);
[09ababb7]	701	exit(1);
	702	}
	703
[051b3db8]	704	if (bp[sidx] == '\\') {
	705	switch (bp[sidx + 1]) {
	706	case '\\':
	707	spchar = '\\';
	708	break;
	709	case '\'':
	710	spchar = '\'';
	711	break;
	712	case '"':
	713	spchar = '"';
	714	break;
	715	case 'n':
	716	spchar = '\n';
	717	break;
	718	case 't':
	719	spchar = '\t';
	720	break;
	721	default:
	722	printf("Error: Unknown character escape sequence.\n");
	723	exit(1);
	724	}
[09ababb7]	725
[051b3db8]	726	strlit_buf[didx] = spchar;
	727	++didx;
	728	sidx += 2;
	729	} else {
	730	strlit_buf[didx] = bp[sidx];
[1433ecda]	731	++sidx;
	732	++didx;
[051b3db8]	733	}
	734	}
[09ababb7]	735
[051b3db8]	736	lex->ibp = bp + sidx + 1;
[09ababb7]	737
[051b3db8]	738	strlit_buf[didx] = '\0';
[09ababb7]	739	}
	740
[1ebc1a62]	741	/** Lex a single-line comment.
	742	*
	743	* This does not produce any lem. The comment is just skipped.
	744	*
	745	* @param lex Lexer object.
	746	*/
[94d484a]	747	static void lex_skip_comment(lex_t *lex)
	748	{
	749	char *bp;
	750
	751	bp = lex->ibp + 2;
	752
	753	while (bp != '\n' && bp != '\0') {
	754	++bp;
	755	}
	756
	757	lex->ibp = bp;
	758	}
[09ababb7]	759
[1ebc1a62]	760	/** Skip whitespace characters.
	761	*
	762	* This does not produce any lem. The whitespace is just skipped.
	763	*
	764	* @param lex Lexer object.
	765	*/
[09ababb7]	766	static void lex_skip_ws(lex_t *lex)
	767	{
	768	char *bp;
[b7fd2a0]	769	errno_t rc;
[09ababb7]	770
	771	bp = lex->ibp;
	772
	773	while (b_true) {
	774	while (bp == ' ' \|\| bp == '\t') {
[1ebc1a62]	775	if (*bp == '\t') {
	776	/* XXX This is too simplifed. */
[09ababb7]	777	lex->col_adj += (TAB_WIDTH - 1);
[1ebc1a62]	778	}
[09ababb7]	779	++bp;
	780	}
	781
	782	if (*bp != '\n')
	783	break;
	784
	785	/* Read next line */
	786	rc = input_get_line(lex->input, &lex->inbuf);
	787	if (rc != EOK) {
	788	printf("Error reading input.\n");
	789	exit(1);
	790	}
	791
	792	bp = lex->inbuf;
	793	lex->col_adj = 0;
	794	}
	795
	796	lex->ibp = bp;
	797	}
	798
[1ebc1a62]	799	/** Determine if character can start a word.
	800	*
	801	* @param c Character.
	802	* @return @c b_true if @a c can start a word, @c b_false otherwise.
	803	*/
[09ababb7]	804	static bool_t is_wstart(char c)
	805	{
	806	return ((c >= 'a') && (c <= 'z')) \|\| ((c >= 'A') && (c <= 'Z')) \|\|
	807	(c == '_');
	808	}
	809
[1ebc1a62]	810	/** Determine if character can continue a word.
	811	*
	812	* @param c Character.
	813	* @return @c b_true if @a c can start continue word, @c b_false
	814	* otherwise.
	815	*/
[09ababb7]	816	static bool_t is_wcont(char c)
	817	{
	818	return is_digit(c) \|\| is_wstart(c);
	819	}
	820
[1ebc1a62]	821	/** Determine if character is a numeric digit.
	822	*
	823	* @param c Character.
	824	* @return @c b_true if @a c is a numeric digit, @c b_false otherwise.
	825	*/
[09ababb7]	826	static bool_t is_digit(char c)
	827	{
	828	return ((c >= '0') && (c <= '9'));
	829	}
	830
/** Determine numeric value of digit character.
 *
 * The value is the distance of @a c from the character '0'.
 *
 * @param c	Character, must be a valid decimal digit.
 * @return	Value of the digit (0-9).
 */
static int digit_value(char c)
{
	int value;

	value = c - '0';
	return value;
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: