Context Navigation

lex.c@ 051b3db8

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 051b3db8 was 051b3db8, checked in by Jiri Svoboda <jiri@…>, 14 years ago

Update SBI to rev. 344 from upstream. What's new:

Builtin.WriteLine() renamed to Console.WriteLine()
Implemented 'switch' statement
Significantly reduced memory consumption (also increases execution speed in some cases)
Properties can be accessed via unqualified names
Exceptions raised during property accesses are now handled correctly
Some missing checks against expressions returning no value added

Property mode set to 100644

File size: 16.2 KB

Rev	Line
[09ababb7]	1	/*
[051b3db8]	2	* Copyright (c) 2011 Jiri Svoboda
[09ababb7]	3	* All rights reserved.
	4	*
	5	* Redistribution and use in source and binary forms, with or without
	6	* modification, are permitted provided that the following conditions
	7	* are met:
	8	*
	9	* - Redistributions of source code must retain the above copyright
	10	* notice, this list of conditions and the following disclaimer.
	11	* - Redistributions in binary form must reproduce the above copyright
	12	* notice, this list of conditions and the following disclaimer in the
	13	* documentation and/or other materials provided with the distribution.
	14	* - The name of the author may not be used to endorse or promote products
	15	* derived from this software without specific prior written permission.
	16	*
	17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	27	*/
	28
	29	/** @file Lexer (lexical analyzer).
	30	*
	31	* Consumes a text file and produces a sequence of lexical elements (lems).
	32	*/
	33
	34	#include <stdio.h>
	35	#include <stdlib.h>
[23de644]	36	#include "bigint.h"
[051bc69a]	37	#include "cspan.h"
[09ababb7]	38	#include "mytypes.h"
	39	#include "input.h"
[94d484a]	40	#include "os/os.h"
[09ababb7]	41	#include "strtab.h"
	42
	43	#include "lex.h"
	44
	45	#define TAB_WIDTH 8
	46
/** Selects which kind of quoted literal lex_char_string_core() reads. */
typedef enum {
	/** Character literal (terminated by an apostrophe) */
	cs_chr,
	/** String literal (terminated by a double quote) */
	cs_str
} chr_str_t;
	51
[37f527b]	52	static void lex_touch(lex_t *lex);
	53	static bool_t lex_read_try(lex_t *lex);
[94d484a]	54
	55	static void lex_skip_comment(lex_t *lex);
[09ababb7]	56	static void lex_skip_ws(lex_t *lex);
	57	static bool_t is_wstart(char c);
	58	static bool_t is_wcont(char c);
	59	static bool_t is_digit(char c);
	60	static void lex_word(lex_t *lex);
[074444f]	61	static void lex_char(lex_t *lex);
[09ababb7]	62	static void lex_number(lex_t *lex);
	63	static void lex_string(lex_t *lex);
[051b3db8]	64	static void lex_char_string_core(lex_t *lex, chr_str_t cs);
[09ababb7]	65	static int digit_value(char c);
	66
	67	/* Note: This imposes an implementation limit on identifier length. */
	68	#define IBUF_SIZE 128
	69	static char ident_buf[IBUF_SIZE + 1];
	70
	71	/* XXX This imposes an implementation limit on string literal length. */
	72	#define SLBUF_SIZE 128
	73	static char strlit_buf[SLBUF_SIZE + 1];
	74
	75	/** Lclass-string pair */
	76	struct lc_name {
	77	lclass_t lclass;
	78	const char *name;
	79	};
	80
	81	/** Keyword names. Used both for printing and recognition. */
	82	static struct lc_name keywords[] = {
[051bc69a]	83	{ lc_and, "and" },
[37f527b]	84	{ lc_as, "as" },
[074444f]	85	{ lc_bool, "bool" },
[051bc69a]	86	{ lc_break, "break" },
[37f527b]	87	{ lc_builtin, "builtin" },
[051bc69a]	88	{ lc_char, "char" },
[09ababb7]	89	{ lc_class, "class" },
[38aaacc2]	90	{ lc_deleg, "deleg" },
[09ababb7]	91	{ lc_do, "do" },
[051bc69a]	92	{ lc_elif, "elif" },
[09ababb7]	93	{ lc_else, "else" },
	94	{ lc_end, "end" },
[051bc69a]	95	{ lc_enum, "enum" },
[09ababb7]	96	{ lc_except, "except" },
[074444f]	97	{ lc_false, "false" },
[09ababb7]	98	{ lc_finally, "finally" },
	99	{ lc_for, "for" },
	100	{ lc_fun, "fun" },
	101	{ lc_get, "get" },
	102	{ lc_if, "if" },
	103	{ lc_in, "in" },
	104	{ lc_int, "int" },
	105	{ lc_interface, "interface" },
	106	{ lc_is, "is" },
[fa36f29]	107	{ lc_new, "new" },
[051bc69a]	108	{ lc_not, "not" },
[fa36f29]	109	{ lc_nil, "nil" },
[051bc69a]	110	{ lc_or, "or" },
[09ababb7]	111	{ lc_override, "override" },
[94d484a]	112	{ lc_packed, "packed" },
[09ababb7]	113	{ lc_private, "private" },
	114	{ lc_prop, "prop" },
	115	{ lc_protected, "protected" },
	116	{ lc_public, "public" },
	117	{ lc_raise, "raise" },
[37f527b]	118	{ lc_resource, "resource" },
[09ababb7]	119	{ lc_return, "return" },
[fa36f29]	120	{ lc_self, "self" },
[09ababb7]	121	{ lc_set, "set" },
	122	{ lc_static, "static" },
	123	{ lc_string, "string" },
	124	{ lc_struct, "struct" },
[051b3db8]	125	{ lc_switch, "switch" },
[09ababb7]	126	{ lc_then, "then" },
	127	{ lc_this, "this" },
[074444f]	128	{ lc_true, "true" },
[09ababb7]	129	{ lc_var, "var" },
	130	{ lc_with, "with" },
[051b3db8]	131	{ lc_when, "when" },
[09ababb7]	132	{ lc_while, "while" },
	133	{ lc_yield, "yield" },
	134
	135	{ 0, NULL }
	136	};
	137
	138	/** Other simple lclasses. Only used for printing. */
	139	static struct lc_name simple_lc[] = {
	140	{ lc_invalid, "INVALID" },
	141	{ lc_eof, "EOF" },
	142
	143	/* Operators */
	144	{ lc_period, "." },
	145	{ lc_slash, "/" },
	146	{ lc_lparen, "(" },
	147	{ lc_rparen, ")" },
	148	{ lc_lsbr, "[" },
	149	{ lc_rsbr, "]" },
	150	{ lc_equal, "==" },
	151	{ lc_notequal, "!=" },
	152	{ lc_lt, "<" },
	153	{ lc_gt, ">" },
	154	{ lc_lt_equal, "<=" },
	155	{ lc_gt_equal, ">=" },
	156	{ lc_assign, "=" },
	157	{ lc_plus, "+" },
[23de644]	158	{ lc_minus, "-" },
	159	{ lc_mult, "*" },
[09ababb7]	160	{ lc_increase, "+=" },
	161
	162	/* Punctuators */
	163	{ lc_comma, "," },
	164	{ lc_colon, ":" },
	165	{ lc_scolon, ";" },
	166
	167	{ 0, NULL },
	168	};
	169
[1ebc1a62]	170	/** Print lclass value.
	171	*
	172	* Prints lclass (lexical element class) value in human-readable form
	173	* (for debugging).
	174	*
	175	* @param lclass Lclass value for display.
	176	*/
[09ababb7]	177	void lclass_print(lclass_t lclass)
	178	{
	179	struct lc_name *dp;
	180
	181	dp = keywords;
	182	while (dp->name != NULL) {
	183	if (dp->lclass == lclass) {
	184	printf("%s", dp->name);
	185	return;
	186	}
	187	++dp;
	188	}
	189
	190	dp = simple_lc;
	191	while (dp->name != NULL) {
	192	if (dp->lclass == lclass) {
	193	printf("%s", dp->name);
	194	return;
	195	}
	196	++dp;
	197	}
	198
	199	switch (lclass) {
	200	case lc_ident:
	201	printf("ident");
	202	break;
	203	case lc_lit_int:
	204	printf("int_literal");
	205	break;
	206	case lc_lit_string:
	207	printf("string_literal");
	208	break;
	209	default:
	210	printf("<unknown?>");
	211	break;
	212	}
	213	}
	214
[1ebc1a62]	215	/** Print lexical element.
	216	*
	217	* Prints lexical element in human-readable form (for debugging).
	218	*
	219	* @param lem Lexical element for display.
	220	*/
[09ababb7]	221	void lem_print(lem_t *lem)
	222	{
	223	lclass_print(lem->lclass);
	224
	225	switch (lem->lclass) {
	226	case lc_ident:
[38aaacc2]	227	printf("('%s')", strtab_get_str(lem->u.ident.sid));
[09ababb7]	228	break;
	229	case lc_lit_int:
[23de644]	230	printf("(");
	231	bigint_print(&lem->u.lit_int.value);
	232	printf(")");
[09ababb7]	233	break;
	234	case lc_lit_string:
	235	printf("(\"%s\")", lem->u.lit_string.value);
	236	default:
	237	break;
	238	}
	239	}
	240
[1ebc1a62]	241	/** Print lem coordinates.
	242	*
	243	* Print the coordinates (line number, column number) of a lexical element.
	244	*
	245	* @param lem Lexical element for coordinate printing.
	246	*/
[09ababb7]	247	void lem_print_coords(lem_t *lem)
	248	{
[051bc69a]	249	cspan_print(lem->cspan);
[09ababb7]	250	}
	251
[1ebc1a62]	252	/** Initialize lexer instance.
	253	*
	254	* @param lex Lexer object to initialize.
	255	* @param input Input to associate with lexer.
	256	*/
[09ababb7]	257	void lex_init(lex_t lex, struct input input)
	258	{
	259	int rc;
	260
	261	lex->input = input;
	262
	263	rc = input_get_line(lex->input, &lex->inbuf);
	264	if (rc != EOK) {
	265	printf("Error reading input.\n");
	266	exit(1);
	267	}
	268
	269	lex->ibp = lex->inbuf;
	270	lex->col_adj = 0;
[051bc69a]	271	lex->prev_valid = b_false;
[37f527b]	272	lex->current_valid = b_true;
[09ababb7]	273	}
	274
[37f527b]	275	/** Advance to next lexical element.
	276	*
[1ebc1a62]	277	* The new element is read in lazily then it is actually accessed.
	278	*
	279	* @param lex Lexer object.
[37f527b]	280	*/
[09ababb7]	281	void lex_next(lex_t *lex)
[37f527b]	282	{
	283	/* Make sure the current lem has already been read in. */
	284	lex_touch(lex);
	285
	286	/* Force a new lem to be read on next access. */
	287	lex->current_valid = b_false;
	288	}
	289
	290	/** Get current lem.
	291	*
	292	* The returned pointer is invalidated by next call to lex_next()
[1ebc1a62]	293	*
	294	* @param lex Lexer object.
[23de644]	295	* @return Pointer to current lem. Owned by @a lex and only valid
[051bc69a]	296	* until next call to lex_xxx().
[37f527b]	297	*/
	298	lem_t lex_get_current(lex_t lex)
	299	{
	300	lex_touch(lex);
	301	return &lex->current;
	302	}
	303
[051bc69a]	304	/** Get previous lem if valid.
	305	*
	306	* The returned pointer is invalidated by next call to lex_next()
	307	*
	308	* @param lex Lexer object.
	309	* @return Pointer to previous lem. Owned by @a lex and only valid
	310	* until next call to lex_xxx().
	311	*/
	312	lem_t lex_peek_prev(lex_t lex)
	313	{
	314	if (lex->current_valid == b_false) {
	315	/*
	316	* This means the head is advanced but next lem was not read.
	317	* Thus the previous lem is still in @a current.
	318	*/
	319	return &lex->current;
	320	}
	321
	322	if (lex->prev_valid != b_true) {
	323	/* Looks like we are still at the first lem. */
	324	return NULL;
	325	}
	326
	327	/*
	328	* Current lem has been read in. Thus the previous lem was moved to
	329	* @a previous.
	330	*/
	331	return &lex->prev;
	332	}
	333
[1ebc1a62]	334	/** Read in the current lexical element (unless already read in).
	335	*
	336	* @param lex Lexer object.
	337	*/
[37f527b]	338	static void lex_touch(lex_t *lex)
[94d484a]	339	{
	340	bool_t got_lem;
	341
[37f527b]	342	if (lex->current_valid == b_true)
	343	return;
	344
[051bc69a]	345	/* Copy previous lem */
	346	lex->prev = lex->current;
	347	lex->prev_valid = b_true;
	348
[94d484a]	349	do {
[37f527b]	350	got_lem = lex_read_try(lex);
[94d484a]	351	} while (got_lem == b_false);
[37f527b]	352
	353	lex->current_valid = b_true;
[94d484a]	354	}
	355
	356	/** Try reading next lexical element.
	357	*
[1ebc1a62]	358	* Attemps to read the next lexical element. In some cases (such as a comment)
	359	* this function will need to give it another try and returns @c b_false
	360	* in such case.
	361	*
	362	* @param lex Lexer object.
	363	* @return @c b_true on success or @c b_false if it needs
	364	* restarting. On success the lem is stored to
	365	* the current lem in @a lex.
[94d484a]	366	*/
[37f527b]	367	static bool_t lex_read_try(lex_t *lex)
[09ababb7]	368	{
[051bc69a]	369	char bp, lsp;
	370	int line0, col0;
[09ababb7]	371
	372	lex_skip_ws(lex);
	373
	374	/*
	375	* Record lem coordinates. Line number we already have. For column
	376	* number we start with position in the input buffer. This works
	377	* for all characters except tab. Thus we keep track of tabs
	378	* separately using col_adj.
	379	*/
[051bc69a]	380	line0 = input_get_line_no(lex->input);
	381	col0 = 1 + lex->col_adj + (lex->ibp - lex->inbuf);
	382
	383	lex->current.cspan = cspan_new(lex->input, line0, col0, line0, col0);
[09ababb7]	384
[051bc69a]	385	lsp = lex->ibp;
[09ababb7]	386	bp = lex->ibp;
	387
	388	if (bp[0] == '\0') {
	389	/* End of input */
	390	lex->current.lclass = lc_eof;
[051bc69a]	391	goto finish;
[09ababb7]	392	}
	393
	394	if (is_wstart(bp[0])) {
	395	lex_word(lex);
[051bc69a]	396	goto finish;
[09ababb7]	397	}
	398
[074444f]	399	if (bp[0] == '\'') {
	400	lex_char(lex);
[051bc69a]	401	goto finish;
[074444f]	402	}
	403
[09ababb7]	404	if (is_digit(bp[0])) {
	405	lex_number(lex);
[051bc69a]	406	goto finish;
[09ababb7]	407	}
	408
	409	if (bp[0] == '"') {
	410	lex_string(lex);
[051bc69a]	411	goto finish;
[94d484a]	412	}
	413
	414	if (bp[0] == '-' && bp[1] == '-') {
	415	lex_skip_comment(lex);
[051bc69a]	416
	417	/* Compute ending column number */
	418	lex->current.cspan->col1 = col0 + (lex->ibp - lsp) - 1;
	419
	420	/* Try again */
[94d484a]	421	return b_false;
[09ababb7]	422	}
	423
	424	switch (bp[0]) {
	425	case ',': lex->current.lclass = lc_comma; ++bp; break;
	426	case ':': lex->current.lclass = lc_colon; ++bp; break;
	427	case ';': lex->current.lclass = lc_scolon; ++bp; break;
	428
	429	case '.': lex->current.lclass = lc_period; ++bp; break;
	430	case '/': lex->current.lclass = lc_slash; ++bp; break;
	431	case '(': lex->current.lclass = lc_lparen; ++bp; break;
	432	case ')': lex->current.lclass = lc_rparen; ++bp; break;
	433	case '[': lex->current.lclass = lc_lsbr; ++bp; break;
	434	case ']': lex->current.lclass = lc_rsbr; ++bp; break;
	435
	436	case '=':
	437	if (bp[1] == '=') {
	438	lex->current.lclass = lc_equal; bp += 2; break;
	439	}
	440	lex->current.lclass = lc_assign; ++bp; break;
	441
	442	case '!':
	443	if (bp[1] == '=') {
	444	lex->current.lclass = lc_notequal; bp += 2; break;
	445	}
	446	goto invalid;
	447
	448	case '+':
	449	if (bp[1] == '=') {
	450	lex->current.lclass = lc_increase; bp += 2; break;
	451	}
	452	lex->current.lclass = lc_plus; ++bp; break;
	453
[23de644]	454	case '-':
	455	lex->current.lclass = lc_minus; ++bp; break;
	456
	457	case '*':
	458	lex->current.lclass = lc_mult; ++bp; break;
	459
[09ababb7]	460	case '<':
	461	if (bp[1] == '=') {
	462	lex->current.lclass = lc_lt_equal; bp += 2; break;
	463	}
	464	lex->current.lclass = lc_lt; ++bp; break;
	465
	466	case '>':
	467	if (bp[1] == '=') {
	468	lex->current.lclass = lc_gt_equal; bp += 2; break;
	469	}
	470	lex->current.lclass = lc_gt; ++bp; break;
	471
	472	default:
	473	goto invalid;
	474	}
	475
	476	lex->ibp = bp;
[051bc69a]	477
	478	finish:
	479	/* Compute ending column number */
	480	lex->current.cspan->col1 = col0 + (lex->ibp - lsp) - 1;
[94d484a]	481	return b_true;
[09ababb7]	482
	483	invalid:
	484	lex->current.lclass = lc_invalid;
	485	++bp;
	486	lex->ibp = bp;
[94d484a]	487
	488	return b_true;
[09ababb7]	489	}
	490
[1ebc1a62]	491	/** Lex a word (identifier or keyword).
	492	*
	493	* Read in a word. This may later turn out to be a keyword or a regular
	494	* identifier. It is stored in the current lem in @a lex.
	495	*
	496	* @param lex Lexer object.
	497	*/
[09ababb7]	498	static void lex_word(lex_t *lex)
	499	{
	500	struct lc_name *dp;
	501	char *bp;
	502	int idx;
	503
	504	bp = lex->ibp;
	505	ident_buf[0] = bp[0];
	506	idx = 1;
	507
	508	while (is_wcont(bp[idx])) {
	509	if (idx >= IBUF_SIZE) {
	510	printf("Error: Identifier too long.\n");
	511	exit(1);
	512	}
	513
	514	ident_buf[idx] = bp[idx];
	515	++idx;
	516	}
	517
	518	lex->ibp = bp + idx;
	519
	520	ident_buf[idx] = '\0';
	521
	522	dp = keywords;
	523	while (dp->name != NULL) {
[94d484a]	524	if (os_str_cmp(ident_buf, dp->name) == 0) {
[09ababb7]	525	/* Match */
	526	lex->current.lclass = dp->lclass;
	527	return;
	528	}
	529	++dp;
	530	}
	531
	532	/* No matching keyword -- it must be an identifier. */
	533	lex->current.lclass = lc_ident;
	534	lex->current.u.ident.sid = strtab_get_sid(ident_buf);
	535	}
	536
[074444f]	537	/** Lex a character literal.
	538	*
	539	* Reads in a character literal and stores it in the current lem in @a lex.
	540	*
	541	* @param lex Lexer object.
	542	*/
	543	static void lex_char(lex_t *lex)
	544	{
	545	size_t len;
	546	int char_val;
	547
[051b3db8]	548	lex_char_string_core(lex, cs_chr);
[074444f]	549
	550	len = os_str_length(strlit_buf);
	551	if (len != 1) {
	552	printf("Character literal should contain one character, "
	553	"but contains %u characters instead.\n", (unsigned) len);
	554	exit(1);
	555	}
	556
	557	os_str_get_char(strlit_buf, 0, &char_val);
	558	lex->current.lclass = lc_lit_char;
	559	bigint_init(&lex->current.u.lit_char.value, char_val);
	560	}
	561
[1ebc1a62]	562	/** Lex a numeric literal.
	563	*
	564	* Reads in a numeric literal and stores it in the current lem in @a lex.
	565	*
	566	* @param lex Lexer object.
	567	*/
[09ababb7]	568	static void lex_number(lex_t *lex)
	569	{
	570	char *bp;
[23de644]	571	bigint_t value;
	572	bigint_t dgval;
	573	bigint_t base;
	574	bigint_t tprod;
[09ababb7]	575
	576	bp = lex->ibp;
[23de644]	577
	578	bigint_init(&value, 0);
	579	bigint_init(&base, 10);
[09ababb7]	580
	581	while (is_digit(*bp)) {
[23de644]	582	bigint_mul(&value, &base, &tprod);
	583	bigint_init(&dgval, digit_value(*bp));
	584
	585	bigint_destroy(&value);
	586	bigint_add(&tprod, &dgval, &value);
	587	bigint_destroy(&tprod);
	588	bigint_destroy(&dgval);
	589
[09ababb7]	590	++bp;
	591	}
	592
[23de644]	593	bigint_destroy(&base);
	594
[09ababb7]	595	lex->ibp = bp;
	596
	597	lex->current.lclass = lc_lit_int;
[23de644]	598	bigint_shallow_copy(&value, &lex->current.u.lit_int.value);
[09ababb7]	599	}
	600
[1ebc1a62]	601	/** Lex a string literal.
	602	*
	603	* Reads in a string literal and stores it in the current lem in @a lex.
	604	*
	605	* @param lex Lexer object.
	606	*/
[09ababb7]	607	static void lex_string(lex_t *lex)
[051b3db8]	608	{
	609	lex_char_string_core(lex, cs_str);
	610
	611	lex->current.lclass = lc_lit_string;
	612	lex->current.u.lit_string.value = os_str_dup(strlit_buf);
	613	}
	614
	615	static void lex_char_string_core(lex_t *lex, chr_str_t cs)
[09ababb7]	616	{
	617	char *bp;
[051b3db8]	618	int sidx, didx;
	619	char term;
	620	const char descr, cap_descr;
	621	char spchar;
	622
	623	/* Make compiler happy */
	624	term = '\0';
	625	descr = NULL;
	626	cap_descr = NULL;
	627
	628	switch (cs) {
	629	case cs_chr:
	630	term = '\'';
	631	descr = "character";
	632	cap_descr = "Character";
	633	break;
	634	case cs_str:
	635	term = '"';
	636	descr = "string";
	637	cap_descr = "String";
	638	break;
	639	}
[09ababb7]	640
	641	bp = lex->ibp + 1;
[051b3db8]	642	sidx = didx = 0;
[09ababb7]	643
[051b3db8]	644	while (bp[sidx] != term) {
	645	if (didx >= SLBUF_SIZE) {
	646	printf("Error: %s literal too long.\n", cap_descr);
[09ababb7]	647	exit(1);
	648	}
	649
[051b3db8]	650	if (bp[sidx] == '\0') {
	651	printf("Error: Unterminated %s literal.\n", descr);
[09ababb7]	652	exit(1);
	653	}
	654
[051b3db8]	655	if (bp[sidx] == '\\') {
	656	switch (bp[sidx + 1]) {
	657	case '\\':
	658	spchar = '\\';
	659	break;
	660	case '\'':
	661	spchar = '\'';
	662	break;
	663	case '"':
	664	spchar = '"';
	665	break;
	666	case 'n':
	667	spchar = '\n';
	668	break;
	669	case 't':
	670	spchar = '\t';
	671	break;
	672	default:
	673	printf("Error: Unknown character escape sequence.\n");
	674	exit(1);
	675	}
[09ababb7]	676
[051b3db8]	677	strlit_buf[didx] = spchar;
	678	++didx;
	679	sidx += 2;
	680	} else {
	681	strlit_buf[didx] = bp[sidx];
	682	++sidx; ++didx;
	683	}
	684	}
[09ababb7]	685
[051b3db8]	686	lex->ibp = bp + sidx + 1;
[09ababb7]	687
[051b3db8]	688	strlit_buf[didx] = '\0';
[09ababb7]	689	}
	690
[1ebc1a62]	691	/** Lex a single-line comment.
	692	*
	693	* This does not produce any lem. The comment is just skipped.
	694	*
	695	* @param lex Lexer object.
	696	*/
[94d484a]	697	static void lex_skip_comment(lex_t *lex)
	698	{
	699	char *bp;
	700
	701	bp = lex->ibp + 2;
	702
	703	while (bp != '\n' && bp != '\0') {
	704	++bp;
	705	}
	706
	707	lex->ibp = bp;
	708	}
[09ababb7]	709
[1ebc1a62]	710	/** Skip whitespace characters.
	711	*
	712	* This does not produce any lem. The whitespace is just skipped.
	713	*
	714	* @param lex Lexer object.
	715	*/
[09ababb7]	716	static void lex_skip_ws(lex_t *lex)
	717	{
	718	char *bp;
	719	int rc;
	720
	721	bp = lex->ibp;
	722
	723	while (b_true) {
	724	while (bp == ' ' \|\| bp == '\t') {
[1ebc1a62]	725	if (*bp == '\t') {
	726	/* XXX This is too simplifed. */
[09ababb7]	727	lex->col_adj += (TAB_WIDTH - 1);
[1ebc1a62]	728	}
[09ababb7]	729	++bp;
	730	}
	731
	732	if (*bp != '\n')
	733	break;
	734
	735	/* Read next line */
	736	rc = input_get_line(lex->input, &lex->inbuf);
	737	if (rc != EOK) {
	738	printf("Error reading input.\n");
	739	exit(1);
	740	}
	741
	742	bp = lex->inbuf;
	743	lex->col_adj = 0;
	744	}
	745
	746	lex->ibp = bp;
	747	}
	748
[1ebc1a62]	749	/** Determine if character can start a word.
	750	*
	751	* @param c Character.
	752	* @return @c b_true if @a c can start a word, @c b_false otherwise.
	753	*/
[09ababb7]	754	static bool_t is_wstart(char c)
	755	{
	756	return ((c >= 'a') && (c <= 'z')) \|\| ((c >= 'A') && (c <= 'Z')) \|\|
	757	(c == '_');
	758	}
	759
[1ebc1a62]	760	/** Determine if character can continue a word.
	761	*
	762	* @param c Character.
	763	* @return @c b_true if @a c can start continue word, @c b_false
	764	* otherwise.
	765	*/
[09ababb7]	766	static bool_t is_wcont(char c)
	767	{
	768	return is_digit(c) \|\| is_wstart(c);
	769	}
	770
[1ebc1a62]	771	/** Determine if character is a numeric digit.
	772	*
	773	* @param c Character.
	774	* @return @c b_true if @a c is a numeric digit, @c b_false otherwise.
	775	*/
[09ababb7]	776	static bool_t is_digit(char c)
	777	{
	778	return ((c >= '0') && (c <= '9'));
	779	}
	780
/** Determine numeric value of digit character.
 *
 * No range checking is done; the result is simply the distance of @a c
 * from the character '0'.
 *
 * @param c		Character, must be a valid decimal digit.
 * @return		Value of the digit (0-9).
 */
static int digit_value(char c)
{
	return (c - '0');
}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/uspace/app/sbi/src/lex.c@ 051b3db8

Download in other formats: