Context Navigation

tok.c@ eb748a0

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since eb748a0 was a35b458, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 7 years ago

style: Remove trailing whitespace on _all_ lines, including empty ones, for particular file types.

Command used: tools/srepl '\s\+$' '' -- *.c *.h *.py *.sh *.s *.S *.ag

Currently, whitespace on empty lines is very inconsistent.
There are two basic choices: Either remove the whitespace, or keep empty lines
indented to the level of surrounding code. The former is AFAICT more common,
and also much easier to do automatically.

Alternatively, we could write a script for automatic indentation, and use that
instead. However, if such a script exists, it's possible to use the indented
style locally, by having the editor apply relevant conversions on load/save,
without affecting remote repository. IMO, it makes more sense to adopt
the simpler rule.

Property mode set to 100644

File size: 7.3 KB

Line
1	/*
2	* Copyright (c) 2011 Martin Sucha
3	* All rights reserved.
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	*
9	* - Redistributions of source code must retain the above copyright
10	* notice, this list of conditions and the following disclaimer.
11	* - Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	* - The name of the author may not be used to endorse or promote products
15	* derived from this software without specific prior written permission.
16	*
17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	#include <str.h>
30	#include <assert.h>
31	#include <stdlib.h>
32	#include <stddef.h>
33	#include <errno.h>
34
35	#include "tok.h"
36
37	/* Forward declarations of static functions */
38	static wchar_t tok_get_char(tokenizer_t *);
39	static wchar_t tok_look_char(tokenizer_t *);
40	static errno_t tok_push_char(tokenizer_t *, wchar_t);
41	static errno_t tok_push_token(tokenizer_t *);
42	static bool tok_pending_chars(tokenizer_t *);
43	static errno_t tok_finish_string(tokenizer_t *);
44	static void tok_start_token(tokenizer_t *, token_type_t);
45
46	/** Initialize the token parser
47	*
48	* @param tok the tokenizer structure to initialize
49	* @param input the input string to tokenize
50	* @param out_tokens array of strings where to store the result
51	* @param max_tokens number of elements of the out_tokens array
52	*/
53	errno_t tok_init(tokenizer_t tok, char input, token_t *out_tokens,
54	size_t max_tokens)
55	{
56	tok->in = input;
57	tok->in_offset = 0;
58	tok->last_in_offset = 0;
59	tok->in_char_offset = 0;
60	tok->last_in_char_offset = 0;
61
62	tok->outtok = out_tokens;
63	tok->outtok_offset = 0;
64	tok->outtok_size = max_tokens;
65
66	/* Prepare a buffer where all the token strings will be stored */
67	size_t len = str_size(input) + max_tokens + 1;
68	char *tmp = malloc(len);
69
70	if (tmp == NULL) {
71	return ENOMEM;
72	}
73
74	tok->outbuf = tmp;
75	tok->outbuf_offset = 0;
76	tok->outbuf_size = len;
77	tok->outbuf_last_start = 0;
78
79	return EOK;
80	}
81
82	/** Finalize the token parser */
83	void tok_fini(tokenizer_t *tok)
84	{
85	if (tok->outbuf != NULL) {
86	free(tok->outbuf);
87	}
88	}
89
90	/** Tokenize the input string into the tokens */
91	errno_t tok_tokenize(tokenizer_t tok, size_t tokens_length)
92	{
93	errno_t rc;
94	wchar_t next_char;
95
96	/* Read the input line char by char and append tokens */
97	while ((next_char = tok_look_char(tok)) != 0) {
98	if (next_char == ' ') {
99	/* Push the token if there is any.
100	* There may not be any pending char for a token in case
101	* there are several spaces in the input.
102	*/
103	if (tok_pending_chars(tok)) {
104	rc = tok_push_token(tok);
105	if (rc != EOK) {
106	return rc;
107	}
108	}
109	tok_start_token(tok, TOKTYPE_SPACE);
110	/* Eat all the spaces */
111	while (tok_look_char(tok) == ' ') {
112	tok_push_char(tok, tok_get_char(tok));
113	}
114	tok_push_token(tok);
115
116	}
117	else if (next_char == '\|') {
118	/* Pipes are tokens that are delimiters and should be
119	* output as a separate token
120	*/
121	if (tok_pending_chars(tok)) {
122	rc = tok_push_token(tok);
123	if (rc != EOK) {
124	return rc;
125	}
126	}
127
128	tok_start_token(tok, TOKTYPE_PIPE);
129
130	rc = tok_push_char(tok, tok_get_char(tok));
131	if (rc != EOK) {
132	return rc;
133	}
134
135	rc = tok_push_token(tok);
136	if (rc != EOK) {
137	return rc;
138	}
139	}
140	else if (next_char == '\'') {
141	/* A string starts with a quote (') and ends again with a quote.
142	* A literal quote is written as ''
143	*/
144	tok_start_token(tok, TOKTYPE_TEXT);
145	/* Eat the quote */
146	tok_get_char(tok);
147	rc = tok_finish_string(tok);
148	if (rc != EOK) {
149	return rc;
150	}
151	}
152	else {
153	if (!tok_pending_chars(tok)) {
154	tok_start_token(tok, TOKTYPE_TEXT);
155	}
156	/* If we are handling any other character, just append it to
157	* the current token.
158	*/
159	rc = tok_push_char(tok, tok_get_char(tok));
160	if (rc != EOK) {
161	return rc;
162	}
163	}
164	}
165
166	/* Push the last token */
167	if (tok_pending_chars(tok)) {
168	rc = tok_push_token(tok);
169	if (rc != EOK) {
170	return rc;
171	}
172	}
173
174	*tokens_length = tok->outtok_offset;
175
176	return EOK;
177	}
178
179	/** Finish tokenizing an opened string */
180	errno_t tok_finish_string(tokenizer_t *tok)
181	{
182	errno_t rc;
183	wchar_t next_char;
184
185	while ((next_char = tok_look_char(tok)) != 0) {
186	if (next_char == '\'') {
187	/* Eat the quote */
188	tok_get_char(tok);
189	if (tok_look_char(tok) == '\'') {
190	/* Encode a single literal quote */
191	rc = tok_push_char(tok, '\'');
192	if (rc != EOK) {
193	return rc;
194	}
195
196	/* Swallow the additional one in the input */
197	tok_get_char(tok);
198	}
199	else {
200	/* The string end */
201	return tok_push_token(tok);
202	}
203	}
204	else {
205	rc = tok_push_char(tok, tok_get_char(tok));
206	if (rc != EOK) {
207	return rc;
208	}
209	}
210	}
211
212	/* If we are here, the string run to the end without being closed */
213	return EINVAL;
214	}
215
216	/** Get a char from input, advancing the input position */
217	wchar_t tok_get_char(tokenizer_t *tok)
218	{
219	tok->in_char_offset++;
220	return str_decode(tok->in, &tok->in_offset, STR_NO_LIMIT);
221	}
222
223	/** Get a char from input, while staying on the same input position */
224	wchar_t tok_look_char(tokenizer_t *tok)
225	{
226	size_t old_offset = tok->in_offset;
227	size_t old_char_offset = tok->in_char_offset;
228	wchar_t ret = tok_get_char(tok);
229	tok->in_offset = old_offset;
230	tok->in_char_offset = old_char_offset;
231	return ret;
232	}
233
234	/** Append a char to the end of the current token */
235	errno_t tok_push_char(tokenizer_t *tok, wchar_t ch)
236	{
237	return chr_encode(ch, tok->outbuf, &tok->outbuf_offset, tok->outbuf_size);
238	}
239
240	void tok_start_token(tokenizer_t *tok, token_type_t type)
241	{
242	tok->current_type = type;
243	}
244
245	/** Push the current token to the output array */
246	errno_t tok_push_token(tokenizer_t *tok)
247	{
248	if (tok->outtok_offset >= tok->outtok_size) {
249	return EOVERFLOW;
250	}
251
252	if (tok->outbuf_offset >= tok->outbuf_size) {
253	return EOVERFLOW;
254	}
255
256	tok->outbuf[tok->outbuf_offset++] = 0;
257	token_t *tokinfo = &tok->outtok[tok->outtok_offset++];
258	tokinfo->type = tok->current_type;
259	tokinfo->text = tok->outbuf + tok->outbuf_last_start;
260	tokinfo->byte_start = tok->last_in_offset;
261	tokinfo->byte_length = tok->in_offset - tok->last_in_offset;
262	tokinfo->char_start = tok->last_in_char_offset;
263	tokinfo->char_length = tok->in_char_offset - tok->last_in_char_offset;
264	tok->outbuf_last_start = tok->outbuf_offset;
265
266	/* We have consumed the first char of the next token already */
267	tok->last_in_offset = tok->in_offset;
268	tok->last_in_char_offset = tok->in_char_offset;
269
270	return EOK;
271	}
272
273	/** Return true, if the current token is not empty */
274	bool tok_pending_chars(tokenizer_t *tok)
275	{
276	assert(tok->outbuf_offset >= tok->outbuf_last_start);
277	return (tok->outbuf_offset != tok->outbuf_last_start);
278	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/uspace/app/bdsh/tok.c@ eb748a0

Download in other formats: