source: mainline/uspace/app/bdsh/tok.c@ eb748a0

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since eb748a0 was a35b458, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 7 years ago

style: Remove trailing whitespace on _all_ lines, including empty ones, for particular file types.

Command used: tools/srepl '\s\+$' '' -- *.c *.h *.py *.sh *.s *.S *.ag

Currently, whitespace on empty lines is very inconsistent.
There are two basic choices: Either remove the whitespace, or keep empty lines
indented to the level of surrounding code. The former is AFAICT more common,
and also much easier to do automatically.

Alternatively, we could write a script for automatic indentation, and use that
instead. However, if such a script exists, it's possible to use the indented
style locally, by having the editor apply relevant conversions on load/save,
without affecting remote repository. IMO, it makes more sense to adopt
the simpler rule.

  • Property mode set to 100644
File size: 7.3 KB
Line 
1/*
2 * Copyright (c) 2011 Martin Sucha
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <str.h>
30#include <assert.h>
31#include <stdlib.h>
32#include <stddef.h>
33#include <errno.h>
34
35#include "tok.h"
36
37/* Forward declarations of static functions */
38static wchar_t tok_get_char(tokenizer_t *);
39static wchar_t tok_look_char(tokenizer_t *);
40static errno_t tok_push_char(tokenizer_t *, wchar_t);
41static errno_t tok_push_token(tokenizer_t *);
42static bool tok_pending_chars(tokenizer_t *);
43static errno_t tok_finish_string(tokenizer_t *);
44static void tok_start_token(tokenizer_t *, token_type_t);
45
46/** Initialize the token parser
47 *
48 * @param tok the tokenizer structure to initialize
49 * @param input the input string to tokenize
50 * @param out_tokens array of strings where to store the result
51 * @param max_tokens number of elements of the out_tokens array
52 */
53errno_t tok_init(tokenizer_t *tok, char *input, token_t *out_tokens,
54 size_t max_tokens)
55{
56 tok->in = input;
57 tok->in_offset = 0;
58 tok->last_in_offset = 0;
59 tok->in_char_offset = 0;
60 tok->last_in_char_offset = 0;
61
62 tok->outtok = out_tokens;
63 tok->outtok_offset = 0;
64 tok->outtok_size = max_tokens;
65
66 /* Prepare a buffer where all the token strings will be stored */
67 size_t len = str_size(input) + max_tokens + 1;
68 char *tmp = malloc(len);
69
70 if (tmp == NULL) {
71 return ENOMEM;
72 }
73
74 tok->outbuf = tmp;
75 tok->outbuf_offset = 0;
76 tok->outbuf_size = len;
77 tok->outbuf_last_start = 0;
78
79 return EOK;
80}
81
82/** Finalize the token parser */
83void tok_fini(tokenizer_t *tok)
84{
85 if (tok->outbuf != NULL) {
86 free(tok->outbuf);
87 }
88}
89
90/** Tokenize the input string into the tokens */
91errno_t tok_tokenize(tokenizer_t *tok, size_t *tokens_length)
92{
93 errno_t rc;
94 wchar_t next_char;
95
96 /* Read the input line char by char and append tokens */
97 while ((next_char = tok_look_char(tok)) != 0) {
98 if (next_char == ' ') {
99 /* Push the token if there is any.
100 * There may not be any pending char for a token in case
101 * there are several spaces in the input.
102 */
103 if (tok_pending_chars(tok)) {
104 rc = tok_push_token(tok);
105 if (rc != EOK) {
106 return rc;
107 }
108 }
109 tok_start_token(tok, TOKTYPE_SPACE);
110 /* Eat all the spaces */
111 while (tok_look_char(tok) == ' ') {
112 tok_push_char(tok, tok_get_char(tok));
113 }
114 tok_push_token(tok);
115
116 }
117 else if (next_char == '|') {
118 /* Pipes are tokens that are delimiters and should be
119 * output as a separate token
120 */
121 if (tok_pending_chars(tok)) {
122 rc = tok_push_token(tok);
123 if (rc != EOK) {
124 return rc;
125 }
126 }
127
128 tok_start_token(tok, TOKTYPE_PIPE);
129
130 rc = tok_push_char(tok, tok_get_char(tok));
131 if (rc != EOK) {
132 return rc;
133 }
134
135 rc = tok_push_token(tok);
136 if (rc != EOK) {
137 return rc;
138 }
139 }
140 else if (next_char == '\'') {
141 /* A string starts with a quote (') and ends again with a quote.
142 * A literal quote is written as ''
143 */
144 tok_start_token(tok, TOKTYPE_TEXT);
145 /* Eat the quote */
146 tok_get_char(tok);
147 rc = tok_finish_string(tok);
148 if (rc != EOK) {
149 return rc;
150 }
151 }
152 else {
153 if (!tok_pending_chars(tok)) {
154 tok_start_token(tok, TOKTYPE_TEXT);
155 }
156 /* If we are handling any other character, just append it to
157 * the current token.
158 */
159 rc = tok_push_char(tok, tok_get_char(tok));
160 if (rc != EOK) {
161 return rc;
162 }
163 }
164 }
165
166 /* Push the last token */
167 if (tok_pending_chars(tok)) {
168 rc = tok_push_token(tok);
169 if (rc != EOK) {
170 return rc;
171 }
172 }
173
174 *tokens_length = tok->outtok_offset;
175
176 return EOK;
177}
178
179/** Finish tokenizing an opened string */
180errno_t tok_finish_string(tokenizer_t *tok)
181{
182 errno_t rc;
183 wchar_t next_char;
184
185 while ((next_char = tok_look_char(tok)) != 0) {
186 if (next_char == '\'') {
187 /* Eat the quote */
188 tok_get_char(tok);
189 if (tok_look_char(tok) == '\'') {
190 /* Encode a single literal quote */
191 rc = tok_push_char(tok, '\'');
192 if (rc != EOK) {
193 return rc;
194 }
195
196 /* Swallow the additional one in the input */
197 tok_get_char(tok);
198 }
199 else {
200 /* The string end */
201 return tok_push_token(tok);
202 }
203 }
204 else {
205 rc = tok_push_char(tok, tok_get_char(tok));
206 if (rc != EOK) {
207 return rc;
208 }
209 }
210 }
211
212 /* If we are here, the string run to the end without being closed */
213 return EINVAL;
214}
215
216/** Get a char from input, advancing the input position */
217wchar_t tok_get_char(tokenizer_t *tok)
218{
219 tok->in_char_offset++;
220 return str_decode(tok->in, &tok->in_offset, STR_NO_LIMIT);
221}
222
223/** Get a char from input, while staying on the same input position */
224wchar_t tok_look_char(tokenizer_t *tok)
225{
226 size_t old_offset = tok->in_offset;
227 size_t old_char_offset = tok->in_char_offset;
228 wchar_t ret = tok_get_char(tok);
229 tok->in_offset = old_offset;
230 tok->in_char_offset = old_char_offset;
231 return ret;
232}
233
234/** Append a char to the end of the current token */
235errno_t tok_push_char(tokenizer_t *tok, wchar_t ch)
236{
237 return chr_encode(ch, tok->outbuf, &tok->outbuf_offset, tok->outbuf_size);
238}
239
240void tok_start_token(tokenizer_t *tok, token_type_t type)
241{
242 tok->current_type = type;
243}
244
245/** Push the current token to the output array */
246errno_t tok_push_token(tokenizer_t *tok)
247{
248 if (tok->outtok_offset >= tok->outtok_size) {
249 return EOVERFLOW;
250 }
251
252 if (tok->outbuf_offset >= tok->outbuf_size) {
253 return EOVERFLOW;
254 }
255
256 tok->outbuf[tok->outbuf_offset++] = 0;
257 token_t *tokinfo = &tok->outtok[tok->outtok_offset++];
258 tokinfo->type = tok->current_type;
259 tokinfo->text = tok->outbuf + tok->outbuf_last_start;
260 tokinfo->byte_start = tok->last_in_offset;
261 tokinfo->byte_length = tok->in_offset - tok->last_in_offset;
262 tokinfo->char_start = tok->last_in_char_offset;
263 tokinfo->char_length = tok->in_char_offset - tok->last_in_char_offset;
264 tok->outbuf_last_start = tok->outbuf_offset;
265
266 /* We have consumed the first char of the next token already */
267 tok->last_in_offset = tok->in_offset;
268 tok->last_in_char_offset = tok->in_char_offset;
269
270 return EOK;
271}
272
273/** Return true, if the current token is not empty */
274bool tok_pending_chars(tokenizer_t *tok)
275{
276 assert(tok->outbuf_offset >= tok->outbuf_last_start);
277 return (tok->outbuf_offset != tok->outbuf_last_start);
278}
Note: See TracBrowser for help on using the repository browser.