source: mainline/uspace/app/bdsh/tok.c@ 2aaba7e

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 2aaba7e was 582a0b8, checked in by Jakub Jermar <jakub@…>, 8 years ago

Remove unistd.h

  • Rename usleep() and sleep() to thread_usleep() and thread_sleep() and move to thread.[hc].
  • Include stddef.h in order to provide NULL.
  • Move getpagesize() to libposix.
  • Sync uspace/dist/src/c/demos with originals.
  • Property mode set to 100644
File size: 7.3 KB
Line 
1/*
2 * Copyright (c) 2011 Martin Sucha
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <str.h>
30#include <assert.h>
31#include <malloc.h>
32#include <stdlib.h>
33#include <stddef.h>
34#include <errno.h>
35
36#include "tok.h"
37
38/* Forward declarations of static functions */
39static wchar_t tok_get_char(tokenizer_t *);
40static wchar_t tok_look_char(tokenizer_t *);
41static int tok_push_char(tokenizer_t *, wchar_t);
42static int tok_push_token(tokenizer_t *);
43static bool tok_pending_chars(tokenizer_t *);
44static int tok_finish_string(tokenizer_t *);
45static void tok_start_token(tokenizer_t *, token_type_t);
46
47/** Initialize the token parser
48 *
49 * @param tok the tokenizer structure to initialize
50 * @param input the input string to tokenize
51 * @param out_tokens array of strings where to store the result
52 * @param max_tokens number of elements of the out_tokens array
53 */
54int tok_init(tokenizer_t *tok, char *input, token_t *out_tokens,
55 size_t max_tokens)
56{
57 tok->in = input;
58 tok->in_offset = 0;
59 tok->last_in_offset = 0;
60 tok->in_char_offset = 0;
61 tok->last_in_char_offset = 0;
62
63 tok->outtok = out_tokens;
64 tok->outtok_offset = 0;
65 tok->outtok_size = max_tokens;
66
67 /* Prepare a buffer where all the token strings will be stored */
68 size_t len = str_size(input) + max_tokens + 1;
69 char *tmp = malloc(len);
70
71 if (tmp == NULL) {
72 return ENOMEM;
73 }
74
75 tok->outbuf = tmp;
76 tok->outbuf_offset = 0;
77 tok->outbuf_size = len;
78 tok->outbuf_last_start = 0;
79
80 return EOK;
81}
82
83/** Finalize the token parser */
84void tok_fini(tokenizer_t *tok)
85{
86 if (tok->outbuf != NULL) {
87 free(tok->outbuf);
88 }
89}
90
91/** Tokenize the input string into the tokens */
92int tok_tokenize(tokenizer_t *tok, size_t *tokens_length)
93{
94 int rc;
95 wchar_t next_char;
96
97 /* Read the input line char by char and append tokens */
98 while ((next_char = tok_look_char(tok)) != 0) {
99 if (next_char == ' ') {
100 /* Push the token if there is any.
101 * There may not be any pending char for a token in case
102 * there are several spaces in the input.
103 */
104 if (tok_pending_chars(tok)) {
105 rc = tok_push_token(tok);
106 if (rc != EOK) {
107 return rc;
108 }
109 }
110 tok_start_token(tok, TOKTYPE_SPACE);
111 /* Eat all the spaces */
112 while (tok_look_char(tok) == ' ') {
113 tok_push_char(tok, tok_get_char(tok));
114 }
115 tok_push_token(tok);
116
117 }
118 else if (next_char == '|') {
119 /* Pipes are tokens that are delimiters and should be
120 * output as a separate token
121 */
122 if (tok_pending_chars(tok)) {
123 rc = tok_push_token(tok);
124 if (rc != EOK) {
125 return rc;
126 }
127 }
128
129 tok_start_token(tok, TOKTYPE_PIPE);
130
131 rc = tok_push_char(tok, tok_get_char(tok));
132 if (rc != EOK) {
133 return rc;
134 }
135
136 rc = tok_push_token(tok);
137 if (rc != EOK) {
138 return rc;
139 }
140 }
141 else if (next_char == '\'') {
142 /* A string starts with a quote (') and ends again with a quote.
143 * A literal quote is written as ''
144 */
145 tok_start_token(tok, TOKTYPE_TEXT);
146 /* Eat the quote */
147 tok_get_char(tok);
148 rc = tok_finish_string(tok);
149 if (rc != EOK) {
150 return rc;
151 }
152 }
153 else {
154 if (!tok_pending_chars(tok)) {
155 tok_start_token(tok, TOKTYPE_TEXT);
156 }
157 /* If we are handling any other character, just append it to
158 * the current token.
159 */
160 rc = tok_push_char(tok, tok_get_char(tok));
161 if (rc != EOK) {
162 return rc;
163 }
164 }
165 }
166
167 /* Push the last token */
168 if (tok_pending_chars(tok)) {
169 rc = tok_push_token(tok);
170 if (rc != EOK) {
171 return rc;
172 }
173 }
174
175 *tokens_length = tok->outtok_offset;
176
177 return EOK;
178}
179
180/** Finish tokenizing an opened string */
181int tok_finish_string(tokenizer_t *tok)
182{
183 int rc;
184 wchar_t next_char;
185
186 while ((next_char = tok_look_char(tok)) != 0) {
187 if (next_char == '\'') {
188 /* Eat the quote */
189 tok_get_char(tok);
190 if (tok_look_char(tok) == '\'') {
191 /* Encode a single literal quote */
192 rc = tok_push_char(tok, '\'');
193 if (rc != EOK) {
194 return rc;
195 }
196
197 /* Swallow the additional one in the input */
198 tok_get_char(tok);
199 }
200 else {
201 /* The string end */
202 return tok_push_token(tok);
203 }
204 }
205 else {
206 rc = tok_push_char(tok, tok_get_char(tok));
207 if (rc != EOK) {
208 return rc;
209 }
210 }
211 }
212
213 /* If we are here, the string run to the end without being closed */
214 return EINVAL;
215}
216
217/** Get a char from input, advancing the input position */
218wchar_t tok_get_char(tokenizer_t *tok)
219{
220 tok->in_char_offset++;
221 return str_decode(tok->in, &tok->in_offset, STR_NO_LIMIT);
222}
223
224/** Get a char from input, while staying on the same input position */
225wchar_t tok_look_char(tokenizer_t *tok)
226{
227 size_t old_offset = tok->in_offset;
228 size_t old_char_offset = tok->in_char_offset;
229 wchar_t ret = tok_get_char(tok);
230 tok->in_offset = old_offset;
231 tok->in_char_offset = old_char_offset;
232 return ret;
233}
234
235/** Append a char to the end of the current token */
236int tok_push_char(tokenizer_t *tok, wchar_t ch)
237{
238 return chr_encode(ch, tok->outbuf, &tok->outbuf_offset, tok->outbuf_size);
239}
240
241void tok_start_token(tokenizer_t *tok, token_type_t type)
242{
243 tok->current_type = type;
244}
245
246/** Push the current token to the output array */
247int tok_push_token(tokenizer_t *tok)
248{
249 if (tok->outtok_offset >= tok->outtok_size) {
250 return EOVERFLOW;
251 }
252
253 if (tok->outbuf_offset >= tok->outbuf_size) {
254 return EOVERFLOW;
255 }
256
257 tok->outbuf[tok->outbuf_offset++] = 0;
258 token_t *tokinfo = &tok->outtok[tok->outtok_offset++];
259 tokinfo->type = tok->current_type;
260 tokinfo->text = tok->outbuf + tok->outbuf_last_start;
261 tokinfo->byte_start = tok->last_in_offset;
262 tokinfo->byte_length = tok->in_offset - tok->last_in_offset;
263 tokinfo->char_start = tok->last_in_char_offset;
264 tokinfo->char_length = tok->in_char_offset - tok->last_in_char_offset;
265 tok->outbuf_last_start = tok->outbuf_offset;
266
267 /* We have consumed the first char of the next token already */
268 tok->last_in_offset = tok->in_offset;
269 tok->last_in_char_offset = tok->in_char_offset;
270
271 return EOK;
272}
273
274/** Return true, if the current token is not empty */
275bool tok_pending_chars(tokenizer_t *tok)
276{
277 assert(tok->outbuf_offset >= tok->outbuf_last_start);
278 return (tok->outbuf_offset != tok->outbuf_last_start);
279}
Note: See TracBrowser for help on using the repository browser.