source: mainline/uspace/app/bdsh/tok.c@ 494f417

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 494f417 was f41682c, checked in by Martin Sucha <sucha14@…>, 14 years ago

Fix build on 64-bit platforms

  • Property mode set to 100644
File size: 7.3 KB
RevLine 
[36ab7c7]1/*
2 * Copyright (c) 2011 Martin Sucha
[6939edb]3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
[36ab7c7]6 * modification, are permitted provided that the following conditions
7 * are met:
[6939edb]8 *
[36ab7c7]9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
[6939edb]16 *
[36ab7c7]17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
[6939edb]27 */
28
29#include <str.h>
30#include <assert.h>
31#include <malloc.h>
32#include <stdlib.h>
33#include <errno.h>
34
35#include "tok.h"
36
37/* Forward declarations of static functions */
38static wchar_t tok_get_char(tokenizer_t *);
39static wchar_t tok_look_char(tokenizer_t *);
40static int tok_push_char(tokenizer_t *, wchar_t);
41static int tok_push_token(tokenizer_t *);
42static bool tok_pending_chars(tokenizer_t *);
43static int tok_finish_string(tokenizer_t *);
[0662451]44static void tok_start_token(tokenizer_t *, token_type_t);
[6939edb]45
46/** Initialize the token parser
47 *
48 * @param tok the tokenizer structure to initialize
49 * @param input the input string to tokenize
50 * @param out_tokens array of strings where to store the result
51 * @param max_tokens number of elements of the out_tokens array
52 */
[7dcb7981]53int tok_init(tokenizer_t *tok, char *input, token_t *out_tokens,
[6939edb]54 size_t max_tokens)
55{
56 tok->in = input;
57 tok->in_offset = 0;
[7dcb7981]58 tok->last_in_offset = 0;
59 tok->in_char_offset = 0;
60 tok->last_in_char_offset = 0;
[6939edb]61
62 tok->outtok = out_tokens;
63 tok->outtok_offset = 0;
[0662451]64 tok->outtok_size = max_tokens;
[6939edb]65
66 /* Prepare a buffer where all the token strings will be stored */
67 size_t len = str_size(input) + max_tokens + 1;
68 char *tmp = malloc(len);
69
70 if (tmp == NULL) {
71 return ENOMEM;
72 }
73
74 tok->outbuf = tmp;
75 tok->outbuf_offset = 0;
76 tok->outbuf_size = len;
77 tok->outbuf_last_start = 0;
78
79 return EOK;
80}
81
82/** Finalize the token parser */
83void tok_fini(tokenizer_t *tok)
84{
85 if (tok->outbuf != NULL) {
86 free(tok->outbuf);
87 }
88}
89
90/** Tokenize the input string into the tokens */
[0662451]91int tok_tokenize(tokenizer_t *tok, size_t *tokens_length)
[6939edb]92{
93 int rc;
[5992e0e]94 wchar_t next_char;
[6939edb]95
96 /* Read the input line char by char and append tokens */
[5992e0e]97 while ((next_char = tok_look_char(tok)) != 0) {
98 if (next_char == ' ') {
[0662451]99 /* Push the token if there is any.
[6939edb]100 * There may not be any pending char for a token in case
101 * there are several spaces in the input.
102 */
103 if (tok_pending_chars(tok)) {
104 rc = tok_push_token(tok);
105 if (rc != EOK) {
106 return rc;
107 }
108 }
[0662451]109 tok_start_token(tok, TOKTYPE_SPACE);
[5992e0e]110 /* Eat all the spaces */
[0662451]111 while (tok_look_char(tok) == ' ') {
112 tok_push_char(tok, tok_get_char(tok));
113 }
114 tok_push_token(tok);
115
[6939edb]116 }
[5992e0e]117 else if (next_char == '|') {
118 /* Pipes are tokens that are delimiters and should be
119 * output as a separate token
[6939edb]120 */
121 if (tok_pending_chars(tok)) {
122 rc = tok_push_token(tok);
123 if (rc != EOK) {
124 return rc;
125 }
126 }
127
[0662451]128 tok_start_token(tok, TOKTYPE_PIPE);
129
[5992e0e]130 rc = tok_push_char(tok, tok_get_char(tok));
[6939edb]131 if (rc != EOK) {
132 return rc;
133 }
134
135 rc = tok_push_token(tok);
136 if (rc != EOK) {
137 return rc;
138 }
139 }
[5992e0e]140 else if (next_char == '\'') {
[6939edb]141 /* A string starts with a quote (') and ends again with a quote.
142 * A literal quote is written as ''
143 */
[0662451]144 tok_start_token(tok, TOKTYPE_TEXT);
[5992e0e]145 /* Eat the quote */
146 tok_get_char(tok);
[6939edb]147 rc = tok_finish_string(tok);
148 if (rc != EOK) {
149 return rc;
150 }
151 }
152 else {
[0662451]153 if (!tok_pending_chars(tok)) {
154 tok_start_token(tok, TOKTYPE_TEXT);
155 }
[6939edb]156 /* If we are handling any other character, just append it to
157 * the current token.
158 */
[5992e0e]159 rc = tok_push_char(tok, tok_get_char(tok));
[6939edb]160 if (rc != EOK) {
161 return rc;
162 }
163 }
164 }
165
166 /* Push the last token */
167 if (tok_pending_chars(tok)) {
168 rc = tok_push_token(tok);
169 if (rc != EOK) {
170 return rc;
171 }
172 }
173
[0662451]174 *tokens_length = tok->outtok_offset;
[6939edb]175
176 return EOK;
177}
178
179/** Finish tokenizing an opened string */
180int tok_finish_string(tokenizer_t *tok)
181{
182 int rc;
[5992e0e]183 wchar_t next_char;
[6939edb]184
[5992e0e]185 while ((next_char = tok_look_char(tok)) != 0) {
186 if (next_char == '\'') {
187 /* Eat the quote */
188 tok_get_char(tok);
[6939edb]189 if (tok_look_char(tok) == '\'') {
190 /* Encode a single literal quote */
191 rc = tok_push_char(tok, '\'');
192 if (rc != EOK) {
193 return rc;
194 }
195
196 /* Swallow the additional one in the input */
197 tok_get_char(tok);
198 }
199 else {
200 /* The string end */
201 return tok_push_token(tok);
202 }
203 }
204 else {
[5992e0e]205 rc = tok_push_char(tok, tok_get_char(tok));
[6939edb]206 if (rc != EOK) {
207 return rc;
208 }
209 }
210 }
211
212 /* If we are here, the string run to the end without being closed */
213 return EINVAL;
214}
215
216/** Get a char from input, advancing the input position */
217wchar_t tok_get_char(tokenizer_t *tok)
218{
[7dcb7981]219 tok->in_char_offset++;
[6939edb]220 return str_decode(tok->in, &tok->in_offset, STR_NO_LIMIT);
221}
222
223/** Get a char from input, while staying on the same input position */
224wchar_t tok_look_char(tokenizer_t *tok)
225{
[f41682c]226 size_t old_offset = tok->in_offset;
227 size_t old_char_offset = tok->in_char_offset;
[6939edb]228 wchar_t ret = tok_get_char(tok);
229 tok->in_offset = old_offset;
[7dcb7981]230 tok->in_char_offset = old_char_offset;
[6939edb]231 return ret;
232}
233
234/** Append a char to the end of the current token */
235int tok_push_char(tokenizer_t *tok, wchar_t ch)
236{
237 return chr_encode(ch, tok->outbuf, &tok->outbuf_offset, tok->outbuf_size);
238}
239
[0662451]240void tok_start_token(tokenizer_t *tok, token_type_t type)
241{
242 tok->current_type = type;
243}
244
[6939edb]245/** Push the current token to the output array */
246int tok_push_token(tokenizer_t *tok)
247{
248 if (tok->outtok_offset >= tok->outtok_size) {
249 return EOVERFLOW;
250 }
251
252 if (tok->outbuf_offset >= tok->outbuf_size) {
253 return EOVERFLOW;
254 }
255
256 tok->outbuf[tok->outbuf_offset++] = 0;
[0662451]257 token_t *tokinfo = &tok->outtok[tok->outtok_offset++];
258 tokinfo->type = tok->current_type;
259 tokinfo->text = tok->outbuf + tok->outbuf_last_start;
260 tokinfo->byte_start = tok->last_in_offset;
[5992e0e]261 tokinfo->byte_length = tok->in_offset - tok->last_in_offset;
[0662451]262 tokinfo->char_start = tok->last_in_char_offset;
[5992e0e]263 tokinfo->char_length = tok->in_char_offset - tok->last_in_char_offset;
[6939edb]264 tok->outbuf_last_start = tok->outbuf_offset;
265
[7dcb7981]266 /* We have consumed the first char of the next token already */
[5992e0e]267 tok->last_in_offset = tok->in_offset;
268 tok->last_in_char_offset = tok->in_char_offset;
[7dcb7981]269
[6939edb]270 return EOK;
271}
272
273/** Return true, if the current token is not empty */
274bool tok_pending_chars(tokenizer_t *tok)
275{
276 assert(tok->outbuf_offset >= tok->outbuf_last_start);
277 return (tok->outbuf_offset != tok->outbuf_last_start);
[7dcb7981]278}
Note: See TracBrowser for help on using the repository browser.