source: mainline/uspace/app/bdsh/tok.c@ eff10e03

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since eff10e03 was 36ab7c7, checked in by Jiri Svoboda <jiri@…>, 14 years ago

Standardize formatting of copyright headers in Bdsh and add some that were
missing.

  • Property mode set to 100644
File size: 6.3 KB
Line 
/*
 * Copyright (c) 2011 Martin Sucha
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
28
29#include <str.h>
30#include <assert.h>
31#include <malloc.h>
32#include <stdlib.h>
33#include <errno.h>
34
35#include "tok.h"
36
37/* Forward declarations of static functions */
38static wchar_t tok_get_char(tokenizer_t *);
39static wchar_t tok_look_char(tokenizer_t *);
40static int tok_push_char(tokenizer_t *, wchar_t);
41static int tok_push_token(tokenizer_t *);
42static bool tok_pending_chars(tokenizer_t *);
43static int tok_finish_string(tokenizer_t *);
44
45/** Initialize the token parser
46 *
47 * @param tok the tokenizer structure to initialize
48 * @param input the input string to tokenize
49 * @param out_tokens array of strings where to store the result
50 * @param max_tokens number of elements of the out_tokens array
51 */
52int tok_init(tokenizer_t *tok, char *input, char **out_tokens,
53 size_t max_tokens)
54{
55 tok->in = input;
56 tok->in_offset = 0;
57
58 tok->outtok = out_tokens;
59 tok->outtok_offset = 0;
60 /* Leave one slot for a null terminator */
61 assert(max_tokens > 0);
62 tok->outtok_size = max_tokens - 1;
63
64 /* Prepare a buffer where all the token strings will be stored */
65 size_t len = str_size(input) + max_tokens + 1;
66 char *tmp = malloc(len);
67
68 if (tmp == NULL) {
69 return ENOMEM;
70 }
71
72 tok->outbuf = tmp;
73 tok->outbuf_offset = 0;
74 tok->outbuf_size = len;
75 tok->outbuf_last_start = 0;
76
77 return EOK;
78}
79
80/** Finalize the token parser */
81void tok_fini(tokenizer_t *tok)
82{
83 if (tok->outbuf != NULL) {
84 free(tok->outbuf);
85 }
86}
87
88/** Tokenize the input string into the tokens */
89int tok_tokenize(tokenizer_t *tok)
90{
91 int rc;
92 wchar_t cur_char;
93
94 /* Read the input line char by char and append tokens */
95 while ((cur_char = tok_get_char(tok)) != 0) {
96 if (cur_char == ' ') {
97 /* Spaces delimit tokens, but are not processed in any way
98 * Push the token if there is any.
99 * There may not be any pending char for a token in case
100 * there are several spaces in the input.
101 */
102 if (tok_pending_chars(tok)) {
103 rc = tok_push_token(tok);
104 if (rc != EOK) {
105 return rc;
106 }
107 }
108 }
109 else if (cur_char == '|') {
110 /* Pipes are tokens that are delimiters and should be output
111 * as a separate token
112 */
113 if (tok_pending_chars(tok)) {
114 rc = tok_push_token(tok);
115 if (rc != EOK) {
116 return rc;
117 }
118 }
119
120 rc = tok_push_char(tok, '|');
121 if (rc != EOK) {
122 return rc;
123 }
124
125 rc = tok_push_token(tok);
126 if (rc != EOK) {
127 return rc;
128 }
129 }
130 else if (cur_char == '\'') {
131 /* A string starts with a quote (') and ends again with a quote.
132 * A literal quote is written as ''
133 */
134 rc = tok_finish_string(tok);
135 if (rc != EOK) {
136 return rc;
137 }
138 }
139 else {
140 /* If we are handling any other character, just append it to
141 * the current token.
142 */
143 rc = tok_push_char(tok, cur_char);
144 if (rc != EOK) {
145 return rc;
146 }
147 }
148 }
149
150 /* Push the last token */
151 if (tok_pending_chars(tok)) {
152 rc = tok_push_token(tok);
153 if (rc != EOK) {
154 return rc;
155 }
156 }
157
158 /* We always have a space for the terminator, as we
159 * reserved it in tok_init */
160 tok->outtok[tok->outtok_offset] = 0;
161
162 return EOK;
163}
164
165/** Finish tokenizing an opened string */
166int tok_finish_string(tokenizer_t *tok)
167{
168 int rc;
169 wchar_t cur_char;
170
171 while ((cur_char = tok_get_char(tok)) != 0) {
172 if (cur_char == '\'') {
173 if (tok_look_char(tok) == '\'') {
174 /* Encode a single literal quote */
175 rc = tok_push_char(tok, '\'');
176 if (rc != EOK) {
177 return rc;
178 }
179
180 /* Swallow the additional one in the input */
181 tok_get_char(tok);
182 }
183 else {
184 /* The string end */
185 return tok_push_token(tok);
186 }
187 }
188 else {
189 rc = tok_push_char(tok, cur_char);
190 if (rc != EOK) {
191 return rc;
192 }
193 }
194 }
195
196 /* If we are here, the string run to the end without being closed */
197 return EINVAL;
198}
199
200/** Get a char from input, advancing the input position */
201wchar_t tok_get_char(tokenizer_t *tok)
202{
203 return str_decode(tok->in, &tok->in_offset, STR_NO_LIMIT);
204}
205
206/** Get a char from input, while staying on the same input position */
207wchar_t tok_look_char(tokenizer_t *tok)
208{
209 size_t old_offset = tok->in_offset;
210 wchar_t ret = tok_get_char(tok);
211 tok->in_offset = old_offset;
212 return ret;
213}
214
215/** Append a char to the end of the current token */
216int tok_push_char(tokenizer_t *tok, wchar_t ch)
217{
218 return chr_encode(ch, tok->outbuf, &tok->outbuf_offset, tok->outbuf_size);
219}
220
221/** Push the current token to the output array */
222int tok_push_token(tokenizer_t *tok)
223{
224 if (tok->outtok_offset >= tok->outtok_size) {
225 return EOVERFLOW;
226 }
227
228 if (tok->outbuf_offset >= tok->outbuf_size) {
229 return EOVERFLOW;
230 }
231
232 tok->outbuf[tok->outbuf_offset++] = 0;
233 tok->outtok[tok->outtok_offset++] = tok->outbuf + tok->outbuf_last_start;
234 tok->outbuf_last_start = tok->outbuf_offset;
235
236 return EOK;
237}
238
239/** Return true, if the current token is not empty */
240bool tok_pending_chars(tokenizer_t *tok)
241{
242 assert(tok->outbuf_offset >= tok->outbuf_last_start);
243 return (tok->outbuf_offset != tok->outbuf_last_start);
244}
Note: See TracBrowser for help on using the repository browser.