Context Navigation

tok.c@ 2aaba7e

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 2aaba7e was 582a0b8, checked in by Jakub Jermar <jakub@…>, 8 years ago

Remove unistd.h

Rename usleep() and sleep() to thread_usleep() and thread_sleep() and move to thread.[hc].
Include stddef.h in order to provide NULL.
Move getpagesize() to libposix.
Sync uspace/dist/src/c/demos with originals.

Property mode set to 100644

File size: 7.3 KB

Line
1	/*
2	* Copyright (c) 2011 Martin Sucha
3	* All rights reserved.
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	*
9	* - Redistributions of source code must retain the above copyright
10	* notice, this list of conditions and the following disclaimer.
11	* - Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	* - The name of the author may not be used to endorse or promote products
15	* derived from this software without specific prior written permission.
16	*
17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	#include <str.h>
30	#include <assert.h>
31	#include <malloc.h>
32	#include <stdlib.h>
33	#include <stddef.h>
34	#include <errno.h>
35
36	#include "tok.h"
37
38	/* Forward declarations of static functions */
39	static wchar_t tok_get_char(tokenizer_t *);
40	static wchar_t tok_look_char(tokenizer_t *);
41	static int tok_push_char(tokenizer_t *, wchar_t);
42	static int tok_push_token(tokenizer_t *);
43	static bool tok_pending_chars(tokenizer_t *);
44	static int tok_finish_string(tokenizer_t *);
45	static void tok_start_token(tokenizer_t *, token_type_t);
46
47	/** Initialize the token parser
48	*
49	* @param tok the tokenizer structure to initialize
50	* @param input the input string to tokenize
51	* @param out_tokens array of strings where to store the result
52	* @param max_tokens number of elements of the out_tokens array
53	*/
54	int tok_init(tokenizer_t tok, char input, token_t *out_tokens,
55	size_t max_tokens)
56	{
57	tok->in = input;
58	tok->in_offset = 0;
59	tok->last_in_offset = 0;
60	tok->in_char_offset = 0;
61	tok->last_in_char_offset = 0;
62
63	tok->outtok = out_tokens;
64	tok->outtok_offset = 0;
65	tok->outtok_size = max_tokens;
66
67	/* Prepare a buffer where all the token strings will be stored */
68	size_t len = str_size(input) + max_tokens + 1;
69	char *tmp = malloc(len);
70
71	if (tmp == NULL) {
72	return ENOMEM;
73	}
74
75	tok->outbuf = tmp;
76	tok->outbuf_offset = 0;
77	tok->outbuf_size = len;
78	tok->outbuf_last_start = 0;
79
80	return EOK;
81	}
82
83	/** Finalize the token parser */
84	void tok_fini(tokenizer_t *tok)
85	{
86	if (tok->outbuf != NULL) {
87	free(tok->outbuf);
88	}
89	}
90
91	/** Tokenize the input string into the tokens */
92	int tok_tokenize(tokenizer_t tok, size_t tokens_length)
93	{
94	int rc;
95	wchar_t next_char;
96
97	/* Read the input line char by char and append tokens */
98	while ((next_char = tok_look_char(tok)) != 0) {
99	if (next_char == ' ') {
100	/* Push the token if there is any.
101	* There may not be any pending char for a token in case
102	* there are several spaces in the input.
103	*/
104	if (tok_pending_chars(tok)) {
105	rc = tok_push_token(tok);
106	if (rc != EOK) {
107	return rc;
108	}
109	}
110	tok_start_token(tok, TOKTYPE_SPACE);
111	/* Eat all the spaces */
112	while (tok_look_char(tok) == ' ') {
113	tok_push_char(tok, tok_get_char(tok));
114	}
115	tok_push_token(tok);
116
117	}
118	else if (next_char == '\|') {
119	/* Pipes are tokens that are delimiters and should be
120	* output as a separate token
121	*/
122	if (tok_pending_chars(tok)) {
123	rc = tok_push_token(tok);
124	if (rc != EOK) {
125	return rc;
126	}
127	}
128
129	tok_start_token(tok, TOKTYPE_PIPE);
130
131	rc = tok_push_char(tok, tok_get_char(tok));
132	if (rc != EOK) {
133	return rc;
134	}
135
136	rc = tok_push_token(tok);
137	if (rc != EOK) {
138	return rc;
139	}
140	}
141	else if (next_char == '\'') {
142	/* A string starts with a quote (') and ends again with a quote.
143	* A literal quote is written as ''
144	*/
145	tok_start_token(tok, TOKTYPE_TEXT);
146	/* Eat the quote */
147	tok_get_char(tok);
148	rc = tok_finish_string(tok);
149	if (rc != EOK) {
150	return rc;
151	}
152	}
153	else {
154	if (!tok_pending_chars(tok)) {
155	tok_start_token(tok, TOKTYPE_TEXT);
156	}
157	/* If we are handling any other character, just append it to
158	* the current token.
159	*/
160	rc = tok_push_char(tok, tok_get_char(tok));
161	if (rc != EOK) {
162	return rc;
163	}
164	}
165	}
166
167	/* Push the last token */
168	if (tok_pending_chars(tok)) {
169	rc = tok_push_token(tok);
170	if (rc != EOK) {
171	return rc;
172	}
173	}
174
175	*tokens_length = tok->outtok_offset;
176
177	return EOK;
178	}
179
180	/** Finish tokenizing an opened string */
181	int tok_finish_string(tokenizer_t *tok)
182	{
183	int rc;
184	wchar_t next_char;
185
186	while ((next_char = tok_look_char(tok)) != 0) {
187	if (next_char == '\'') {
188	/* Eat the quote */
189	tok_get_char(tok);
190	if (tok_look_char(tok) == '\'') {
191	/* Encode a single literal quote */
192	rc = tok_push_char(tok, '\'');
193	if (rc != EOK) {
194	return rc;
195	}
196
197	/* Swallow the additional one in the input */
198	tok_get_char(tok);
199	}
200	else {
201	/* The string end */
202	return tok_push_token(tok);
203	}
204	}
205	else {
206	rc = tok_push_char(tok, tok_get_char(tok));
207	if (rc != EOK) {
208	return rc;
209	}
210	}
211	}
212
213	/* If we are here, the string run to the end without being closed */
214	return EINVAL;
215	}
216
217	/** Get a char from input, advancing the input position */
218	wchar_t tok_get_char(tokenizer_t *tok)
219	{
220	tok->in_char_offset++;
221	return str_decode(tok->in, &tok->in_offset, STR_NO_LIMIT);
222	}
223
224	/** Get a char from input, while staying on the same input position */
225	wchar_t tok_look_char(tokenizer_t *tok)
226	{
227	size_t old_offset = tok->in_offset;
228	size_t old_char_offset = tok->in_char_offset;
229	wchar_t ret = tok_get_char(tok);
230	tok->in_offset = old_offset;
231	tok->in_char_offset = old_char_offset;
232	return ret;
233	}
234
235	/** Append a char to the end of the current token */
236	int tok_push_char(tokenizer_t *tok, wchar_t ch)
237	{
238	return chr_encode(ch, tok->outbuf, &tok->outbuf_offset, tok->outbuf_size);
239	}
240
241	void tok_start_token(tokenizer_t *tok, token_type_t type)
242	{
243	tok->current_type = type;
244	}
245
246	/** Push the current token to the output array */
247	int tok_push_token(tokenizer_t *tok)
248	{
249	if (tok->outtok_offset >= tok->outtok_size) {
250	return EOVERFLOW;
251	}
252
253	if (tok->outbuf_offset >= tok->outbuf_size) {
254	return EOVERFLOW;
255	}
256
257	tok->outbuf[tok->outbuf_offset++] = 0;
258	token_t *tokinfo = &tok->outtok[tok->outtok_offset++];
259	tokinfo->type = tok->current_type;
260	tokinfo->text = tok->outbuf + tok->outbuf_last_start;
261	tokinfo->byte_start = tok->last_in_offset;
262	tokinfo->byte_length = tok->in_offset - tok->last_in_offset;
263	tokinfo->char_start = tok->last_in_char_offset;
264	tokinfo->char_length = tok->in_char_offset - tok->last_in_char_offset;
265	tok->outbuf_last_start = tok->outbuf_offset;
266
267	/* We have consumed the first char of the next token already */
268	tok->last_in_offset = tok->in_offset;
269	tok->last_in_char_offset = tok->in_char_offset;
270
271	return EOK;
272	}
273
274	/** Return true, if the current token is not empty */
275	bool tok_pending_chars(tokenizer_t *tok)
276	{
277	assert(tok->outbuf_offset >= tok->outbuf_last_start);
278	return (tok->outbuf_offset != tok->outbuf_last_start);
279	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/uspace/app/bdsh/tok.c@ 2aaba7e

Download in other formats: