Context Navigation

source: mainline/uspace/app/bdsh/tok.c@ 494f417

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 494f417 was f41682c, checked in by Martin Sucha <sucha14@…>, 14 years ago
Fix build on 64-bit platforms
Property mode set to `100644`
File size: 7.3 KB

Rev	Line
[36ab7c7]	1	/*
	2	* Copyright (c) 2011 Martin Sucha
[6939edb]	3	* All rights reserved.
	4	*
	5	* Redistribution and use in source and binary forms, with or without
[36ab7c7]	6	* modification, are permitted provided that the following conditions
	7	* are met:
[6939edb]	8	*
[36ab7c7]	9	* - Redistributions of source code must retain the above copyright
	10	* notice, this list of conditions and the following disclaimer.
	11	* - Redistributions in binary form must reproduce the above copyright
	12	* notice, this list of conditions and the following disclaimer in the
	13	* documentation and/or other materials provided with the distribution.
	14	* - The name of the author may not be used to endorse or promote products
	15	* derived from this software without specific prior written permission.
[6939edb]	16	*
[36ab7c7]	17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
[6939edb]	27	*/
	28
	29	#include <str.h>
	30	#include <assert.h>
	31	#include <malloc.h>
	32	#include <stdlib.h>
	33	#include <errno.h>
	34
	35	#include "tok.h"
	36
	37	/* Forward declarations of static functions */
	38	static wchar_t tok_get_char(tokenizer_t *);
	39	static wchar_t tok_look_char(tokenizer_t *);
	40	static int tok_push_char(tokenizer_t *, wchar_t);
	41	static int tok_push_token(tokenizer_t *);
	42	static bool tok_pending_chars(tokenizer_t *);
	43	static int tok_finish_string(tokenizer_t *);
[0662451]	44	static void tok_start_token(tokenizer_t *, token_type_t);
[6939edb]	45
	46	/** Initialize the token parser
	47	*
	48	* @param tok the tokenizer structure to initialize
	49	* @param input the input string to tokenize
	50	* @param out_tokens array of strings where to store the result
	51	* @param max_tokens number of elements of the out_tokens array
	52	*/
[7dcb7981]	53	int tok_init(tokenizer_t tok, char input, token_t *out_tokens,
[6939edb]	54	size_t max_tokens)
	55	{
	56	tok->in = input;
	57	tok->in_offset = 0;
[7dcb7981]	58	tok->last_in_offset = 0;
	59	tok->in_char_offset = 0;
	60	tok->last_in_char_offset = 0;
[6939edb]	61
	62	tok->outtok = out_tokens;
	63	tok->outtok_offset = 0;
[0662451]	64	tok->outtok_size = max_tokens;
[6939edb]	65
	66	/* Prepare a buffer where all the token strings will be stored */
	67	size_t len = str_size(input) + max_tokens + 1;
	68	char *tmp = malloc(len);
	69
	70	if (tmp == NULL) {
	71	return ENOMEM;
	72	}
	73
	74	tok->outbuf = tmp;
	75	tok->outbuf_offset = 0;
	76	tok->outbuf_size = len;
	77	tok->outbuf_last_start = 0;
	78
	79	return EOK;
	80	}
	81
	82	/** Finalize the token parser */
	83	void tok_fini(tokenizer_t *tok)
	84	{
	85	if (tok->outbuf != NULL) {
	86	free(tok->outbuf);
	87	}
	88	}
	89
	90	/** Tokenize the input string into the tokens */
[0662451]	91	int tok_tokenize(tokenizer_t tok, size_t tokens_length)
[6939edb]	92	{
	93	int rc;
[5992e0e]	94	wchar_t next_char;
[6939edb]	95
	96	/* Read the input line char by char and append tokens */
[5992e0e]	97	while ((next_char = tok_look_char(tok)) != 0) {
	98	if (next_char == ' ') {
[0662451]	99	/* Push the token if there is any.
[6939edb]	100	* There may not be any pending char for a token in case
	101	* there are several spaces in the input.
	102	*/
	103	if (tok_pending_chars(tok)) {
	104	rc = tok_push_token(tok);
	105	if (rc != EOK) {
	106	return rc;
	107	}
	108	}
[0662451]	109	tok_start_token(tok, TOKTYPE_SPACE);
[5992e0e]	110	/* Eat all the spaces */
[0662451]	111	while (tok_look_char(tok) == ' ') {
	112	tok_push_char(tok, tok_get_char(tok));
	113	}
	114	tok_push_token(tok);
	115
[6939edb]	116	}
[5992e0e]	117	else if (next_char == '\|') {
	118	/* Pipes are tokens that are delimiters and should be
	119	* output as a separate token
[6939edb]	120	*/
	121	if (tok_pending_chars(tok)) {
	122	rc = tok_push_token(tok);
	123	if (rc != EOK) {
	124	return rc;
	125	}
	126	}
	127
[0662451]	128	tok_start_token(tok, TOKTYPE_PIPE);
	129
[5992e0e]	130	rc = tok_push_char(tok, tok_get_char(tok));
[6939edb]	131	if (rc != EOK) {
	132	return rc;
	133	}
	134
	135	rc = tok_push_token(tok);
	136	if (rc != EOK) {
	137	return rc;
	138	}
	139	}
[5992e0e]	140	else if (next_char == '\'') {
[6939edb]	141	/* A string starts with a quote (') and ends again with a quote.
	142	* A literal quote is written as ''
	143	*/
[0662451]	144	tok_start_token(tok, TOKTYPE_TEXT);
[5992e0e]	145	/* Eat the quote */
	146	tok_get_char(tok);
[6939edb]	147	rc = tok_finish_string(tok);
	148	if (rc != EOK) {
	149	return rc;
	150	}
	151	}
	152	else {
[0662451]	153	if (!tok_pending_chars(tok)) {
	154	tok_start_token(tok, TOKTYPE_TEXT);
	155	}
[6939edb]	156	/* If we are handling any other character, just append it to
	157	* the current token.
	158	*/
[5992e0e]	159	rc = tok_push_char(tok, tok_get_char(tok));
[6939edb]	160	if (rc != EOK) {
	161	return rc;
	162	}
	163	}
	164	}
	165
	166	/* Push the last token */
	167	if (tok_pending_chars(tok)) {
	168	rc = tok_push_token(tok);
	169	if (rc != EOK) {
	170	return rc;
	171	}
	172	}
	173
[0662451]	174	*tokens_length = tok->outtok_offset;
[6939edb]	175
	176	return EOK;
	177	}
	178
	179	/** Finish tokenizing an opened string */
	180	int tok_finish_string(tokenizer_t *tok)
	181	{
	182	int rc;
[5992e0e]	183	wchar_t next_char;
[6939edb]	184
[5992e0e]	185	while ((next_char = tok_look_char(tok)) != 0) {
	186	if (next_char == '\'') {
	187	/* Eat the quote */
	188	tok_get_char(tok);
[6939edb]	189	if (tok_look_char(tok) == '\'') {
	190	/* Encode a single literal quote */
	191	rc = tok_push_char(tok, '\'');
	192	if (rc != EOK) {
	193	return rc;
	194	}
	195
	196	/* Swallow the additional one in the input */
	197	tok_get_char(tok);
	198	}
	199	else {
	200	/* The string end */
	201	return tok_push_token(tok);
	202	}
	203	}
	204	else {
[5992e0e]	205	rc = tok_push_char(tok, tok_get_char(tok));
[6939edb]	206	if (rc != EOK) {
	207	return rc;
	208	}
	209	}
	210	}
	211
	212	/* If we are here, the string run to the end without being closed */
	213	return EINVAL;
	214	}
	215
	216	/** Get a char from input, advancing the input position */
	217	wchar_t tok_get_char(tokenizer_t *tok)
	218	{
[7dcb7981]	219	tok->in_char_offset++;
[6939edb]	220	return str_decode(tok->in, &tok->in_offset, STR_NO_LIMIT);
	221	}
	222
	223	/** Get a char from input, while staying on the same input position */
	224	wchar_t tok_look_char(tokenizer_t *tok)
	225	{
[f41682c]	226	size_t old_offset = tok->in_offset;
	227	size_t old_char_offset = tok->in_char_offset;
[6939edb]	228	wchar_t ret = tok_get_char(tok);
	229	tok->in_offset = old_offset;
[7dcb7981]	230	tok->in_char_offset = old_char_offset;
[6939edb]	231	return ret;
	232	}
	233
	234	/** Append a char to the end of the current token */
	235	int tok_push_char(tokenizer_t *tok, wchar_t ch)
	236	{
	237	return chr_encode(ch, tok->outbuf, &tok->outbuf_offset, tok->outbuf_size);
	238	}
	239
[0662451]	240	void tok_start_token(tokenizer_t *tok, token_type_t type)
	241	{
	242	tok->current_type = type;
	243	}
	244
[6939edb]	245	/** Push the current token to the output array */
	246	int tok_push_token(tokenizer_t *tok)
	247	{
	248	if (tok->outtok_offset >= tok->outtok_size) {
	249	return EOVERFLOW;
	250	}
	251
	252	if (tok->outbuf_offset >= tok->outbuf_size) {
	253	return EOVERFLOW;
	254	}
	255
	256	tok->outbuf[tok->outbuf_offset++] = 0;
[0662451]	257	token_t *tokinfo = &tok->outtok[tok->outtok_offset++];
	258	tokinfo->type = tok->current_type;
	259	tokinfo->text = tok->outbuf + tok->outbuf_last_start;
	260	tokinfo->byte_start = tok->last_in_offset;
[5992e0e]	261	tokinfo->byte_length = tok->in_offset - tok->last_in_offset;
[0662451]	262	tokinfo->char_start = tok->last_in_char_offset;
[5992e0e]	263	tokinfo->char_length = tok->in_char_offset - tok->last_in_char_offset;
[6939edb]	264	tok->outbuf_last_start = tok->outbuf_offset;
	265
[7dcb7981]	266	/* We have consumed the first char of the next token already */
[5992e0e]	267	tok->last_in_offset = tok->in_offset;
	268	tok->last_in_char_offset = tok->in_char_offset;
[7dcb7981]	269
[6939edb]	270	return EOK;
	271	}
	272
	273	/** Return true, if the current token is not empty */
	274	bool tok_pending_chars(tokenizer_t *tok)
	275	{
	276	assert(tok->outbuf_offset >= tok->outbuf_last_start);
	277	return (tok->outbuf_offset != tok->outbuf_last_start);
[7dcb7981]	278	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: