source: mainline/uspace/app/bithenge/script.c@ 10334c2e

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 10334c2e was 10334c2e, checked in by Sean Bartell <wingedtachikoma@…>, 13 years ago

Bithenge: add if_transform

  • Property mode set to 100644
File size: 19.3 KB
Line 
1/*
2 * Copyright (c) 2012 Sean Bartell
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup bithenge
30 * @{
31 */
32/**
33 * @file
34 * Script parsing.
35 */
36
37#include <ctype.h>
38#include <stdio.h>
39#include <stdlib.h>
40#include "expression.h"
41#include "os.h"
42#include "script.h"
43#include "transform.h"
44#include "tree.h"
45
46/** Tokens with more characters than this may be read incorrectly. */
47#define MAX_TOKEN_SIZE 256
48#define BUFFER_SIZE 4096
49
50/** Single-character symbols are represented by the character itself. Every
51 * other token uses one of these values: */
52typedef enum {
53 TOKEN_ERROR = -128,
54 TOKEN_EOF,
55 TOKEN_IDENTIFIER,
56 TOKEN_INTEGER,
57 TOKEN_LEFT_ARROW,
58
59 /* Keywords */
60 TOKEN_ELSE,
61 TOKEN_FALSE,
62 TOKEN_IF,
63 TOKEN_STRUCT,
64 TOKEN_TRANSFORM,
65 TOKEN_TRUE,
66} token_type_t;
67
68/** Singly-linked list of named transforms. */
69typedef struct transform_list {
70 char *name;
71 bithenge_transform_t *transform;
72 struct transform_list *next;
73} transform_list_t;
74
75/** State kept by the parser. */
76typedef struct {
77 /** Rather than constantly checking return values, the parser uses this
78 * to indicate whether an error has occurred. */
79 int error;
80
81 /** The list of named transforms. */
82 transform_list_t *transform_list;
83
84 /** The name of the script file. */
85 const char *filename;
86 /** The script file being read from. */
87 FILE *file;
88 /** The buffer that holds script code. There is always a '\0' after the
89 * current position to prevent reading too far. */
90 char buffer[BUFFER_SIZE];
91 /** The start position within the buffer of the next unread token. */
92 size_t buffer_pos;
93 /** The start position within the buffer of the current token. */
94 size_t old_buffer_pos;
95 /** The line number of the current token. */
96 int lineno;
97 /** Added to a buffer position to find the column number. */
98 int line_offset;
99
100 /** The type of the current token. */
101 token_type_t token;
102 union {
103 /** The value of a TOKEN_IDENTIFIER token. Unless changed to
104 * NULL, it will be freed when the next token is read. */
105 char *token_string;
106 /** The value of a TOKEN_INTEGER token. */
107 bithenge_int_t token_int;
108 };
109
110 /** The names of the current transform's parameters. */
111 char **parameter_names;
112 /** The number of parameters. */
113 int num_params;
114} state_t;
115
116/** Free the previous token's data. This must be called before changing
117 * state->token. */
118static void done_with_token(state_t *state)
119{
120 if (state->token == TOKEN_IDENTIFIER)
121 free(state->token_string);
122 state->token = TOKEN_ERROR;
123}
124
125/** Note that an error has occurred if error is not EOK. */
126static void error_errno(state_t *state, int error)
127{
128 // Don't overwrite a previous error.
129 if (state->error == EOK && error != EOK) {
130 done_with_token(state);
131 state->token = TOKEN_ERROR;
132 state->error = error;
133 }
134}
135
136/** Note that a syntax error has occurred and print an error message. */
137static void syntax_error(state_t *state, const char *message)
138{
139 // Printing multiple errors is confusing.
140 if (state->error == EOK) {
141 size_t start_char = state->old_buffer_pos + state->line_offset;
142 size_t end_char = state->buffer_pos + state->line_offset;
143 size_t size = end_char - start_char;
144 fprintf(stderr, "%s:%d:", state->filename, state->lineno);
145 if (size <= 1)
146 fprintf(stderr, "%zd: ", start_char);
147 else
148 fprintf(stderr, "%zd-%zd: ", start_char, end_char - 1);
149 fprintf(stderr, "%s: \"%.*s\"\n", message, (int)size,
150 state->buffer + state->old_buffer_pos);
151 error_errno(state, EINVAL);
152 }
153}
154
155/** Ensure the buffer contains enough characters to read a token. */
156static void fill_buffer(state_t *state)
157{
158 if (state->buffer_pos + MAX_TOKEN_SIZE < BUFFER_SIZE)
159 return;
160
161 size_t empty_size = state->buffer_pos;
162 size_t used_size = BUFFER_SIZE - 1 - state->buffer_pos;
163 memmove(state->buffer, state->buffer + state->buffer_pos, used_size);
164 state->line_offset += state->buffer_pos;
165 state->buffer_pos = 0;
166
167 size_t read_size = fread(state->buffer + used_size, 1, empty_size,
168 state->file);
169 if (ferror(state->file))
170 error_errno(state, EIO);
171 state->buffer[used_size + read_size] = '\0';
172}
173
174/** Read the next token. */
175static void next_token(state_t *state)
176{
177 fill_buffer(state);
178 done_with_token(state);
179 state->old_buffer_pos = state->buffer_pos;
180 char ch = state->buffer[state->buffer_pos];
181 if (ch == '\0') {
182 state->token = TOKEN_EOF;
183 } else if (ch == '#') {
184 while (state->buffer[state->buffer_pos] != '\n'
185 && state->buffer[state->buffer_pos] != '\0') {
186 state->buffer_pos++;
187 fill_buffer(state);
188 }
189 next_token(state);
190 return;
191 } else if (isspace(ch)) {
192 // Will eventually reach the '\0' at the end
193 while (isspace(state->buffer[state->buffer_pos])) {
194 if (state->buffer[state->buffer_pos] == '\n') {
195 state->lineno++;
196 state->line_offset = -state->buffer_pos;
197 }
198 state->buffer_pos++;
199 }
200 next_token(state);
201 return;
202 } else if (isalpha(ch)) {
203 while (isalnum(state->buffer[state->buffer_pos])
204 || state->buffer[state->buffer_pos] == '_')
205 state->buffer_pos++;
206 char *value = str_ndup(state->buffer + state->old_buffer_pos,
207 state->buffer_pos - state->old_buffer_pos);
208 if (!value) {
209 error_errno(state, ENOMEM);
210 } else if (!str_cmp(value, "else")) {
211 state->token = TOKEN_ELSE;
212 free(value);
213 } else if (!str_cmp(value, "false")) {
214 state->token = TOKEN_FALSE;
215 free(value);
216 } else if (!str_cmp(value, "if")) {
217 state->token = TOKEN_IF;
218 free(value);
219 } else if (!str_cmp(value, "struct")) {
220 state->token = TOKEN_STRUCT;
221 free(value);
222 } else if (!str_cmp(value, "transform")) {
223 state->token = TOKEN_TRANSFORM;
224 free(value);
225 } else if (!str_cmp(value, "true")) {
226 state->token = TOKEN_TRUE;
227 free(value);
228 } else {
229 state->token = TOKEN_IDENTIFIER;
230 state->token_string = value;
231 }
232 } else if (isdigit(ch)) {
233 while (isdigit(state->buffer[state->buffer_pos]))
234 state->buffer_pos++;
235 state->token = TOKEN_INTEGER;
236 int rc = bithenge_parse_int(state->buffer +
237 state->old_buffer_pos, &state->token_int);
238 error_errno(state, rc);
239 } else if (ch == '<') {
240 state->token = ch;
241 state->buffer_pos++;
242 if (state->buffer[state->buffer_pos] == '-') {
243 state->buffer_pos++;
244 state->token = TOKEN_LEFT_ARROW;
245 }
246 } else {
247 state->token = ch;
248 state->buffer_pos++;
249 }
250}
251
252/** Allocate memory and handle failure by setting the error in the state. The
253 * caller must check the state for errors before using the return value of this
254 * function. */
255static void *state_malloc(state_t *state, size_t size)
256{
257 if (state->error != EOK)
258 return NULL;
259 void *result = malloc(size);
260 if (result == NULL)
261 error_errno(state, ENOMEM);
262 return result;
263}
264
265/** Reallocate memory and handle failure by setting the error in the state. If
266 * an error occurs, the existing pointer will be returned. */
267static void *state_realloc(state_t *state, void *ptr, size_t size)
268{
269 if (state->error != EOK)
270 return ptr;
271 void *result = realloc(ptr, size);
272 if (result == NULL) {
273 error_errno(state, ENOMEM);
274 return ptr;
275 }
276 return result;
277}
278
279/** Expect and consume a certain token. If the next token is of the wrong type,
280 * an error is caused. */
281static void expect(state_t *state, token_type_t type)
282{
283 if (state->token != type) {
284 syntax_error(state, "unexpected");
285 return;
286 }
287 next_token(state);
288}
289
290/** Expect and consume an identifier token. If the next token is not an
291 * identifier, an error is caused and this function returns null. */
292static char *expect_identifier(state_t *state)
293{
294 if (state->token != TOKEN_IDENTIFIER) {
295 syntax_error(state, "unexpected (identifier expected)");
296 return NULL;
297 }
298 char *val = state->token_string;
299 state->token_string = NULL;
300 next_token(state);
301 return val;
302}
303
304/** Find a transform by name. A reference will be added to the transform.
305 * @return The found transform, or NULL if none was found. */
306static bithenge_transform_t *get_named_transform(state_t *state,
307 const char *name)
308{
309 for (transform_list_t *e = state->transform_list; e; e = e->next) {
310 if (!str_cmp(e->name, name)) {
311 bithenge_transform_inc_ref(e->transform);
312 return e->transform;
313 }
314 }
315 for (int i = 0; bithenge_primitive_transforms[i].name; i++) {
316 if (!str_cmp(bithenge_primitive_transforms[i].name, name)) {
317 bithenge_transform_t *xform =
318 bithenge_primitive_transforms[i].transform;
319 bithenge_transform_inc_ref(xform);
320 return xform;
321 }
322 }
323 return NULL;
324}
325
326/** Add a named transform. This function takes ownership of the name and a
327 * reference to the transform. If an error has occurred, either may be null. */
328static void add_named_transform(state_t *state, bithenge_transform_t *xform, char *name)
329{
330 transform_list_t *entry = state_malloc(state, sizeof(*entry));
331 if (state->error != EOK) {
332 free(name);
333 bithenge_transform_dec_ref(xform);
334 free(entry);
335 return;
336 }
337 entry->name = name;
338 entry->transform = xform;
339 entry->next = state->transform_list;
340 state->transform_list = entry;
341}
342
343static bithenge_transform_t *parse_transform(state_t *state);
344
345static bithenge_expression_t *parse_expression(state_t *state)
346{
347 if (state->token == TOKEN_TRUE || state->token == TOKEN_FALSE) {
348 bool val = state->token == TOKEN_TRUE;
349 next_token(state);
350 bithenge_node_t *node;
351 int rc = bithenge_new_boolean_node(&node, val);
352 if (rc != EOK) {
353 error_errno(state, rc);
354 return NULL;
355 }
356
357 bithenge_expression_t *expr;
358 rc = bithenge_const_expression(&expr, node);
359 if (rc != EOK) {
360 error_errno(state, rc);
361 return NULL;
362 }
363
364 return expr;
365 } else if (state->token == TOKEN_INTEGER) {
366 bithenge_int_t val = state->token_int;
367 next_token(state);
368 bithenge_node_t *node;
369 int rc = bithenge_new_integer_node(&node, val);
370 if (rc != EOK) {
371 error_errno(state, rc);
372 return NULL;
373 }
374
375 bithenge_expression_t *expr;
376 rc = bithenge_const_expression(&expr, node);
377 if (rc != EOK) {
378 error_errno(state, rc);
379 return NULL;
380 }
381
382 return expr;
383 } else if (state->token == TOKEN_IDENTIFIER) {
384 int i;
385 for (i = 0; i < state->num_params; i++)
386 if (!str_cmp(state->parameter_names[i],
387 state->token_string))
388 break;
389
390 if (i == state->num_params) {
391 syntax_error(state, "unknown identifier");
392 return NULL;
393 }
394
395 bithenge_expression_t *expr;
396 int rc = bithenge_param_expression(&expr, i);
397 if (rc != EOK) {
398 error_errno(state, rc);
399 return NULL;
400 }
401
402 next_token(state);
403
404 return expr;
405 } else if (state->token == '.') {
406 next_token(state);
407
408 const char *id = expect_identifier(state);
409 bithenge_node_t *key = NULL;
410 bithenge_expression_t *expr = NULL;
411 int rc = bithenge_current_node_expression(&expr);
412 error_errno(state, rc);
413
414 if (state->error == EOK) {
415 rc = bithenge_new_string_node(&key, id, true);
416 id = NULL;
417 error_errno(state, rc);
418 }
419
420 if (state->error == EOK) {
421 rc = bithenge_member_expression(&expr, expr, key);
422 key = NULL;
423 if (rc != EOK)
424 expr = NULL;
425 error_errno(state, rc);
426 }
427
428 if (state->error != EOK) {
429 free((char *)id);
430 bithenge_node_dec_ref(key);
431 bithenge_expression_dec_ref(expr);
432 return NULL;
433 }
434
435 return expr;
436 } else {
437 syntax_error(state, "expression expected");
438 return NULL;
439 }
440}
441
442// state->token must be TOKEN_IDENTIFIER when this is called
443static bithenge_transform_t *parse_invocation(state_t *state)
444{
445 bithenge_transform_t *result = get_named_transform(state,
446 state->token_string);
447 if (!result)
448 syntax_error(state, "transform not found");
449 next_token(state);
450
451 bithenge_expression_t **params = NULL;
452 int num_params = 0;
453 if (state->token == '(') {
454 next_token(state);
455 while (state->error == EOK && state->token != ')') {
456 if (num_params)
457 expect(state, ',');
458 params = state_realloc(state, params,
459 (num_params + 1)*sizeof(*params));
460 if (state->error != EOK)
461 break;
462 params[num_params] = parse_expression(state);
463 num_params++;
464 }
465 expect(state, ')');
466 }
467
468 /* TODO: show correct error position */
469 if (state->error == EOK
470 && bithenge_transform_num_params(result) != num_params)
471 syntax_error(state, "incorrect number of parameters before");
472
473 if (state->error != EOK) {
474 while (num_params--)
475 bithenge_expression_dec_ref(params[num_params]);
476 free(params);
477 bithenge_transform_dec_ref(result);
478 return NULL;
479 }
480
481 if (num_params) {
482 int rc = bithenge_param_wrapper(&result, result, params);
483 if (rc != EOK) {
484 error_errno(state, rc);
485 result = NULL;
486 }
487 }
488
489 return result;
490}
491
492static bithenge_transform_t *parse_if(state_t *state)
493{
494 expect(state, TOKEN_IF);
495 expect(state, '(');
496 bithenge_expression_t *expr = parse_expression(state);
497 expect(state, ')');
498 expect(state, '{');
499 bithenge_transform_t *true_xform = parse_transform(state);
500 expect(state, '}');
501 expect(state, TOKEN_ELSE);
502 expect(state, '{');
503 bithenge_transform_t *false_xform = parse_transform(state);
504 expect(state, '}');
505 if (state->error != EOK) {
506 bithenge_expression_dec_ref(expr);
507 bithenge_transform_dec_ref(true_xform);
508 bithenge_transform_dec_ref(false_xform);
509 return NULL;
510 }
511 bithenge_transform_t *if_xform;
512 int rc = bithenge_if_transform(&if_xform, expr, true_xform,
513 false_xform);
514 if (rc != EOK) {
515 error_errno(state, rc);
516 return NULL;
517 }
518 return if_xform;
519}
520
521static bithenge_transform_t *parse_struct(state_t *state)
522{
523 size_t num = 0;
524 bithenge_named_transform_t *subxforms;
525 /* We keep an extra space for the {NULL, NULL} terminator. */
526 subxforms = state_malloc(state, sizeof(*subxforms));
527 expect(state, TOKEN_STRUCT);
528 expect(state, '{');
529 while (state->error == EOK && state->token != '}') {
530 if (state->token == '.') {
531 expect(state, '.');
532 subxforms[num].name = expect_identifier(state);
533 expect(state, TOKEN_LEFT_ARROW);
534 } else {
535 subxforms[num].name = NULL;
536 expect(state, TOKEN_LEFT_ARROW);
537 }
538 subxforms[num].transform = parse_transform(state);
539 expect(state, ';');
540 num++;
541 subxforms = state_realloc(state, subxforms,
542 (num + 1)*sizeof(*subxforms));
543 }
544 expect(state, '}');
545
546 if (state->error != EOK) {
547 while (num--) {
548 free((void *)subxforms[num].name);
549 bithenge_transform_dec_ref(subxforms[num].transform);
550 }
551 free(subxforms);
552 return NULL;
553 }
554
555 subxforms[num].name = NULL;
556 subxforms[num].transform = NULL;
557 bithenge_transform_t *result;
558 int rc = bithenge_new_struct(&result, subxforms);
559 if (rc != EOK) {
560 error_errno(state, rc);
561 return NULL;
562 }
563 return result;
564}
565
566/** Parse a transform without composition.
567 * @return The parsed transform, or NULL if an error occurred. */
568static bithenge_transform_t *parse_transform_no_compose(state_t *state)
569{
570 if (state->token == TOKEN_IDENTIFIER) {
571 return parse_invocation(state);
572 } else if (state->token == TOKEN_IF) {
573 return parse_if(state);
574 } else if (state->token == TOKEN_STRUCT) {
575 return parse_struct(state);
576 } else {
577 syntax_error(state, "unexpected (transform expected)");
578 return NULL;
579 }
580}
581
582/** Parse a transform.
583 * @return The parsed transform, or NULL if an error occurred. */
584static bithenge_transform_t *parse_transform(state_t *state)
585{
586 bithenge_transform_t *result = parse_transform_no_compose(state);
587 bithenge_transform_t **xforms = NULL;
588 size_t num = 1;
589 while (state->token == TOKEN_LEFT_ARROW) {
590 expect(state, TOKEN_LEFT_ARROW);
591 xforms = state_realloc(state, xforms,
592 (num + 1) * sizeof(*xforms));
593 if (state->error != EOK)
594 break;
595 xforms[num] = parse_transform_no_compose(state);
596 num++;
597 }
598 if (state->error != EOK) {
599 while (xforms && num--)
600 bithenge_transform_dec_ref(xforms[num]);
601 free(xforms);
602 bithenge_transform_dec_ref(result);
603 return NULL;
604 }
605 if (xforms) {
606 xforms[0] = result;
607 int rc = bithenge_new_composed_transform(&result, xforms, num);
608 if (rc != EOK) {
609 error_errno(state, rc);
610 return NULL;
611 }
612 }
613 return result;
614}
615
616/** Parse a definition. */
617static void parse_definition(state_t *state)
618{
619 expect(state, TOKEN_TRANSFORM);
620 char *name = expect_identifier(state);
621
622 if (state->token == '(') {
623 next_token(state);
624 while (state->error == EOK && state->token != ')') {
625 if (state->num_params)
626 expect(state, ',');
627 state->parameter_names = state_realloc(state,
628 state->parameter_names,
629 (state->num_params + 1)*sizeof(*state->parameter_names));
630 if (state->error != EOK)
631 break;
632 state->parameter_names[state->num_params++] =
633 expect_identifier(state);
634 }
635 expect(state, ')');
636 }
637
638 expect(state, '=');
639 bithenge_transform_t *xform = parse_transform(state);
640 expect(state, ';');
641
642 if (state->error == EOK && state->num_params) {
643 int rc = bithenge_new_param_transform(&xform, xform,
644 state->num_params);
645 if (rc != EOK) {
646 xform = NULL;
647 error_errno(state, rc);
648 }
649 }
650
651 add_named_transform(state, xform, name);
652
653 for (int i = 0; i < state->num_params; i++)
654 free(state->parameter_names[i]);
655 free(state->parameter_names);
656 state->parameter_names = NULL;
657 state->num_params = 0;
658}
659
660/** Initialize the state. */
661static void state_init(state_t *state, const char *filename)
662{
663 state->error = EOK;
664 state->transform_list = NULL;
665 state->parameter_names = NULL;
666 state->num_params = 0;
667 state->token = TOKEN_ERROR;
668 state->old_buffer_pos = state->buffer_pos = BUFFER_SIZE - 1;
669 state->lineno = 1;
670 state->line_offset = (int)-state->buffer_pos + 1;
671 state->filename = filename;
672 state->file = fopen(filename, "r");
673 if (!state->file) {
674 error_errno(state, errno);
675 } else {
676 next_token(state);
677 }
678}
679
680/** Destroy the state. */
681static void state_destroy(state_t *state)
682{
683 done_with_token(state);
684 state->token = TOKEN_ERROR;
685 if (state->file)
686 fclose(state->file);
687 transform_list_t *entry = state->transform_list;
688 while (entry) {
689 transform_list_t *next = entry->next;
690 free(entry->name);
691 bithenge_transform_dec_ref(entry->transform);
692 free(entry);
693 entry = next;
694 }
695 for (int i = 0; i < state->num_params; i++)
696 free(state->parameter_names[i]);
697 free(state->parameter_names);
698}
699
700/** Parse a script file.
701 * @param filename The name of the script file.
702 * @param[out] out Stores the "main" transform.
703 * @return EOK on success, EINVAL on syntax error, or an error code from
704 * errno.h. */
705int bithenge_parse_script(const char *filename, bithenge_transform_t **out)
706{
707 state_t state;
708 state_init(&state, filename);
709 while (state.error == EOK && state.token != TOKEN_EOF)
710 parse_definition(&state);
711 *out = get_named_transform(&state, "main");
712 int rc = state.error;
713 state_destroy(&state);
714 if (rc == EOK && !*out) {
715 fprintf(stderr, "no \"main\" transform\n");
716 rc = EINVAL;
717 }
718 return rc;
719}
720
721/** @}
722 */
Note: See TracBrowser for help on using the repository browser.