source: mainline/contrib/arch/hadlbppp.py@ 2a70672

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 2a70672 was 2a70672, checked in by Martin Decky <martin@…>, 16 years ago

more sophisticated ADL parser

  • Property mode set to 100755
File size: 15.9 KB
Line 
1#!/usr/bin/env python
2#
3# Copyright (c) 2009 Martin Decky
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions
8# are met:
9#
10# - Redistributions of source code must retain the above copyright
11# notice, this list of conditions and the following disclaimer.
12# - Redistributions in binary form must reproduce the above copyright
13# notice, this list of conditions and the following disclaimer in the
14# documentation and/or other materials provided with the distribution.
15# - The name of the author may not be used to endorse or promote products
16# derived from this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28#
29"""
30HelenOS Architecture Description Language and Behavior Protocols preprocessor
31"""
32
33import sys
34import os
35
# Parser context flags.  Each name is bound to a distinct integer (0..20);
# the flags that are currently active are kept in the ``context`` set below.
(
    INC, POST_INC, BLOCK_COMMENT, LINE_COMMENT, SYSTEM, ARCH, HEAD, BODY,
    NULL, INST, VAR, FIN, BIND, TO, SEEN_NL, IFACE, PROTOTYPE, PAR_LEFT,
    PAR_RIGHT, SIGNATURE, PROTOCOL,
) = range(21)

# Set of context flags active in the ADL parser (shared by all invocations).
context = set()

# Parser state: name of the interface / architecture being parsed and the
# protocol text collected so far.  All unset initially.
interface = architecture = protocol = None
43
def usage(prname):
    """Print the command-line usage synopsis to standard output.

    prname -- program name printed in front of the ``<OUTPUT>`` placeholder
    """

    # The single-argument call form prints the same text under Python 2
    # (print statement with a parenthesised expression) and Python 3.
    print(prname + " <OUTPUT>")
47
def tabs(cnt):
    """Return a string consisting of ``cnt`` tab characters."""

    return "".join(["\t"] * cnt)
52
def cond_append(tokens, token, trim):
    """Append ``token`` to ``tokens`` unless it is empty.

    When ``trim`` is true, leading and trailing spaces and tabs are stripped
    from the token before the emptiness check.  The list is modified in
    place and is also returned.
    """

    candidate = token.strip(" \t") if trim else token

    if candidate != "":
        tokens.append(candidate)

    return tokens
63
def split_tokens(string, delimiters, trim = False, separate = False):
    """Split ``string`` into tokens at the given delimiters, keeping them.

    string     -- text to split
    delimiters -- delimiter strings; at each position the first one (in the
                  given order) that matches is used, empty ones are ignored
    trim       -- strip spaces and tabs from every token
    separate   -- if true, each delimiter becomes a token of its own;
                  otherwise the text is cut in front of the delimiter, which
                  stays at the start of the following token (a delimiter at
                  position 0 does not cut)

    Tokens that are empty (after optional trimming) are left out.
    """

    tokens = []
    start = 0       # beginning of the token collected so far
    pos = 0
    end = len(string)

    while pos < end:
        for delim in delimiters:
            if (not delim) or (not string.startswith(delim, pos)):
                continue

            pending = string[start:pos]
            if separate:
                tokens = cond_append(tokens, pending, trim)
                tokens = cond_append(tokens, delim, trim)
                start = pos + len(delim)
            elif pos > 0:
                tokens = cond_append(tokens, pending, trim)
                start = pos

            # Skip the rest of a multi-character delimiter
            pos += len(delim) - 1
            break

        pos += 1

    return cond_append(tokens, string[start:end], trim)
92
def preproc_bp(outname, tokens):
    """Expand ``tentative { ... }`` statements in a Behavior Protocol.

    Every ``tentative { body }`` run of tokens is rewritten to
    ``( body ) + NULL``; tentative statements nested in the body are
    expanded recursively.  All other tokens are copied unchanged.

    outname -- name used as the prefix of diagnostic messages
    tokens  -- list of token strings (not modified)

    Returns a new list of tokens.  A ``tentative`` keyword that is not
    followed by ``{`` is reported on standard output and dropped; an
    unterminated tentative block is reported and dropped together with the
    rest of the tokens.
    """

    result = []
    count = len(tokens)
    i = 0

    while i < count:
        token = tokens[i]

        if token != "tentative":
            result.append(token)
            i += 1
            continue

        if (i + 1 >= count) or (tokens[i + 1] != "{"):
            print("%s: Unexpected tentative statement" % outname)
            i += 1
            continue

        # Locate the brace matching the one that opens the tentative body
        start = i + 2
        i = start
        level = 1

        while (i < count) and (level > 0):
            if tokens[i] == "{":
                level += 1
            elif tokens[i] == "}":
                level -= 1

            i += 1

        if level == 0:
            result.append("(")
            result.extend(preproc_bp(outname, tokens[start:(i - 1)]))
            result.extend([")", "+", "NULL"])
        else:
            print("%s: Syntax error in tentative statement" % outname)

        # i already points just past the closing brace here; advancing it
        # again would silently drop the token following the statement.

    return result
130
def preproc_adl(raw, inarg):
    """Substitute every ``%%`` placeholder in ADL text with ``inarg``."""

    # Cutting the text at each placeholder and gluing the pieces back
    # together with the argument replaces all occurrences left to right.
    pieces = raw.split("%%")

    return inarg.join(pieces)
135
def identifier(token):
    """Check whether the token is an identifier.

    An identifier is non-empty, starts with a letter or an underscore and
    continues with letters, digits or underscores only.
    """

    if not token:
        return False

    head = token[0]
    if not (head.isalpha() or head == "_"):
        return False

    return all((char.isalnum() or char == "_") for char in token[1:])
151
def descriptor(token):
    """Check whether the token is an interface descriptor.

    A descriptor consists of exactly two identifiers joined by one colon.
    """

    # Exactly one colon means splitting yields exactly two parts
    if token.count(":") != 1:
        return False

    left, right = token.split(":")

    return (identifier(left) and identifier(right))
160
def word(token):
    """Check whether the token is a word.

    A word is non-empty and consists only of letters, digits, underscores
    and dots.
    """

    if not token:
        return False

    return all((char.isalnum() or char in ("_", ".")) for char in token)
172
def parse_bp(base, root, inname, nested, outname, indent):
    """Parse a Behavior Protocol file and return it as re-indented text.

    base    -- directory against which include names starting with ``/``
               are resolved
    root    -- directory against which other include names are resolved
    inname  -- path of the file to parse; for a nested call, the include
               name as written in the including file
    nested  -- true when called for an included file
    outname -- name used as the prefix of diagnostic messages
    indent  -- number of tabs to start the output lines with

    Block comments (``/* ... */``), line comments (``# ...``) and newlines
    are dropped, ``tentative`` statements are expanded by preproc_bp() and
    ``[name]`` includes are replaced by the parenthesised content of the
    named file (or ``NULL`` when that content is empty).

    Returns the formatted protocol text.  A nested include that cannot be
    found is reported on standard output and yields an empty string.
    """

    if nested:
        if inname[0:1] == "/":
            # "/name" is looked up below the base directory
            path = os.path.join(base, ".%s" % inname)
            nested_root = os.path.dirname(path)
        else:
            path = os.path.join(root, inname)
            nested_root = root

        if not os.path.isfile(path):
            print("%s: Unable to include file %s" % (outname, path))
            return ""
    else:
        path = inname
        nested_root = root

    # Read the whole file up front so that the handle is closed even on
    # errors and is not held open while included files are processed.
    with open(path, "r") as inf:
        source = inf.read()

    # Multi-character delimiters precede their single-character prefixes
    # ("/*" and "*/" before "*", "||" before "|") so that they win.
    delimiters = ["\n", " ", "\t", "(", ")", "{", "}", "[", "]", "/*", "*/", "#", "*", ";", "+", "||", "|", "!", "?"]
    tokens = preproc_bp(outname, split_tokens(source, delimiters, True, True))

    chunks = []
    inc = False
    comment = False
    lcomment = False

    for token in tokens:
        # Inside a block comment: drop everything up to the terminator
        if comment:
            if token == "*/":
                comment = False
            continue

        if token == "/*":
            comment = True
            continue

        # Inside a line comment: drop everything up to the end of line
        if lcomment:
            if token == "\n":
                lcomment = False
            continue

        if token == "#":
            lcomment = True
            continue

        if token == "\n":
            continue

        if inc:
            # The token following "[" names the file to include
            chunks.append("\n%s(" % tabs(indent))

            bp = parse_bp(base, nested_root, token, True, outname, indent + 1)
            if bp.strip() == "":
                chunks.append("\n%sNULL" % tabs(indent + 1))
            else:
                chunks.append(bp)

            chunks.append("\n%s)" % tabs(indent))
            inc = False
            continue

        if token in (";", "+", "||", "|"):
            chunks.append(" %s" % token)
        elif token == "[":
            inc = True
        elif token == "]":
            inc = False
        elif token == "(":
            chunks.append("\n%s%s" % (tabs(indent), token))
            indent += 1
        elif token in (")", "}"):
            if indent <= 0:
                print("%s: Wrong number of parentheses" % outname)

            indent -= 1
            chunks.append("\n%s%s" % (tabs(indent), token))
        elif token == "{":
            chunks.append(" %s" % token)
            indent += 1
        elif token in ("!", "?", "NULL"):
            chunks.append("\n%s%s" % (tabs(indent), token))
        else:
            # "*" and every other token are appended verbatim
            chunks.append(token)

    return "".join(chunks)
268
def parse_adl(base, root, inname, nested, indent):
    """Parse an Architecture Description Language file.

    base   -- directory against which include names starting with ``/``
              are resolved
    root   -- directory against which other include names are resolved
    inname -- path of the file to parse; for a nested call, the include
              specification ``name`` or ``name%argument`` as written in the
              including file (the argument replaces every ``%%`` in the
              included text)
    nested -- true when called for an included file
    indent -- current indentation depth (number of tabs)

    The parser is a state machine driven by the module-level ``context``
    set of flags, which is shared with nested invocations.  Comments are
    dropped, ``[name]`` includes are expanded (as Behavior Protocol while a
    ``protocol:`` section is being collected, as ADL otherwise) and the
    recognised ``interface``, ``architecture`` and ``system architecture``
    definitions are re-emitted in a normalised layout.

    Returns the normalised text.  Syntax problems are reported on standard
    output; a nested include that cannot be found is reported and yields an
    empty string.
    """

    # The parser state lives at module level next to ``context``; without
    # this declaration the assignments below would create function locals
    # and "protocol +=" could fail with UnboundLocalError.
    global interface, architecture, protocol

    if nested:
        parts = inname.split("%")

        if len(parts) > 1:
            inarg = parts[1]
        else:
            # No argument given: "%%" is replaced by itself
            inarg = "%%"

        if parts[0][0:1] == "/":
            # "/name" is looked up below the base directory
            path = os.path.join(base, ".%s" % parts[0])
            nested_root = os.path.dirname(path)
        else:
            path = os.path.join(root, parts[0])
            nested_root = root

        if not os.path.isfile(path):
            print("%s: Unable to include file %s" % (inname, path))
            return ""
    else:
        inarg = "%%"
        path = inname
        nested_root = root

    # Read the whole file up front so that the handle is closed even on
    # errors and is not held open while included files are processed.
    with open(path, "r") as inf:
        raw = preproc_adl(inf.read(), inarg)

    tokens = split_tokens(raw, ["\n", " ", "\t", "(", ")", "{", "}", "[", "]", "/*", "*/", "#", ";"], True, True)
    output = ""

    for token in tokens:

        # Includes

        if INC in context:
            # The token following "[" names the file to include
            context.remove(INC)

            if PROTOCOL in context:
                # Diagnostics of the included protocol are prefixed with
                # the name of the including file
                protocol += parse_bp(base, nested_root, token, True, inname, indent).strip()
            else:
                output += "\n%s" % tabs(indent)
                output += parse_adl(base, nested_root, token, True, indent).strip()

            context.add(POST_INC)
            continue

        if POST_INC in context:
            if token != "]":
                print("%s: Expected ]" % inname)

            context.add(SEEN_NL)
            context.remove(POST_INC)
            continue

        # Comments and newlines

        if BLOCK_COMMENT in context:
            if token == "*/":
                context.remove(BLOCK_COMMENT)

            continue

        if LINE_COMMENT in context:
            if token == "\n":
                context.remove(LINE_COMMENT)

            continue

        # Any context

        if token == "/*":
            context.add(BLOCK_COMMENT)
            continue

        if token == "#":
            context.add(LINE_COMMENT)
            continue

        if token == "[":
            context.add(INC)
            continue

        # Seen newline

        if SEEN_NL in context:
            # Only a newline directly following an include is preserved
            context.remove(SEEN_NL)
            if token == "\n":
                output += "\n%s" % tabs(indent)
                continue
        else:
            if token == "\n":
                continue

        # "interface"

        if IFACE in context:
            if NULL in context:
                # After the closing brace only the terminating ";" remains
                if token != ";":
                    print("%s: Expected ;" % inname)
                else:
                    output += "%s\n" % token

                context.remove(NULL)
                context.remove(IFACE)
                interface = None
                continue

            if BODY in context:
                if PROTOCOL in context:
                    if token == "{":
                        indent += 1
                    elif token == "}":
                        indent -= 1

                    if indent == 1:
                        # The interface body was entered at depth 2, so
                        # dropping to depth 1 means the brace that closes
                        # the whole interface has been reached.
                        output += protocol.strip()
                        protocol = None

                        output += "\n%s" % token

                        # Back at top level, as on the closing-brace path
                        # of an interface without a protocol section
                        indent = 0

                        context.remove(PROTOCOL)
                        context.remove(BODY)
                        context.add(NULL)
                    else:
                        protocol += token

                    continue

                if PROTOTYPE in context:
                    if FIN in context:
                        if token != ";":
                            print("%s: Expected ;" % inname)
                        else:
                            output += "%s" % token

                        context.remove(FIN)
                        context.remove(PROTOTYPE)
                        continue

                    if PAR_RIGHT in context:
                        if token == ")":
                            output += "%s" % token
                            context.remove(PAR_RIGHT)
                            context.add(FIN)
                        else:
                            output += " %s" % token

                        continue

                    if SIGNATURE in context:
                        output += "%s" % token

                        # The flag is removed exactly once; an empty
                        # parameter list "()" goes straight to FIN
                        context.remove(SIGNATURE)
                        if token == ")":
                            context.add(FIN)
                        else:
                            context.add(PAR_RIGHT)

                        continue

                    if PAR_LEFT in context:
                        if token != "(":
                            print("%s: Expected (" % inname)
                        else:
                            output += "%s" % token

                        context.remove(PAR_LEFT)
                        context.add(SIGNATURE)
                        continue

                    if not identifier(token):
                        print("%s: Method identifier expected" % inname)
                    else:
                        output += "%s" % token

                    context.add(PAR_LEFT)
                    continue

                if token == "}":
                    # The interface body is opened with "indent += 2"
                    if indent != 2:
                        print("%s: Wrong number of parentheses" % inname)
                    else:
                        indent = 0
                        output += "\n%s" % token

                    context.remove(BODY)
                    context.add(NULL)
                    continue

                if token == "ipcarg_t":
                    output += "\n%s%s " % (tabs(indent), token)
                    context.add(PROTOTYPE)
                    continue

                if token == "protocol:":
                    # The label is printed one level shallower than the
                    # method prototypes
                    output += "\n%s%s" % (tabs(indent - 1), token)
                    context.add(PROTOCOL)
                    protocol = ""
                    continue

                print("%s: Unknown token %s in interface" % (inname, token))
                continue

            if HEAD in context:
                if token == "{":
                    output += "%s" % token
                    indent += 2
                    context.remove(HEAD)
                    context.add(BODY)
                    continue

                if token == ";":
                    # Body-less interface declaration: leave the interface
                    # context (ARCH is never set on this path)
                    output += "%s\n" % token
                    context.remove(HEAD)
                    context.remove(IFACE)
                    continue

                if not word(token):
                    print("%s: Expected word" % inname)
                else:
                    output += "%s " % token

                continue

            if not identifier(token):
                print("%s: Expected interface name" % inname)
            else:
                interface = token
                output += "%s " % token

            context.add(HEAD)
            continue

        # "architecture"

        if ARCH in context:
            if NULL in context:
                # After the closing brace only the terminating ";" remains
                if token != ";":
                    print("%s: Expected ;" % inname)
                else:
                    output += "%s\n" % token

                context.remove(NULL)
                context.remove(ARCH)
                context.discard(SYSTEM)
                architecture = None
                continue

            if BODY in context:
                if BIND in context:
                    # bind <descriptor> to <descriptor> ;
                    if FIN in context:
                        if token != ";":
                            print("%s: Expected ;" % inname)
                        else:
                            output += "%s" % token

                        context.remove(FIN)
                        context.remove(BIND)
                        continue

                    if VAR in context:
                        if not descriptor(token):
                            print("%s: Expected second interface descriptor" % inname)
                        else:
                            output += "%s" % token

                        context.add(FIN)
                        context.remove(VAR)
                        continue

                    if TO in context:
                        if token != "to":
                            print("%s: Expected to" % inname)
                        else:
                            output += "%s " % token

                        context.add(VAR)
                        context.remove(TO)
                        continue

                    if not descriptor(token):
                        print("%s: Expected interface descriptor" % inname)
                    else:
                        output += "%s " % token

                    context.add(TO)
                    continue

                if INST in context:
                    # inst <type> <name> ;
                    if FIN in context:
                        if token != ";":
                            print("%s: Expected ;" % inname)
                        else:
                            output += "%s" % token

                        context.remove(FIN)
                        context.remove(INST)
                        continue

                    if VAR in context:
                        if not identifier(token):
                            print("%s: Expected instance name" % inname)
                        else:
                            output += "%s" % token

                        context.add(FIN)
                        context.remove(VAR)
                        continue

                    if not identifier(token):
                        print("%s: Expected frame/architecture type" % inname)
                    else:
                        output += "%s " % token

                    context.add(VAR)
                    continue

                if token == "}":
                    if indent != 1:
                        print("%s: Wrong number of parentheses" % inname)
                    else:
                        indent -= 1
                        output += "\n%s" % token

                    context.remove(BODY)
                    context.add(NULL)
                    continue

                if token == "inst":
                    output += "\n%s%s " % (tabs(indent), token)
                    context.add(INST)
                    continue

                if token == "bind":
                    output += "\n%s%s " % (tabs(indent), token)
                    context.add(BIND)
                    continue

                print("%s: Unknown token %s in architecture" % (inname, token))
                continue

            if HEAD in context:
                if token == "{":
                    output += "%s" % token
                    indent += 1
                    context.remove(HEAD)
                    context.add(BODY)
                    continue

                if token == ";":
                    output += "%s\n" % token
                    context.remove(HEAD)
                    context.remove(ARCH)
                    context.discard(SYSTEM)
                    continue

                if not word(token):
                    print("%s: Expected word" % inname)
                else:
                    output += "%s " % token

                continue

            if not identifier(token):
                print("%s: Expected architecture name" % inname)
            else:
                architecture = token
                output += "%s " % token

            context.add(HEAD)
            continue

        # "system architecture"

        if SYSTEM in context:
            if token != "architecture":
                print("%s: Expected architecture" % inname)
            else:
                output += "%s " % token

            context.add(ARCH)
            continue

        # Top level: keywords opening a definition

        if token == "interface":
            output += "\n%s " % token
            context.add(IFACE)
            continue

        if token == "system":
            output += "\n%s " % token
            context.add(SYSTEM)
            continue

        if token == "architecture":
            output += "\n%s " % token
            context.add(ARCH)
            continue

        print("%s: Unknown token %s" % (inname, token))

    return output
674
def open_adl(base, root, inname, outname):
    """Preprocess one Architecture Description file and write the result.

    base    -- directory against which include names starting with ``/``
               are resolved
    root    -- directory against which other include names are resolved
    inname  -- path of the ADL file to read
    outname -- path of the file to write; an existing file is overwritten
               after a notice is printed on standard output

    The module-level parser state is reset before parsing.  When parsing
    yields no text, a ``/* Empty */`` placeholder is written instead.
    """

    # Without this declaration the resets below would only create locals
    # and leave the module-level parser state untouched.
    global interface, architecture, protocol

    context.clear()
    interface = None
    architecture = None
    protocol = None

    adl = parse_adl(base, root, inname, False, 0)
    if adl.strip() == "":
        adl = "/* Empty */\n"

    if os.path.isfile(outname):
        print("%s: File already exists, overwriting" % outname)

    # The with block closes the output file even if the write fails
    with open(outname, "w") as outf:
        outf.write(adl.strip())
693
def recursion(base, root, output, level):
    """Walk a directory tree and preprocess every ``.adl`` file in it.

    base   -- base directory handed on to open_adl()
    root   -- directory to scan; subdirectories are scanned recursively
    output -- directory that receives the preprocessed files; each result
              is stored directly in it under the name of the source file
    level  -- recursion depth (only passed on, incremented per level)
    """

    for name in os.listdir(root):
        canon = os.path.join(root, name)

        if os.path.isfile(canon):
            if canon.endswith(".adl"):
                # ``name`` already is the last path component, so there is
                # no need to split the joined path on a fixed separator
                open_adl(base, root, canon, os.path.join(output, name))
        elif os.path.isdir(canon):
            recursion(base, canon, output, level + 1)
710
def main():
    """Command-line entry point.

    Expects the output directory as the first argument and preprocesses
    all ``.adl`` files found below the current directory into it.  Prints
    the usage synopsis when the argument is missing and an error message
    when it does not name an existing directory.
    """

    if len(sys.argv) < 2:
        usage(sys.argv[0])
        return

    path = os.path.abspath(sys.argv[1])
    if not os.path.isdir(path):
        # Single-argument call form: same output under Python 2 and 3
        print("Error: <OUTPUT> is not a directory")
        return

    recursion(".", ".", path, 0)

if __name__ == '__main__':
    main()
Note: See TracBrowser for help on using the repository browser.