source: mainline/uspace/app/bdsh/cmds/modules/grep/grep.c@ 0564ee47

Last change on this file since 0564ee47 was 0564ee47, checked in by parth-27 <parth1020patel@…>, 5 years ago

grep command added

  • Property mode set to 100644
File size: 16.3 KB
Line 
1#include <stdio.h>
2#include <stdlib.h>
3#include <ctype.h>
4#include "config.h"
5#include "util.h"
6#include "errors.h"
7#include "entry.h"
8#include "grep.h"
9#include "cmds.h"
10
/*
 * General help text, printed one entry per line by help().
 * The table is terminated by a null pointer.
 */
static const char *documentation[] = {
	"grep searches a file for a given pattern. Execute by",
	" grep [flags] regular_expression file_list\n",
	"Flags are single characters preceded by '-':",
	" -c Only a count of matching lines is printed",
	" -f Print file name for matching lines switch, see below",
	" -n Each line is preceded by its line number",
	" -v Only print non-matching lines\n",
	"The file_list is a list of files (wildcards are acceptable on RSX modes).",
	"\nThe file name is normally printed if there is a file given.",
	"The -f flag reverses this action (print name no file, not if more).\n",
	NULL
};
23
/*
 * Help text describing the pattern syntax, printed one entry per line
 * by help(). The table is terminated by a null pointer.
 */
static const char *patdoc[] = {
	"The regular_expression defines the pattern to search for. Upper- and",
	"lower-case are always ignored. Blank lines never match. The expression",
	"should be quoted to prevent file-name translation.",
	"x An ordinary character (not mentioned below) matches that character.",
	"'\\' The backslash quotes any character. \"\\$\" matches a dollar-sign.",
	"'^' A circumflex at the beginning of an expression matches the",
	" beginning of a line.",
	"'$' A dollar-sign at the end of an expression matches the end of a line.",
	"'.' A period matches any character except \"new-line\".",
	"':a' A colon matches a class of characters described by the following",
	"':d' character. \":a\" matches any alphabetic, \":d\" matches digits,",
	"':n' \":n\" matches alphanumerics, \": \" matches spaces, tabs, and",
	"': ' other control characters, such as new-line.",
	"'*' An expression followed by an asterisk matches zero or more",
	" occurrences of that expression: \"fo*\" matches \"f\", \"fo\"",
	" \"foo\", etc.",
	"'+' An expression followed by a plus sign matches one or more",
	" occurrences of that expression: \"fo+\" matches \"fo\", etc.",
	"'-' An expression followed by a minus sign optionally matches",
	" the expression.",
	"'[]' A string enclosed in square brackets matches any character in",
	" that string, but no others. If the first character in the",
	" string is a circumflex, the expression matches any character",
	" except \"new-line\" and the characters in the string. For",
	" example, \"[xyz]\" matches \"xx\" and \"zyx\", while \"[^xyz]\"",
	" matches \"abc\" but not \"axb\". A range of characters may be",
	" specified by two characters separated by \"-\". Note that,",
	" [a-z] matches alphabetics, while [z-a] never matches.",
	"The concatenation of regular expressions is a regular expression.",
	NULL
};
55
/* Buffer capacities, in bytes. */
#define LMAX 512	/* line buffer (lbuf) */
#define PMAX 256	/* compiled pattern buffer (pbuf) */

/*
 * Opcodes of the compiled pattern held in pbuf. compile() emits them
 * and pmatch() interprets them.
 */
enum {
	CHAR = 1,	/* literal character; the character follows */
	BOL = 2,	/* beginning of line */
	EOL = 3,	/* end of line */
	ANY = 4,	/* any single character */
	CLASS = 5,	/* character class; count byte and members follow */
	NCLASS = 6,	/* negated character class; same layout as CLASS */
	STAR = 7,	/* zero or more of the operand that follows */
	PLUS = 8,	/* one or more of the operand that follows */
	MINUS = 9,	/* optional operand follows */
	ALPHA = 10,	/* ":a" alphabetic */
	DIGIT = 11,	/* ":d" digit */
	NALPHA = 12,	/* ":n" alphanumeric */
	PUNCT = 13,	/* ": " space or control character */
	RANGE = 14,	/* inside a class: low and high bound follow */
	ENDPAT = 15	/* end of a (sub)pattern */
};

/*
 * Command state shared by the functions of this file.
 * NOTE(review): these have external linkage and very generic names;
 * consider making them static if no other translation unit uses them.
 */
int cflag = 0;		/* -c: print only a count of selected lines */
int fflag = 0;		/* -f: file-name printing switch */
int nflag = 0;		/* -n: prefix lines with their line number */
int vflag = 0;		/* -v: select non-matching lines */
int nfile = 0;		/* number of file arguments */
int debug = 0;		/* -d: debug level */

char *pp;		/* next free position in pbuf while compiling */
char lbuf[LMAX];	/* current input line */
char pbuf[PMAX];	/* compiled pattern */
78
79/*** Display a file name *******************************/
80static int file(char *s)
81{
82 printf("File %s:\n", s);
83 return CMD_SUCCESS;
84}
85
86/*** Report unopenable file ****************************/
87static int cant(char *s)
88{
89 fprintf(stderr, "%s: cannot open\n", s);
90 return CMD_SUCCESS;
91}
92
93/*** Give good help ************************************/
94static int help(const char **hp)
95{
96 register const char **dp;
97
98 for (dp = hp; *dp; ++dp)
99 printf("%s\n", *dp);
100
101 return CMD_SUCCESS;
102}
103
/** Report a command-line error, print the usage summary and quit.
 *
 * This function does not return: it ends by calling exit(1), which
 * terminates the calling process.
 *
 * @param s Short description of what was wrong with the invocation.
 */
static int usage(const char *s)
{
	fprintf(stderr, "?GREP-E-%s\n", s);
	fputs("Usage: grep [-cfnv] pattern [file ...]. grep ? for help\n",
	    stderr);
	exit(1);
}
112
113/*** Report an error ***********************************/
114static int error(const char *s)
115{
116 fprintf(stderr, "%s", s);
117 return CMD_FAILURE;
118}
119
120/*** Store an entry in the pattern buffer **************/
121static int store(int op)
122{
123 if (pp >= &pbuf[PMAX])
124 error("Pattern too complex\n");
125 *pp++ = op;
126
127 return CMD_SUCCESS;
128}
129
130/*** Report a bad pattern specification ****************/
131static int badpat(const char *message, char *source, char *stop)
132/* Error message */
133/* Pattern start */
134/* Pattern end */
135{
136 fprintf(stderr, "-GREP-E-%s, pattern is\"%s\"\n", message, source);
137 fprintf(stderr, "-GREP-E-Stopped at byte %ld, '%c'\n",
138 stop - source, stop[-1]);
139 error("?GREP-E-Bad pattern\n");
140
141 return CMD_SUCCESS;
142}
143
144
145/*** Match partial line with pattern *******************/
146static char *pmatch(char *line, char *pattern)
147/* (partial) line to match */
148/* (partial) pattern to match */
149{
150 register char *l; /* Current line pointer */
151 register char *p; /* Current pattern pointer */
152 register char c; /* Current character */
153 char *e; /* End for STAR and PLUS match */
154 int op; /* Pattern operation */
155 int n; /* Class counter */
156 char *are; /* Start of STAR match */
157
158 l = line;
159 if (debug > 1)
160 printf("pmatch(\"%s\")\n", line);
161 p = pattern;
162 while ((op = *p++) != ENDPAT)
163 {
164 if (debug > 1)
165 printf("byte[%ld] = 0%o, '%c', op = 0%o\n",
166 (l - line), *l, *l, op);
167 switch (op)
168 {
169
170 case CHAR:
171 if (tolower(*l++) != *p++)
172 return (0);
173 break;
174
175 case BOL:
176 if (l != lbuf)
177 return (0);
178 break;
179
180 case EOL:
181 if (*l != '\0')
182 return (0);
183 break;
184
185 case ANY:
186 if (*l++ == '\0')
187 return (0);
188 break;
189
190 case DIGIT:
191 if ((c = *l++) < '0' || (c > '9'))
192 return (0);
193 break;
194
195 case ALPHA:
196 c = tolower(*l++);
197 if (c < 'a' || c > 'z')
198 return (0);
199 break;
200
201 case NALPHA:
202 c = tolower(*l++);
203 if (c >= 'a' && c <= 'z')
204 break;
205 else if (c < '0' || c > '9')
206 return (0);
207 break;
208
209 case PUNCT:
210 c = *l++;
211 if (c == 0 || c > ' ')
212 return (0);
213 break;
214
215 case CLASS:
216 case NCLASS:
217 c = tolower(*l++);
218 n = *p++ & 0377;
219 do
220 {
221 if (*p == RANGE)
222 {
223 p += 3;
224 n -= 2;
225 if (c >= p[-2] && c <= p[-1])
226 break;
227 }
228 else if (c == *p++)
229 break;
230 } while (--n > 1);
231 if ((op == CLASS) == (n <= 1))
232 return (0);
233 if (op == CLASS)
234 p += n - 2;
235 break;
236
237 case MINUS:
238 e = pmatch(l, p); /* Look for a match */
239 while (*p++ != ENDPAT)
240 ; /* Skip over pattern */
241 if (e) /* Got a match? */
242 l = e; /* Yes, update string */
243 break; /* Always succeeds */
244
245 case PLUS: /* One or more ... */
246 if ((l = pmatch(l, p)) == 0)
247 return (0); /* Gotta have a match */
248 break;
249 case STAR: /* Zero or more ... */
250 are = l; /* Remember line start */
251 while (*l && (e = pmatch(l, p)))
252 l = e; /* Get longest match */
253 while (*p++ != ENDPAT)
254 ; /* Skip over pattern */
255 while (l >= are)
256 { /* Try to match rest */
257 if ((e = pmatch(l, p)))
258 return (e);
259 --l; /* Nope, try earlier */
260 }
261 return (0); /* Nothing else worked */
262 break;
263
264 default:
265 printf("Bad op code %d\n", op);
266 error("Cannot happen -- match\n");
267 }
268 }
269 return (l);
270}
271
272
273
274/*** Compile a class (within []) ***********************/
275static char *cclass(char *source, char *src)
276/* Pattern start -- for error msg. */
277/* Class start */
278{
279 register char *s; /* Source pointer */
280 register char *cp; /* Pattern start */
281 register int c; /* Current character */
282 int o; /* Temp */
283
284 s = src;
285 o = CLASS;
286 if (*s == '^')
287 {
288 ++s;
289 o = NCLASS;
290 }
291 store(o);
292 cp = pp;
293 store(0); /* Byte count */
294 while ((c = *s++) && c != ']')
295 {
296 if (c == '\\')
297 { /* Store quoted char */
298 if ((c = *s++) == '\0') /* Gotta get something */
299 badpat("Class terminates badly", source, s);
300 else
301 store(tolower(c));
302 }
303 else if (c == '-' &&
304 (pp - cp) > 1 && *s != ']' && *s != '\0')
305 {
306 c = pp[-1]; /* Range start */
307 pp[-1] = RANGE; /* Range signal */
308 store(c); /* Re-store start */
309 c = *s++; /* Get end char and*/
310 store(tolower(c)); /* Store it */
311 }
312 else
313 {
314 store(tolower(c)); /* Store normal char */
315 }
316 }
317 if (c != ']')
318 badpat("Unterminated class", source, s);
319 if ((c = (pp - cp)) >= 256)
320 badpat("Class too large", source, s);
321 if (c == 0)
322 badpat("Empty class", source, s);
323 *cp = c;
324 return (s);
325}
326
327/*** Match line (lbuf) with pattern (pbuf) return 1 if match ***/
328static int match()
329{
330 register char *l; /* Line pointer */
331
332 for (l = lbuf; *l; ++l)
333 {
334 if (pmatch(l, pbuf))
335 return 1;
336
337 //printf("%s",l);
338 }
339
340 return 0;
341
342}
343
344
345/*** Scan the file for the pattern in pbuf[] ***********/
346static int grep(FILE *fp, char *fn)
347/* File to process */
348/* File name (for -f option) */
349{
350 register int lno, count, m;
351
352 //printf("\n Inside Grep %s",fn);
353 //int i=0;
354 //for(i = 0;i<256;i++)
355 //{
356 // if(pbuf[i])
357 // printf("%c",pbuf[i]);
358 // else
359 // printf("%d",i);
360 //}
361
362 lno = 0;
363 count = 0;
364 while (fgets(lbuf, LMAX, fp))
365 {
366 ++lno;
367 m = match();
368 if ((m && !vflag) || (!m && vflag))
369 {
370 ++count;
371 if (!cflag)
372 {
373 if (fflag && fn)
374 {
375 file(fn);
376 fn = 0;
377 }
378 if (nflag)
379 {
380 printf("%d\t", lno);
381 }
382 printf("%s\n", lbuf);
383 }
384 }
385 }
386 if (cflag)
387 {
388 if (fflag && fn)
389 file(fn);
390 printf("%d\n", count);
391 }
392
393 return CMD_SUCCESS;
394}
395
396
397/*** Compile the pattern into global pbuf[] ************/
398static int compile(char *source)
399/* Pattern to compile */
400{
401 register char *s; /* Source string pointer */
402 register char *lp; /* Last pattern pointer */
403 register int c; /* Current character */
404 int o; /* Temp */
405 char *spp; /* Save beginning of pattern */
406
407
408 s = source;
409 //debug=1;
410 if (debug)
411 printf("Pattern = %s \n", s);
412
413
414 pp = pbuf;
415
416 while ((c = *s++))
417 {
418 /*
419 * STAR, PLUS and MINUS are special.
420 */
421 if (c == '*' || c == '+' || c == '-')
422 {
423 if (pp == pbuf ||
424 (o = pp[-1]) == BOL ||
425 o == EOL ||
426 o == STAR ||
427 o == PLUS ||
428 o == MINUS)
429 badpat("Illegal occurrance op.", source, s);
430 store(ENDPAT);
431 store(ENDPAT);
432 spp = pp; /* Save pattern end */
433 while (--pp > lp) /* Move pattern down */
434 *pp = pp[-1]; /* one byte */
435 *pp = (c == '*') ? STAR : (c == '-') ? MINUS : PLUS;
436 pp = spp; /* Restore pattern end */
437 continue;
438 }
439 /*
440 * All the rest.
441 */
442 lp = pp; /* Remember start */
443 switch (c)
444 {
445
446 case '^':
447 store(BOL);
448 break;
449
450 case '$':
451 store(EOL);
452 break;
453
454 case '.':
455 store(ANY);
456 break;
457
458 case '[':
459 s = cclass(source, s);
460 break;
461
462 case ':':
463 if (*s)
464 {
465 switch (tolower(c = *s++))
466 {
467
468 case 'a':
469 case 'A':
470 store(ALPHA);
471 break;
472
473 case 'd':
474 case 'D':
475 store(DIGIT);
476 break;
477
478 case 'n':
479 case 'N':
480 store(NALPHA);
481 break;
482
483 case ' ':
484 store(PUNCT);
485 break;
486
487 default:
488 badpat("Unknown : type", source, s);
489 }
490 break;
491 }
492 else
493 badpat("No : type", source, s);
494
495 break;
496
497 case '\\':
498 if (*s)
499 c = *s++;
500
501 break;
502
503 default:
504 store(CHAR);
505 store(tolower(c));
506 }
507 }
508 store(ENDPAT);
509 store(0); /* Terminate string */
510 if (debug)
511 {
512 for (lp = pbuf; lp < pp;)
513 {
514 if ((c = (*lp++ & 0377)) < ' ')
515 printf("\\%o ", c);
516 else
517 printf("%c ", c);
518 }
519 printf("\n");
520 }
521
522 return CMD_SUCCESS;
523}
524
525
526static const char *cmdname = "grep";
527
528/* Dispays help for grep in various levels */
529void help_cmd_grep(unsigned int level)
530{
531 printf("This is the %s help for '%s'.\n",
532 level ? EXT_HELP : SHORT_HELP, cmdname);
533 return;
534}
535
536/* Main entry point for grep, accepts an array of arguments */
537int cmd_grep(char **argv)
538{
539 int argc;
540
541 /* Count the arguments */
542 for (argc = 0; argv[argc] != NULL; argc++);
543
544
545 register char *p;
546 register int c, i;
547 int gotpattern;
548
549 FILE *f;
550
551 if (argc <= 1)
552 usage("No arguments");
553 if (argc == 2 && argv[1][0] == '?' && argv[1][1] == 0)
554 {
555 help(documentation);
556 help(patdoc);
557 return CMD_SUCCESS;
558 }
559 nfile = argc - 1;
560 gotpattern = 0;
561 for (i = 1; i < argc; ++i)
562 {
563 p = argv[i];
564 if (*p == '-')
565 {
566 ++p;
567 while ((c = *p++))
568 {
569 switch (tolower(c))
570 {
571
572 case '?':
573 help(documentation);
574 break;
575
576 case 'C':
577 case 'c':
578 ++cflag;
579 break;
580
581 case 'D':
582 case 'd':
583 ++debug;
584 break;
585
586 case 'F':
587 case 'f':
588 ++fflag;
589 break;
590
591 case 'n':
592 case 'N':
593 ++nflag;
594 break;
595
596 case 'v':
597 case 'V':
598 ++vflag;
599 break;
600
601 default:
602 usage("Unknown flag");
603 }
604 }
605 argv[i] = 0;
606 --nfile;
607 }
608 else if (!gotpattern)
609 {
610 compile(p);
611 argv[i] = 0;
612 ++gotpattern;
613 --nfile;
614 }
615 }
616
617
618 if (!gotpattern)
619 usage("No pattern");
620 if (nfile == 0)
621 grep(stdin, 0);
622 else
623 {
624
625 fflag = (fflag) ^ (nfile > 0);
626 for (i = 1; i < argc; ++i)
627 {
628 if ((p = argv[i]))
629 {
630 if ((f = fopen(p, "r")) == NULL)
631 cant(p);
632 else
633 {
634 grep(f, p);
635 fclose(f);
636 }
637 }
638 }
639 }
640
641 cflag = 0;
642 fflag = 0;
643 nflag = 0;
644 vflag = 0;
645 nfile = 0;
646 debug = 0;
647 return CMD_SUCCESS;
648}
649
Note: See TracBrowser for help on using the repository browser.