source: mainline/uspace/app/bdsh/cmds/modules/grep/grep.c@ 4db4718d

Last change on this file since 4db4718d was 4db4718d, checked in by parth-27 <parth1020patel@…>, 5 years ago

grep command added

  • Property mode set to 100644
File size: 13.3 KB
Line 
1/*
2 * The information in this document is subject to change
3 * without notice and should not be construed as a commitment
4 * by Digital Equipment Corporation or by DECUS.
5 *
6 * Neither Digital Equipment Corporation, DECUS, nor the authors
7 * assume any responsibility for the use or reliability of this
8 * document or the described software.
9 *
10 * Copyright (C) 1980, DECUS
11 *
12 * General permission to copy or modify, but not for profit, is
13 * hereby granted, provided that the above copyright notice is
14 * included and reference made to the fact that reproduction
15 * privileges were granted by DECUS.
16 */
17
18#include "grep.h"
19#include <ctype.h>
20#include <stdio.h>
21#include <stdlib.h>
22#include "cmds.h"
23#include "config.h"
24#include "entry.h"
25#include "errors.h"
26#include "util.h"
27
// Usage text shown by "grep ?" and by the -? flag. help() prints one entry
// per output line and stops at the terminating NULL; an embedded "\n" adds a
// blank separator line after the entry that carries it.
static const char *documentation[] = {
	"For a given pattern, grep searches the file to match the former. It is "
	"executed by",
	" grep [flags]/switches regular_expression file_list\n",
	"Flags/switches uses '-' followed by single characters:",
	" -n Prints line number before every line",
	" -c Shows the count of matched lines",
	" -v Print non-matched lines",
	// The separator belongs after the last flag, not in the middle of the list.
	" -f Print the file name for matching lines switch, see below\n",
	"The file_list is basically a list of files (wildcards are acceptable on "
	"RSX modes).",
	"\nIf a file is given, the file name is normally printed.",
	"The -f flag basically reverses this action (print name no file, not if "
	"more).\n",
	NULL};
43
// Description of the pattern syntax, printed after documentation[] by
// "grep ?". One entry per output line, terminated by NULL.
static const char *patdoc[] = {
	"The regular_expression defines the pattern to search for. Upper- and",
	"lower-case are always ignored. Blank lines never match. The expression",
	"should be quoted to prevent file-name translation.",
	"x An ordinary character (not mentioned below) matches that "
	"character.",
	"'\\' The backslash quotes any character. \"\\$\" matches a "
	"dollar-sign.",
	"'^' A circumflex at the beginning of an expression matches the",
	" beginning of a line.",
	"'$' A dollar-sign at the end of an expression matches the end of a "
	"line.",
	"'.' A period matches any character except \"new-line\".",
	"':a' A colon matches a class of characters described by the following",
	"':d' character. \":a\" matches any alphabetic, \":d\" matches "
	"digits,",
	"':n' \":n\" matches alphanumerics, \": \" matches spaces, tabs, and",
	"': ' other control characters, such as new-line.",
	"'*' An expression followed by an asterisk matches zero or more",
	" occurrences of that expression: \"fo*\" matches \"f\", \"fo\"",
	" \"foo\", etc.",
	"'+' An expression followed by a plus sign matches one or more",
	" occurrences of that expression: \"fo+\" matches \"fo\", etc.",
	"'-' An expression followed by a minus sign optionally matches",
	" the expression.",
	"'[]' A string enclosed in square brackets matches any character in",
	" that string, but no others. If the first character in the",
	" string is a circumflex, the expression matches any character",
	" except \"new-line\" and the characters in the string. For",
	" example, \"[xyz]\" matches \"xx\" and \"zyx\", while \"[^xyz]\"",
	" matches \"abc\" but not \"axb\". A range of characters may be",
	" specified by two characters separated by \"-\". Note that,",
	" [a-z] matches alphabetics, while [z-a] never matches.",
	"The concatenation of regular expressions is a regular expression.",
	NULL};
78
// Buffer capacities, in bytes.
enum {
	LMAX = 512,	// input line buffer (lbuf)
	PMAX = 256	// compiled pattern buffer (pbuf)
};

// Opcodes stored in the compiled pattern and interpreted by partialMatch().
enum {
	CHAR_ = 1,	// literal character; the character follows
	BOL_ = 2,	// beginning of line
	EOL_ = 3,	// end of line
	ANY_ = 4,	// any character except the terminator
	CLASS_ = 5,	// character class; count byte and members follow
	NCLASS_ = 6,	// negated character class
	STAR_ = 7,	// zero or more of the following sub-pattern
	PLUS_ = 8,	// one or more of the following sub-pattern
	MINUS_ = 9,	// optional following sub-pattern
	ALPHA_ = 10,	// ":a" letter
	DIGIT_ = 11,	// ":d" digit
	NALPHA_ = 12,	// ":n" letter or digit
	PUNCT_ = 13,	// ": " space or control character
	RANGE_ = 14,	// inside a class: low and high bound follow
	ENDPAT_ = 15	// end of (sub-)pattern
};
97
98int cflag = 0, fflag = 0, nflag = 0, vflag = 0, nfile = 0, debug = 0;
99
100char *pp, lbuf[LMAX], pbuf[PMAX];
101
102// Display name of a file
103static int displayFile(char *s)
104{
105 printf("File %s:\n", s);
106 return CMD_SUCCESS;
107}
108
109// Report an unopenable file
110static int cantOpen(char *s)
111{
112 fprintf(stderr, "%s: cannot open\n", s);
113 return CMD_SUCCESS;
114}
115
116// Gives good help
117static int help(const char **hp)
118{
119 register const char **dp;
120
121 for (dp = hp; *dp; ++dp)
122 printf("%s\n", *dp);
123
124 return CMD_SUCCESS;
125}
126
// Print the error tag s followed by a one-line usage summary on stderr,
// then terminate via exit(1); control never returns to the caller.
// NOTE(review): this file lives under bdsh/cmds/modules, so exit() presumably
// ends the whole shell rather than just this command -- confirm, and if so
// return an error to cmd_grep() instead (needs a matching change there).
static int usageSummary(const char *s)
{
	fprintf(stderr, "?GREP-E-%s\n", s);
	fputs("Usage: grep [-cfnv] pattern [file ...]. grep ? for help\n",
	    stderr);
	exit(1);
}
135
136// Display error
137static int displayError(const char *s)
138{
139 fprintf(stderr, "%s", s);
140 return CMD_FAILURE;
141}
142
143// Store an entry in the pattern buffer
144static int storeEntry(int op)
145{
146 if (pp >= &pbuf[PMAX])
147 displayError("Pattern is complex\n");
148 *pp++ = op;
149
150 return CMD_SUCCESS;
151}
152
153// Report a bad pattern specification
154static int badPattern(const char *message, char *source, char *stop)
155/* Error message */
156/* Pattern start & end */
157{
158 fprintf(stderr, "-GREP-E-%s, pattern is\"%s\"\n", message, source);
159 fprintf(stderr, "-GREP-E-Stopped at byte %ld, '%c'\n", stop - source,
160 stop[-1]);
161 displayError("?GREP-E-Bad pattern\n");
162
163 return CMD_SUCCESS;
164}
165
166// Match partial line with the pattern
167static char *partialMatch(char *line, char *pattern)
168// (partial) line to match
169// (partial) pattern to match
170{
171 register char *l; // Current line pointer
172 register char *p; // Current pattern pointer
173 register char c; // Current character
174 char *e; // End for STAR and PLUS match
175 int op; // Pattern operation
176 int n; // Class counter
177 char *are; // Start of STAR match
178
179 l = line;
180 if (debug > 1)
181 printf("patternMatch(\"%s\")\n", line);
182 p = pattern;
183 while ((op = *p++) != ENDPAT_) {
184 if (debug > 1)
185 printf("byte[%ld] = 0%o, '%c', op = 0%o\n", (l - line),
186 *l, *l, op);
187 switch (op) {
188
189 case CHAR_:
190 if (tolower(*l++) != *p++)
191 return (0);
192 break;
193
194 case BOL_:
195 if (l != lbuf)
196 return (0);
197 break;
198
199 case EOL_:
200 if (*l != '\0')
201 return (0);
202 break;
203
204 case ANY_:
205 if (*l++ == '\0')
206 return (0);
207 break;
208
209 case DIGIT_:
210 if ((c = *l++) < '0' || (c > '9'))
211 return (0);
212 break;
213
214 case ALPHA_:
215 c = tolower(*l++);
216 if (c < 'a' || c > 'z')
217 return (0);
218 break;
219
220 case NALPHA_:
221 c = tolower(*l++);
222 if (c >= 'a' && c <= 'z')
223 break;
224 else if (c < '0' || c > '9')
225 return (0);
226 break;
227
228 case PUNCT_:
229 c = *l++;
230 if (c == 0 || c > ' ')
231 return (0);
232 break;
233
234 case CLASS_:
235 case NCLASS_:
236 c = tolower(*l++);
237 n = *p++ & 0377;
238 do {
239 if (*p == RANGE_) {
240 p += 3;
241 n -= 2;
242 if (c >= p[-2] && c <= p[-1])
243 break;
244 } else if (c == *p++)
245 break;
246 } while (--n > 1);
247 if ((op == CLASS_) == (n <= 1))
248 return (0);
249 if (op == CLASS_)
250 p += n - 2;
251 break;
252
253 case MINUS_:
254 e = partialMatch(l, p); // Find a match
255 while (*p++ != ENDPAT_)
256 ; // Skip the pattern
257 if (e) // if match found
258 l = e; // update the string
259 break;
260
261 case PLUS_: // One or more ...
262 if ((l = partialMatch(l, p)) == 0)
263 return (0); // Gotta have a match
264 break;
265 case STAR_: // Zero or more ...
266 are = l; // Remember the line start
267 while (*l && (e = partialMatch(l, p)))
268 l = e; // get the longest match
269 while (*p++ != ENDPAT_)
270 ; // Skip the pattern
271 while (l >= are) { // Try to match rest
272 if ((e = partialMatch(l, p)))
273 return (e);
274 --l; // No try the earlier one
275 }
276 return (0); // Nothing else worked
277 break;
278
279 default:
280 printf("Wrong op code %d\n", op);
281 displayError("Can't happen -- match\n");
282 }
283 }
284 return (l);
285}
286
287// Compile a class (within [])
288static char *compileClass(char *source, char *src)
289// Start the pattern -- for error msg.
290// Start of class
291{
292 register char *s; // Source pointer
293 register char *cp; // Pattern start
294 register int c; // Current character
295 int o; // Temp
296
297 s = src;
298 o = CLASS_;
299 if (*s == '^') {
300 ++s;
301 o = NCLASS_;
302 }
303 storeEntry(o);
304 cp = pp;
305 storeEntry(0); // for byte count
306 while ((c = *s++) && c != ']') {
307 if (c == '\\') { // Store quoted char
308 if ((c = *s++) == '\0')
309 badPattern(
310 "Class is terminated badly", source, s);
311 else
312 storeEntry(tolower(c));
313 } else if (c == '-' && (pp - cp) > 1 && *s != ']'
314 && *s != '\0') {
315 c = pp[-1]; // Start of range
316 pp[-1] = RANGE_; // Signal's range
317 storeEntry(c); // Restore start
318 c = *s++; // Get end char
319 storeEntry(tolower(c)); // and store end char
320 } else {
321 storeEntry(tolower(c)); // Store normal char
322 }
323 }
324 if (c != ']')
325 badPattern("Class is unterminated", source, s);
326 if ((c = (pp - cp)) >= 256)
327 badPattern("Class very large", source, s);
328 if (c == 0)
329 badPattern("Class empty", source, s);
330 *cp = c;
331 return (s);
332}
333
334// Match the line -lbuf with the pattern -pbuf and if matched, return 1
335static int matchBuf()
336{
337 register char *l; // Line pointer
338
339 for (l = lbuf; *l; ++l) {
340 if (partialMatch(l, pbuf))
341 return 1;
342
343 // printf("%s",l);
344 }
345
346 return 0;
347}
348
349// Scan the file to match the pattern in pbuf[]
350static int grep(FILE *fp, char *fn)
351// File we want to process and file name (for -f option)
352{
353 register int lno, count, m;
354 lno = 0;
355 count = 0;
356 while (fgets(lbuf, LMAX, fp)) {
357 ++lno;
358 m = matchBuf();
359 if ((m && !vflag) || (!m && vflag)) {
360 ++count;
361 if (!cflag) {
362 if (fflag && fn) {
363 displayFile(fn);
364 fn = 0;
365 }
366 if (nflag) {
367 printf("%d\t", lno);
368 }
369 printf("%s\n", lbuf);
370 }
371 }
372 }
373 if (cflag) {
374 if (fflag && fn)
375 displayFile(fn);
376 printf("%d\n", count);
377 }
378
379 return CMD_SUCCESS;
380}
381
382// Compile the matched pattern into global pbuf[]
383static int compilePattern(char *source)
384// The pattern we want to compile
385{
386 register char *s; // Source string- pointer
387 register char *lp; // Last pattern- pointer
388 register int c; // Current character
389 int o; // Temp
390 char *spp; // Pointer to save beginning of pattern
391
392 s = source;
393 // debug=1;
394 if (debug)
395 printf("Pattern = %s \n", s);
396
397 pp = pbuf;
398
399 while ((c = *s++)) {
400
401 // STAR, PLUS and MINUS are special.
402 if (c == '*' || c == '+' || c == '-') {
403 if (pp == pbuf || (o = pp[-1]) == BOL_ || o == EOL_
404 || o == STAR_ || o == PLUS_ || o == MINUS_)
405 badPattern("Illegal occurrance op.", source, s);
406 storeEntry(ENDPAT_);
407 storeEntry(ENDPAT_);
408 spp = pp; // Save end of pattern
409 while (--pp > lp) // Move the pattern down
410 *pp = pp[-1]; // one byte
411 *pp = (c == '*') ? STAR_ : (c == '-') ? MINUS_ : PLUS_;
412 pp = spp; // Restore end of pattern
413 continue;
414 }
415
416 // All the rest.
417
418 lp = pp; // Remember the start
419 switch (c) {
420
421 case '^':
422 storeEntry(BOL_);
423 break;
424
425 case '$':
426 storeEntry(EOL_);
427 break;
428
429 case '.':
430 storeEntry(ANY_);
431 break;
432
433 case '[':
434 s = compileClass(source, s);
435 break;
436
437 case ':':
438 if (*s) {
439 switch (tolower(c = *s++)) {
440
441 case 'a':
442 case 'A':
443 storeEntry(ALPHA_);
444 break;
445
446 case 'd':
447 case 'D':
448 storeEntry(DIGIT_);
449 break;
450
451 case 'n':
452 case 'N':
453 storeEntry(NALPHA_);
454 break;
455
456 case ' ':
457 storeEntry(PUNCT_);
458 break;
459
460 default:
461 badPattern("Unknown : type", source, s);
462 }
463 break;
464 } else
465 badPattern("No : type", source, s);
466
467 break;
468
469 case '\\':
470 if (*s)
471 c = *s++;
472
473 break;
474
475 default:
476 storeEntry(CHAR_);
477 storeEntry(tolower(c));
478 }
479 }
480 storeEntry(ENDPAT_);
481 storeEntry(0); // Terminate the string
482 if (debug) {
483 for (lp = pbuf; lp < pp;) {
484 if ((c = (*lp++ & 0377)) < ' ')
485 printf("\\%o ", c);
486 else
487 printf("%c ", c);
488 }
489 printf("\n");
490 }
491
492 return CMD_SUCCESS;
493}
494
495static const char *cmdname = "grep";
496
497// Dispays help for the grep cmd in various levels
498void help_cmd_grep(unsigned int level)
499{
500 printf("This is the %s help for '%s'.\n", level ? EXT_HELP : SHORT_HELP,
501 cmdname);
502 return;
503}
504
505// Main entry point for grep cmd, accepts an array of arguments
506int cmd_grep(char **argv)
507{
508 int argc;
509
510 // Count the arguments
511 for (argc = 0; argv[argc] != NULL; argc++)
512 ;
513
514 register char *p;
515 register int c, i;
516 int gotpattern;
517
518 FILE *f;
519
520 if (argc <= 1)
521 usageSummary("No arguments");
522 if (argc == 2 && argv[1][0] == '?' && argv[1][1] == 0) {
523 help(documentation);
524 help(patdoc);
525 return CMD_SUCCESS;
526 }
527 nfile = argc - 1;
528 gotpattern = 0;
529 for (i = 1; i < argc; ++i) {
530 p = argv[i];
531 if (*p == '-') {
532 ++p;
533 while ((c = *p++)) {
534 switch (tolower(c)) {
535
536 case '?':
537 help(documentation);
538 break;
539
540 case 'C':
541 case 'c':
542 ++cflag;
543 break;
544
545 case 'D':
546 case 'd':
547 ++debug;
548 break;
549
550 case 'F':
551 case 'f':
552 ++fflag;
553 break;
554
555 case 'n':
556 case 'N':
557 ++nflag;
558 break;
559
560 case 'v':
561 case 'V':
562 ++vflag;
563 break;
564
565 default:
566 usageSummary("Unknown flag");
567 }
568 }
569 argv[i] = 0;
570 --nfile;
571 } else if (!gotpattern) {
572 compilePattern(p);
573 argv[i] = 0;
574 ++gotpattern;
575 --nfile;
576 }
577 }
578
579 if (!gotpattern)
580 usageSummary("No pattern");
581 if (nfile == 0)
582 grep(stdin, 0);
583 else {
584
585 fflag = (fflag) ^ (nfile > 0);
586 for (i = 1; i < argc; ++i) {
587 if ((p = argv[i])) {
588 if ((f = fopen(p, "r")) == NULL)
589 cantOpen(p);
590 else {
591 grep(f, p);
592 fclose(f);
593 }
594 }
595 }
596 }
597
598 cflag = 0;
599 fflag = 0;
600 nflag = 0;
601 vflag = 0;
602 nfile = 0;
603 debug = 0;
604 return CMD_SUCCESS;
605}
Note: See TracBrowser for help on using the repository browser.