00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 #ifdef HAVE_CONFIG_H
00041 #include "config.h"
00042 #endif
00043
00044 #include <ctype.h>
00045 #include <locale.h>
00046 #include <stdio.h>
00047 #include <string.h>
00048 #include <stdlib.h>
00049 #include <errno.h>
00050
00051 #include <sys/types.h>
00052 #include <sys/stat.h>
00053
00054 #ifdef HAVE_UNISTD_H
00055 #include <unistd.h>
00056 #endif
00057
00058 #ifdef SUPPORT_LIBZ
00059 #include <zlib.h>
00060 #endif
00061
00062 #ifdef SUPPORT_LIBBZ2
00063 #include <bzlib.h>
00064 #endif
00065
00066 #include "pcre.h"
00067
/* Home-grown boolean: a plain int holding FALSE or TRUE. */
typedef int BOOL;

#define FALSE 0
#define TRUE  1

/* Fixed limits.
 *   MAX_PATTERN_COUNT - NOTE(review): presumably the capacity of pattern_list
 *                       and hints_list; the allocation is outside this view.
 *   OFFSET_SIZE       - number of ints in the offsets vector that is handed
 *                       to pcre_exec() (see match_patterns() and pcregrep()).
 */
#define MAX_PATTERN_COUNT 100
#define OFFSET_SIZE       99

/* One third of the input buffer used by pcregrep(): whichever of BUFSIZ and
 * 8192 is larger. */
#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif
00081
00082
00083
00084
00085
/* Values for the "filenames" variable, which controls file name output.
 * pcregrep() prints only the file name on the first match for FN_ONLY, and
 * only the names of files without a match for FN_NOMATCH_ONLY.  Values above
 * FN_DEFAULT always cause a name to be passed to pcregrep(). */
enum {
  FN_NONE         = 0,
  FN_DEFAULT      = 1,
  FN_ONLY         = 2,
  FN_NOMATCH_ONLY = 3,
  FN_FORCE        = 4
};

/* How an input file is read: stdio, zlib or bzlib. */
enum {
  FR_PLAIN  = 0,
  FR_LIBZ   = 1,
  FR_LIBBZ2 = 2
};

/* Actions for directories (dee_*) and for devices/FIFOs/sockets (DEE_*). */
enum {
  dee_READ    = 0,
  dee_SKIP    = 1,
  dee_RECURSE = 2
};

enum {
  DEE_READ = 0,
  DEE_SKIP = 1
};

/* Bit flags kept in process_options. */
#define PO_WORD_MATCH    0x0001
#define PO_LINE_MATCH    0x0002
#define PO_FIXED_STRINGS 0x0004

/* Line-ending conventions; the current one is held in endlinetype. */
enum {
  EL_LF      = 0,
  EL_CR      = 1,
  EL_CRLF    = 2,
  EL_ANY     = 3,
  EL_ANYCRLF = 4
};
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116 #ifdef JFRIEDL_DEBUG
00117 static int S_arg = -1;
00118 static unsigned int jfriedl_XR = 0;
00119 static unsigned int jfriedl_XT = 0;
00120 static const char *jfriedl_prefix = "";
00121 static const char *jfriedl_postfix = "";
00122 #endif
00123
00124 static int endlinetype;
00125
00126 static char *colour_string = (char *)"1;31";
00127 static char *colour_option = NULL;
00128 static char *dee_option = NULL;
00129 static char *DEE_option = NULL;
00130 static char *newline = NULL;
00131 static char *pattern_filename = NULL;
00132 static char *stdin_name = (char *)"(standard input)";
00133 static char *locale = NULL;
00134
00135 static const unsigned char *pcretables = NULL;
00136
00137 static int pattern_count = 0;
00138 static pcre **pattern_list = NULL;
00139 static pcre_extra **hints_list = NULL;
00140
00141 static char *include_pattern = NULL;
00142 static char *exclude_pattern = NULL;
00143 static char *include_dir_pattern = NULL;
00144 static char *exclude_dir_pattern = NULL;
00145
00146 static pcre *include_compiled = NULL;
00147 static pcre *exclude_compiled = NULL;
00148 static pcre *include_dir_compiled = NULL;
00149 static pcre *exclude_dir_compiled = NULL;
00150
00151 static int after_context = 0;
00152 static int before_context = 0;
00153 static int both_context = 0;
00154 static int dee_action = dee_READ;
00155 static int DEE_action = DEE_READ;
00156 static int error_count = 0;
00157 static int filenames = FN_DEFAULT;
00158 static int process_options = 0;
00159
00160 static BOOL count_only = FALSE;
00161 static BOOL do_colour = FALSE;
00162 static BOOL file_offsets = FALSE;
00163 static BOOL hyphenpending = FALSE;
00164 static BOOL invert = FALSE;
00165 static BOOL line_offsets = FALSE;
00166 static BOOL multiline = FALSE;
00167 static BOOL number = FALSE;
00168 static BOOL only_matching = FALSE;
00169 static BOOL quiet = FALSE;
00170 static BOOL silent = FALSE;
00171 static BOOL utf8 = FALSE;
00172
00173
00174
/* Kinds of option, stored in option_item.type.  The names suggest the data
 * each kind carries (none, a string, an optional string, a number, an
 * optional number, a pattern list); the decoding code is outside this view. */
enum {
  OP_NODATA    = 0,
  OP_STRING    = 1,
  OP_OP_STRING = 2,
  OP_NUMBER    = 3,
  OP_OP_NUMBER = 4,
  OP_PATLIST   = 5
};

/* One row of the option table. */
typedef struct option_item {
  int         type;        /* an OP_* value                                  */
  int         one_char;    /* option letter, or a negative N_* code          */
  void       *dataptr;     /* variable that receives the value, or NULL      */
  const char *long_name;   /* long option name, possibly with "=arg" suffix  */
  const char *help_text;   /* one-line description                           */
} option_item;
00185
00186
00187
00188
00189 #define N_COLOUR (-1)
00190 #define N_EXCLUDE (-2)
00191 #define N_EXCLUDE_DIR (-3)
00192 #define N_HELP (-4)
00193 #define N_INCLUDE (-5)
00194 #define N_INCLUDE_DIR (-6)
00195 #define N_LABEL (-7)
00196 #define N_LOCALE (-8)
00197 #define N_NULL (-9)
00198 #define N_LOFFSETS (-10)
00199 #define N_FOFFSETS (-11)
00200
00201 static option_item optionlist[] = {
00202 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
00203 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
00204 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
00205 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
00206 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
00207 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
00208 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
00209 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
00210 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
00211 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
00212 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
00213 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
00214 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
00215 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
00216 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
00217 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
00218 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
00219 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
00220 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
00221 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
00222 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
00223 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
00224 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
00225 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
00226 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
00227 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
00228 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
00229 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
00230 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
00231 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
00232 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
00233 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
00234 #ifdef JFRIEDL_DEBUG
00235 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
00236 #endif
00237 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
00238 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
00239 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
00240 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
00241 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
00242 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
00243 { OP_NODATA, 0, NULL, NULL, NULL }
00244 };
00245
00246
00247
00248
00249
00250
/* Text wrapped around a pattern.  Each table has eight entries.
 * NOTE(review): the layout matches an index built from the PO_* bits
 * (1 = word, 2 = line, 4 = fixed strings), but the code that indexes these
 * tables is outside this view - confirm before relying on it. */
static const char *prefix[] = {
  "",           /* 0 */
  "\\b",        /* 1 */
  "^(?:",       /* 2 */
  "^(?:",       /* 3 */
  "\\Q",        /* 4 */
  "\\b\\Q",     /* 5 */
  "^(?:\\Q",    /* 6 */
  "^(?:\\Q"     /* 7 */
};

static const char *suffix[] = {
  "",           /* 0 */
  "\\b",        /* 1 */
  ")$",         /* 2 */
  ")$",         /* 3 */
  "\\E",        /* 4 */
  "\\E\\b",     /* 5 */
  "\\E)$",      /* 6 */
  "\\E)$"       /* 7 */
};

/* UTF-8 decoding tables.  utf8_table3[n] masks the data bits out of the
 * first byte of a character that has n additional bytes. */
const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};

/* utf8_table4[b & 0x3f] is the number of additional bytes that follow a
 * first byte b >= 0xc0. */
const char utf8_table4[] = {
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3, 4,4,4,4,5,5,5,5
};
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277
00278
00279 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
00280 #include <sys/types.h>
00281 #include <sys/stat.h>
00282 #include <dirent.h>
00283
/* On POSIX systems a directory handle is simply a DIR. */
typedef DIR directory_type;

/*
 * Test whether a path names a directory.
 *
 * Returns the path separator character '/' when stat() succeeds and reports
 * a directory, and 0 otherwise (including when stat() fails).
 */
static int
isdirectory(char *filename)
{
  struct stat info;

  if (stat(filename, &info) < 0) return 0;
  if (!S_ISDIR(info.st_mode)) return 0;
  return '/';
}
00294
00295 static directory_type *
00296 opendirectory(char *filename)
00297 {
00298 return opendir(filename);
00299 }
00300
00301 static char *
00302 readdirectory(directory_type *dir)
00303 {
00304 for (;;)
00305 {
00306 struct dirent *dent = readdir(dir);
00307 if (dent == NULL) return NULL;
00308 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
00309 return dent->d_name;
00310 }
00311
00312 }
00313
00314 static void
00315 closedirectory(directory_type *dir)
00316 {
00317 closedir(dir);
00318 }
00319
00320
00321
00322
/*
 * Test whether a path names a regular file.
 *
 * Returns 1 for a regular file and 0 for anything else.  When stat() fails
 * the answer is also 1, so the caller carries on and tries to open the file.
 */
static int
isregfile(char *filename)
{
  struct stat info;

  if (stat(filename, &info) < 0) return 1;
  return S_ISREG(info.st_mode) ? 1 : 0;
}
00331
00332
00333
00334
00335 static BOOL
00336 is_stdout_tty(void)
00337 {
00338 return isatty(fileno(stdout));
00339 }
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350 #elif HAVE_WINDOWS_H
00351
00352 #ifndef STRICT
00353 # define STRICT
00354 #endif
00355 #ifndef WIN32_LEAN_AND_MEAN
00356 # define WIN32_LEAN_AND_MEAN
00357 #endif
00358
00359 #include <windows.h>
00360
00361 #ifndef INVALID_FILE_ATTRIBUTES
00362 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
00363 #endif
00364
00365 typedef struct directory_type
00366 {
00367 HANDLE handle;
00368 BOOL first;
00369 WIN32_FIND_DATA data;
00370 } directory_type;
00371
00372 int
00373 isdirectory(char *filename)
00374 {
00375 DWORD attr = GetFileAttributes(filename);
00376 if (attr == INVALID_FILE_ATTRIBUTES)
00377 return 0;
00378 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
00379 }
00380
00381 directory_type *
00382 opendirectory(char *filename)
00383 {
00384 size_t len;
00385 char *pattern;
00386 directory_type *dir;
00387 DWORD err;
00388 len = strlen(filename);
00389 pattern = (char *) malloc(len + 3);
00390 dir = (directory_type *) malloc(sizeof(*dir));
00391 if ((pattern == NULL) || (dir == NULL))
00392 {
00393 fprintf(stderr, "pcregrep: malloc failed\n");
00394 exit(2);
00395 }
00396 memcpy(pattern, filename, len);
00397 memcpy(&(pattern[len]), "\\*", 3);
00398 dir->handle = FindFirstFile(pattern, &(dir->data));
00399 if (dir->handle != INVALID_HANDLE_VALUE)
00400 {
00401 free(pattern);
00402 dir->first = TRUE;
00403 return dir;
00404 }
00405 err = GetLastError();
00406 free(pattern);
00407 free(dir);
00408 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
00409 return NULL;
00410 }
00411
00412 char *
00413 readdirectory(directory_type *dir)
00414 {
00415 for (;;)
00416 {
00417 if (!dir->first)
00418 {
00419 if (!FindNextFile(dir->handle, &(dir->data)))
00420 return NULL;
00421 }
00422 else
00423 {
00424 dir->first = FALSE;
00425 }
00426 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
00427 return dir->data.cFileName;
00428 }
00429 #ifndef _MSC_VER
00430 return NULL;
00431 #endif
00432 }
00433
00434 void
00435 closedirectory(directory_type *dir)
00436 {
00437 FindClose(dir->handle);
00438 free(dir);
00439 }
00440
00441
00442
00443
00444
00445
00446
/*
 * Test for a regular file (Windows version): anything that isdirectory()
 * does not report as a directory counts as regular.
 */
int
isregfile(char *filename)
{
  return isdirectory(filename) == 0;
}
00451
00452
00453
00454
00455
00456
00457 static BOOL
00458 is_stdout_tty(void)
00459 {
00460 return FALSE;
00461 }
00462
00463
00464
00465
00466
00467
00468 #else
00469
/* Fallback for systems with neither the POSIX nor the Windows directory
 * interface: there is no directory support at all. */
typedef void directory_type;

/* Nothing is ever reported as a directory. */
int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* Opening a directory always fails. */
directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return NULL;
}

/* There are never any entries to read. */
char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return NULL;
}

/* Nothing to release. */
void
closedirectory(directory_type *dir)
{
  (void)dir;
}
00476
00477
00478
00479
00480
00481
/* Fallback version: with no way to test, every path counts as a regular
 * file. */
int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
00483
00484
00485
00486
00487 static BOOL
00488 is_stdout_tty(void)
00489 {
00490 return FALSE;
00491 }
00492
00493
00494 #endif
00495
00496
00497
00498 #ifndef HAVE_STRERROR
00499
00500
00501
00502
00503
00504
00505
00506
/* Replacement strerror() for systems that lack one (HAVE_STRERROR not
 * defined).  It is built on the traditional sys_nerr/sys_errlist pair. */
extern int sys_nerr;
extern char *sys_errlist[];

/*
 * Return the message for error number n, or a fixed "unknown" string when n
 * lies outside the range 0 .. sys_nerr-1.
 */
char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr) ? sys_errlist[n] : "unknown error number";
}
00516 #endif
00517
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535 static char *
00536 end_of_line(char *p, char *endptr, int *lenptr)
00537 {
00538 switch(endlinetype)
00539 {
00540 default:
00541 case EL_LF:
00542 while (p < endptr && *p != '\n') p++;
00543 if (p < endptr)
00544 {
00545 *lenptr = 1;
00546 return p + 1;
00547 }
00548 *lenptr = 0;
00549 return endptr;
00550
00551 case EL_CR:
00552 while (p < endptr && *p != '\r') p++;
00553 if (p < endptr)
00554 {
00555 *lenptr = 1;
00556 return p + 1;
00557 }
00558 *lenptr = 0;
00559 return endptr;
00560
00561 case EL_CRLF:
00562 for (;;)
00563 {
00564 while (p < endptr && *p != '\r') p++;
00565 if (++p >= endptr)
00566 {
00567 *lenptr = 0;
00568 return endptr;
00569 }
00570 if (*p == '\n')
00571 {
00572 *lenptr = 2;
00573 return p + 1;
00574 }
00575 }
00576 break;
00577
00578 case EL_ANYCRLF:
00579 while (p < endptr)
00580 {
00581 int extra = 0;
00582 register int c = *((unsigned char *)p);
00583
00584 if (utf8 && c >= 0xc0)
00585 {
00586 int gcii, gcss;
00587 extra = utf8_table4[c & 0x3f];
00588 gcss = 6*extra;
00589 c = (c & utf8_table3[extra]) << gcss;
00590 for (gcii = 1; gcii <= extra; gcii++)
00591 {
00592 gcss -= 6;
00593 c |= (p[gcii] & 0x3f) << gcss;
00594 }
00595 }
00596
00597 p += 1 + extra;
00598
00599 switch (c)
00600 {
00601 case 0x0a:
00602 *lenptr = 1;
00603 return p;
00604
00605 case 0x0d:
00606 if (p < endptr && *p == 0x0a)
00607 {
00608 *lenptr = 2;
00609 p++;
00610 }
00611 else *lenptr = 1;
00612 return p;
00613
00614 default:
00615 break;
00616 }
00617 }
00618
00619 *lenptr = 0;
00620 return endptr;
00621
00622 case EL_ANY:
00623 while (p < endptr)
00624 {
00625 int extra = 0;
00626 register int c = *((unsigned char *)p);
00627
00628 if (utf8 && c >= 0xc0)
00629 {
00630 int gcii, gcss;
00631 extra = utf8_table4[c & 0x3f];
00632 gcss = 6*extra;
00633 c = (c & utf8_table3[extra]) << gcss;
00634 for (gcii = 1; gcii <= extra; gcii++)
00635 {
00636 gcss -= 6;
00637 c |= (p[gcii] & 0x3f) << gcss;
00638 }
00639 }
00640
00641 p += 1 + extra;
00642
00643 switch (c)
00644 {
00645 case 0x0a:
00646 case 0x0b:
00647 case 0x0c:
00648 *lenptr = 1;
00649 return p;
00650
00651 case 0x0d:
00652 if (p < endptr && *p == 0x0a)
00653 {
00654 *lenptr = 2;
00655 p++;
00656 }
00657 else *lenptr = 1;
00658 return p;
00659
00660 case 0x85:
00661 *lenptr = utf8? 2 : 1;
00662 return p;
00663
00664 case 0x2028:
00665 case 0x2029:
00666 *lenptr = 3;
00667 return p;
00668
00669 default:
00670 break;
00671 }
00672 }
00673
00674 *lenptr = 0;
00675 return endptr;
00676 }
00677 }
00678
00679
00680
00681
00682
00683
00684
00685
00686
00687
00688
00689
00690
00691
00692
00693
00694 static char *
00695 previous_line(char *p, char *startptr)
00696 {
00697 switch(endlinetype)
00698 {
00699 default:
00700 case EL_LF:
00701 p--;
00702 while (p > startptr && p[-1] != '\n') p--;
00703 return p;
00704
00705 case EL_CR:
00706 p--;
00707 while (p > startptr && p[-1] != '\n') p--;
00708 return p;
00709
00710 case EL_CRLF:
00711 for (;;)
00712 {
00713 p -= 2;
00714 while (p > startptr && p[-1] != '\n') p--;
00715 if (p <= startptr + 1 || p[-2] == '\r') return p;
00716 }
00717 return p;
00718
00719 case EL_ANY:
00720 case EL_ANYCRLF:
00721 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
00722 if (utf8) while ((*p & 0xc0) == 0x80) p--;
00723
00724 while (p > startptr)
00725 {
00726 register int c;
00727 char *pp = p - 1;
00728
00729 if (utf8)
00730 {
00731 int extra = 0;
00732 while ((*pp & 0xc0) == 0x80) pp--;
00733 c = *((unsigned char *)pp);
00734 if (c >= 0xc0)
00735 {
00736 int gcii, gcss;
00737 extra = utf8_table4[c & 0x3f];
00738 gcss = 6*extra;
00739 c = (c & utf8_table3[extra]) << gcss;
00740 for (gcii = 1; gcii <= extra; gcii++)
00741 {
00742 gcss -= 6;
00743 c |= (pp[gcii] & 0x3f) << gcss;
00744 }
00745 }
00746 }
00747 else c = *((unsigned char *)pp);
00748
00749 if (endlinetype == EL_ANYCRLF) switch (c)
00750 {
00751 case 0x0a:
00752 case 0x0d:
00753 return p;
00754
00755 default:
00756 break;
00757 }
00758
00759 else switch (c)
00760 {
00761 case 0x0a:
00762 case 0x0b:
00763 case 0x0c:
00764 case 0x0d:
00765 case 0x85:
00766 case 0x2028:
00767 case 0x2029:
00768 return p;
00769
00770 default:
00771 break;
00772 }
00773
00774 p = pp;
00775 }
00776
00777 return startptr;
00778 }
00779 }
00780
00781
00782
00783
00784
00785
00786
00787
00788
00789
00790
00791
00792
00793
00794
00795
00796
00797
00798
00799
00800
00801
00802 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
00803 char *endptr, char *printname)
00804 {
00805 if (after_context > 0 && lastmatchnumber > 0)
00806 {
00807 int count = 0;
00808 while (lastmatchrestart < endptr && count++ < after_context)
00809 {
00810 int ellength;
00811 char *pp = lastmatchrestart;
00812 if (printname != NULL) fprintf(stdout, "%s-", printname);
00813 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
00814 pp = end_of_line(pp, endptr, &ellength);
00815 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
00816 lastmatchrestart = pp;
00817 }
00818 hyphenpending = TRUE;
00819 }
00820 }
00821
00822
00823
00824
00825
00826
00827
00828
00829
00830
00831
00832
00833
00834
00835
00836
00837
00838
00839
00840
00841
00842
00843 static BOOL
00844 match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
00845 {
00846 int i;
00847 for (i = 0; i < pattern_count; i++)
00848 {
00849 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,
00850 PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
00851 if (*mrc >= 0) return TRUE;
00852 if (*mrc == PCRE_ERROR_NOMATCH) continue;
00853 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
00854 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
00855 fprintf(stderr, "this text:\n");
00856 fwrite(matchptr, 1, length, stderr);
00857 fprintf(stderr, "\n");
00858 if (error_count == 0 &&
00859 (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
00860 {
00861 fprintf(stderr, "pcregrep: error %d means that a resource limit "
00862 "was exceeded\n", *mrc);
00863 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
00864 }
00865 if (error_count++ > 20)
00866 {
00867 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
00868 exit(2);
00869 }
00870 return invert;
00871 }
00872
00873 return FALSE;
00874 }
00875
00876
00877
00878
00879
00880
00881
00882
00883
00884
00885
00886
00887
00888
00889
00890
00891
00892
00893
00894
00895
00896
00897
00898
00899
00900
00901
00902
00903 static int
00904 pcregrep(void *handle, int frtype, char *printname)
00905 {
00906 int rc = 1;
00907 int linenumber = 1;
00908 int lastmatchnumber = 0;
00909 int count = 0;
00910 int filepos = 0;
00911 int offsets[OFFSET_SIZE];
00912 char *lastmatchrestart = NULL;
00913 char buffer[3*MBUFTHIRD];
00914 char *ptr = buffer;
00915 char *endptr;
00916 size_t bufflength;
00917 BOOL endhyphenpending = FALSE;
00918 FILE *in = NULL;
00919
00920 #ifdef SUPPORT_LIBZ
00921 gzFile ingz = NULL;
00922 #endif
00923
00924 #ifdef SUPPORT_LIBBZ2
00925 BZFILE *inbz2 = NULL;
00926 #endif
00927
00928
00929
00930
00931
00932
00933
00934 #ifdef SUPPORT_LIBZ
00935 if (frtype == FR_LIBZ)
00936 {
00937 ingz = (gzFile)handle;
00938 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
00939 }
00940 else
00941 #endif
00942
00943 #ifdef SUPPORT_LIBBZ2
00944 if (frtype == FR_LIBBZ2)
00945 {
00946 inbz2 = (BZFILE *)handle;
00947 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
00948 if ((int)bufflength < 0) return 2;
00949 }
00950 else
00951 #endif
00952
00953 {
00954 in = (FILE *)handle;
00955 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
00956 }
00957
00958 endptr = buffer + bufflength;
00959
00960
00961
00962
00963
00964
00965 while (ptr < endptr)
00966 {
00967 int endlinelength;
00968 int mrc = 0;
00969 BOOL match;
00970 char *matchptr = ptr;
00971 char *t = ptr;
00972 size_t length, linelength;
00973
00974
00975
00976
00977
00978
00979
00980
00981
00982 t = end_of_line(t, endptr, &endlinelength);
00983 linelength = t - ptr - endlinelength;
00984 length = multiline? (size_t)(endptr - ptr) : linelength;
00985
00986
00987
00988 #ifdef JFRIEDL_DEBUG
00989 if (jfriedl_XT || jfriedl_XR)
00990 {
00991 #include <sys/time.h>
00992 #include <time.h>
00993 struct timeval start_time, end_time;
00994 struct timezone dummy;
00995 int i;
00996
00997 if (jfriedl_XT)
00998 {
00999 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
01000 const char *orig = ptr;
01001 ptr = malloc(newlen + 1);
01002 if (!ptr) {
01003 printf("out of memory");
01004 exit(2);
01005 }
01006 endptr = ptr;
01007 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
01008 for (i = 0; i < jfriedl_XT; i++) {
01009 strncpy(endptr, orig, length);
01010 endptr += length;
01011 }
01012 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
01013 length = newlen;
01014 }
01015
01016 if (gettimeofday(&start_time, &dummy) != 0)
01017 perror("bad gettimeofday");
01018
01019
01020 for (i = 0; i < jfriedl_XR; i++)
01021 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
01022 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
01023
01024 if (gettimeofday(&end_time, &dummy) != 0)
01025 perror("bad gettimeofday");
01026
01027 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
01028 -
01029 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
01030
01031 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
01032 return 0;
01033 }
01034 #endif
01035
01036
01037
01038
01039 ONLY_MATCHING_RESTART:
01040
01041
01042
01043
01044
01045 match = match_patterns(matchptr, length, offsets, &mrc);
01046
01047
01048
01049 if (match != invert)
01050 {
01051 BOOL hyphenprinted = FALSE;
01052
01053
01054
01055 if (filenames == FN_NOMATCH_ONLY) return 1;
01056
01057
01058
01059 if (count_only) count++;
01060
01061
01062
01063
01064 else if (filenames == FN_ONLY)
01065 {
01066 fprintf(stdout, "%s\n", printname);
01067 return 0;
01068 }
01069
01070
01071
01072 else if (quiet) return 0;
01073
01074
01075
01076
01077
01078
01079
01080
01081
01082 else if (only_matching)
01083 {
01084 if (!invert)
01085 {
01086 if (printname != NULL) fprintf(stdout, "%s:", printname);
01087 if (number) fprintf(stdout, "%d:", linenumber);
01088 if (line_offsets)
01089 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
01090 offsets[1] - offsets[0]);
01091 else if (file_offsets)
01092 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
01093 offsets[1] - offsets[0]);
01094 else
01095 {
01096 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
01097 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
01098 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
01099 }
01100 fprintf(stdout, "\n");
01101 matchptr += offsets[1];
01102 length -= offsets[1];
01103 match = FALSE;
01104 goto ONLY_MATCHING_RESTART;
01105 }
01106 }
01107
01108
01109
01110
01111
01112 else
01113 {
01114
01115
01116
01117 if (after_context > 0 && lastmatchnumber > 0)
01118 {
01119 int ellength;
01120 int linecount = 0;
01121 char *p = lastmatchrestart;
01122
01123 while (p < ptr && linecount < after_context)
01124 {
01125 p = end_of_line(p, ptr, &ellength);
01126 linecount++;
01127 }
01128
01129
01130
01131
01132
01133 while (lastmatchrestart < p)
01134 {
01135 char *pp = lastmatchrestart;
01136 if (printname != NULL) fprintf(stdout, "%s-", printname);
01137 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
01138 pp = end_of_line(pp, endptr, &ellength);
01139 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
01140 lastmatchrestart = pp;
01141 }
01142 if (lastmatchrestart != ptr) hyphenpending = TRUE;
01143 }
01144
01145
01146
01147 if (hyphenpending)
01148 {
01149 fprintf(stdout, "--\n");
01150 hyphenpending = FALSE;
01151 hyphenprinted = TRUE;
01152 }
01153
01154
01155
01156
01157 if (before_context > 0)
01158 {
01159 int linecount = 0;
01160 char *p = ptr;
01161
01162 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
01163 linecount < before_context)
01164 {
01165 linecount++;
01166 p = previous_line(p, buffer);
01167 }
01168
01169 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
01170 fprintf(stdout, "--\n");
01171
01172 while (p < ptr)
01173 {
01174 int ellength;
01175 char *pp = p;
01176 if (printname != NULL) fprintf(stdout, "%s-", printname);
01177 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
01178 pp = end_of_line(pp, endptr, &ellength);
01179 fwrite(p, 1, pp - p, stdout);
01180 p = pp;
01181 }
01182 }
01183
01184
01185
01186
01187 if (after_context > 0 || before_context > 0)
01188 endhyphenpending = TRUE;
01189
01190 if (printname != NULL) fprintf(stdout, "%s:", printname);
01191 if (number) fprintf(stdout, "%d:", linenumber);
01192
01193
01194
01195
01196
01197
01198
01199 if (multiline)
01200 {
01201 int ellength;
01202 char *endmatch = ptr;
01203 if (!invert)
01204 {
01205 endmatch += offsets[1];
01206 t = ptr;
01207 while (t < endmatch)
01208 {
01209 t = end_of_line(t, endptr, &ellength);
01210 if (t <= endmatch) linenumber++; else break;
01211 }
01212 }
01213 endmatch = end_of_line(endmatch, endptr, &ellength);
01214 linelength = endmatch - ptr - ellength;
01215 }
01216
01217
01218
01219
01220
01221
01222
01223
01224 #ifdef JFRIEDL_DEBUG
01225 if (S_arg >= 0 && S_arg < mrc)
01226 {
01227 int first = S_arg * 2;
01228 int last = first + 1;
01229 fwrite(ptr, 1, offsets[first], stdout);
01230 fprintf(stdout, "X");
01231 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
01232 }
01233 else
01234 #endif
01235
01236
01237
01238
01239 if (do_colour)
01240 {
01241 int last_offset = 0;
01242 fwrite(ptr, 1, offsets[0], stdout);
01243 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
01244 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
01245 fprintf(stdout, "%c[00m", 0x1b);
01246 for (;;)
01247 {
01248 last_offset += offsets[1];
01249 matchptr += offsets[1];
01250 length -= offsets[1];
01251 if (!match_patterns(matchptr, length, offsets, &mrc)) break;
01252 fwrite(matchptr, 1, offsets[0], stdout);
01253 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
01254 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
01255 fprintf(stdout, "%c[00m", 0x1b);
01256 }
01257 fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
01258 stdout);
01259 }
01260
01261
01262
01263 else fwrite(ptr, 1, linelength + endlinelength, stdout);
01264 }
01265
01266
01267
01268 rc = 0;
01269
01270
01271
01272
01273 lastmatchrestart = ptr + linelength + endlinelength;
01274 lastmatchnumber = linenumber + 1;
01275 }
01276
01277
01278
01279
01280
01281 if (multiline && invert && match)
01282 {
01283 int ellength;
01284 char *endmatch = ptr + offsets[1];
01285 t = ptr;
01286 while (t < endmatch)
01287 {
01288 t = end_of_line(t, endptr, &ellength);
01289 if (t <= endmatch) linenumber++; else break;
01290 }
01291 endmatch = end_of_line(endmatch, endptr, &ellength);
01292 linelength = endmatch - ptr - ellength;
01293 }
01294
01295
01296
01297
01298 ptr += linelength + endlinelength;
01299 filepos += linelength + endlinelength;
01300 linenumber++;
01301
01302
01303
01304
01305
01306
01307 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
01308 {
01309 if (after_context > 0 &&
01310 lastmatchnumber > 0 &&
01311 lastmatchrestart < buffer + MBUFTHIRD)
01312 {
01313 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
01314 lastmatchnumber = 0;
01315 }
01316
01317
01318
01319 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
01320 ptr -= MBUFTHIRD;
01321
01322 #ifdef SUPPORT_LIBZ
01323 if (frtype == FR_LIBZ)
01324 bufflength = 2*MBUFTHIRD +
01325 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
01326 else
01327 #endif
01328
01329 #ifdef SUPPORT_LIBBZ2
01330 if (frtype == FR_LIBBZ2)
01331 bufflength = 2*MBUFTHIRD +
01332 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
01333 else
01334 #endif
01335
01336 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
01337
01338 endptr = buffer + bufflength;
01339
01340
01341
01342 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
01343 }
01344 }
01345
01346
01347
01348
01349 if (!only_matching && !count_only)
01350 {
01351 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
01352 hyphenpending |= endhyphenpending;
01353 }
01354
01355
01356
01357
01358 if (filenames == FN_NOMATCH_ONLY)
01359 {
01360 fprintf(stdout, "%s\n", printname);
01361 return 0;
01362 }
01363
01364
01365
01366 if (count_only)
01367 {
01368 if (printname != NULL) fprintf(stdout, "%s:", printname);
01369 fprintf(stdout, "%d\n", count);
01370 }
01371
01372 return rc;
01373 }
01374
01375
01376
01377
01378
01379
01380
01381
01382
01383
01384
01385
01386
01387
01388
01389
01390
01391
01392
01393
01394
01395
01396 static int
01397 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
01398 {
01399 int rc = 1;
01400 int sep;
01401 int frtype;
01402 int pathlen;
01403 void *handle;
01404 FILE *in = NULL;
01405
01406 #ifdef SUPPORT_LIBZ
01407 gzFile ingz = NULL;
01408 #endif
01409
01410 #ifdef SUPPORT_LIBBZ2
01411 BZFILE *inbz2 = NULL;
01412 #endif
01413
01414
01415
01416 if (strcmp(pathname, "-") == 0)
01417 {
01418 return pcregrep(stdin, FR_PLAIN,
01419 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
01420 stdin_name : NULL);
01421 }
01422
01423
01424
01425
01426
01427
01428 if ((sep = isdirectory(pathname)) != 0)
01429 {
01430 if (dee_action == dee_SKIP) return 1;
01431 if (dee_action == dee_RECURSE)
01432 {
01433 char buffer[1024];
01434 char *nextfile;
01435 directory_type *dir = opendirectory(pathname);
01436
01437 if (dir == NULL)
01438 {
01439 if (!silent)
01440 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
01441 strerror(errno));
01442 return 2;
01443 }
01444
01445 while ((nextfile = readdirectory(dir)) != NULL)
01446 {
01447 int frc, nflen;
01448 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
01449 nflen = strlen(nextfile);
01450
01451 if (isdirectory(buffer))
01452 {
01453 if (exclude_dir_compiled != NULL &&
01454 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
01455 continue;
01456
01457 if (include_dir_compiled != NULL &&
01458 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
01459 continue;
01460 }
01461 else
01462 {
01463 if (exclude_compiled != NULL &&
01464 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
01465 continue;
01466
01467 if (include_compiled != NULL &&
01468 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
01469 continue;
01470 }
01471
01472 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
01473 if (frc > 1) rc = frc;
01474 else if (frc == 0 && rc == 1) rc = 0;
01475 }
01476
01477 closedirectory(dir);
01478 return rc;
01479 }
01480 }
01481
01482
01483
01484
01485 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
01486
01487
01488
01489
01490
01491
01492
01493 pathlen = strlen(pathname);
01494
01495
01496
01497 #ifdef SUPPORT_LIBZ
01498 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
01499 {
01500 ingz = gzopen(pathname, "rb");
01501 if (ingz == NULL)
01502 {
01503 if (!silent)
01504 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
01505 strerror(errno));
01506 return 2;
01507 }
01508 handle = (void *)ingz;
01509 frtype = FR_LIBZ;
01510 }
01511 else
01512 #endif
01513
01514
01515
01516 #ifdef SUPPORT_LIBBZ2
01517 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
01518 {
01519 inbz2 = BZ2_bzopen(pathname, "rb");
01520 handle = (void *)inbz2;
01521 frtype = FR_LIBBZ2;
01522 }
01523 else
01524 #endif
01525
01526
01527
01528
01529 #ifdef SUPPORT_LIBBZ2
01530 PLAIN_FILE:
01531 #endif
01532 {
01533 in = fopen(pathname, "r");
01534 handle = (void *)in;
01535 frtype = FR_PLAIN;
01536 }
01537
01538
01539
01540 if (handle == NULL)
01541 {
01542 if (!silent)
01543 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
01544 strerror(errno));
01545 return 2;
01546 }
01547
01548
01549
01550 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
01551 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
01552
01553
01554
01555 #ifdef SUPPORT_LIBZ
01556 if (frtype == FR_LIBZ)
01557 gzclose(ingz);
01558 else
01559 #endif
01560
01561
01562
01563
01564
01565 #ifdef SUPPORT_LIBBZ2
01566 if (frtype == FR_LIBBZ2)
01567 {
01568 if (rc == 2)
01569 {
01570 int errnum;
01571 const char *err = BZ2_bzerror(inbz2, &errnum);
01572 if (errnum == BZ_DATA_ERROR_MAGIC)
01573 {
01574 BZ2_bzclose(inbz2);
01575 goto PLAIN_FILE;
01576 }
01577 else if (!silent)
01578 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
01579 pathname, err);
01580 }
01581 BZ2_bzclose(inbz2);
01582 }
01583 else
01584 #endif
01585
01586
01587
01588 fclose(in);
01589
01590
01591
01592 return rc;
01593 }
01594
01595
01596
01597
01598
01599
01600
01601
01602 static int
01603 usage(int rc)
01604 {
01605 option_item *op;
01606 fprintf(stderr, "Usage: pcregrep [-");
01607 for (op = optionlist; op->one_char != 0; op++)
01608 {
01609 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
01610 }
01611 fprintf(stderr, "] [long options] [pattern] [files]\n");
01612 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
01613 "options.\n");
01614 return rc;
01615 }
01616
01617
01618
01619
01620
01621
01622
01623
01624 static void
01625 help(void)
01626 {
01627 option_item *op;
01628
01629 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
01630 printf("Search for PATTERN in each FILE or standard input.\n");
01631 printf("PATTERN must be present if neither -e nor -f is used.\n");
01632 printf("\"-\" can be used as a file name to mean STDIN.\n");
01633
01634 #ifdef SUPPORT_LIBZ
01635 printf("Files whose names end in .gz are read using zlib.\n");
01636 #endif
01637
01638 #ifdef SUPPORT_LIBBZ2
01639 printf("Files whose names end in .bz2 are read using bzlib2.\n");
01640 #endif
01641
01642 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
01643 printf("Other files and the standard input are read as plain files.\n\n");
01644 #else
01645 printf("All files are read as plain files, without any interpretation.\n\n");
01646 #endif
01647
01648 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
01649 printf("Options:\n");
01650
01651 for (op = optionlist; op->one_char != 0; op++)
01652 {
01653 int n;
01654 char s[4];
01655 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
01656 n = 30 - printf(" %s --%s", s, op->long_name);
01657 if (n < 1) n = 1;
01658 printf("%.*s%s\n", n, " ", op->help_text);
01659 }
01660
01661 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
01662 printf("trailing white space is removed and blank lines are ignored.\n");
01663 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
01664
01665 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
01666 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
01667 }
01668
01669
01670
01671
01672
01673
01674
01675
01676 static int
01677 handle_option(int letter, int options)
01678 {
01679 switch(letter)
01680 {
01681 case N_FOFFSETS: file_offsets = TRUE; break;
01682 case N_HELP: help(); exit(0);
01683 case N_LOFFSETS: line_offsets = number = TRUE; break;
01684 case 'c': count_only = TRUE; break;
01685 case 'F': process_options |= PO_FIXED_STRINGS; break;
01686 case 'H': filenames = FN_FORCE; break;
01687 case 'h': filenames = FN_NONE; break;
01688 case 'i': options |= PCRE_CASELESS; break;
01689 case 'l': filenames = FN_ONLY; break;
01690 case 'L': filenames = FN_NOMATCH_ONLY; break;
01691 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
01692 case 'n': number = TRUE; break;
01693 case 'o': only_matching = TRUE; break;
01694 case 'q': quiet = TRUE; break;
01695 case 'r': dee_action = dee_RECURSE; break;
01696 case 's': silent = TRUE; break;
01697 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
01698 case 'v': invert = TRUE; break;
01699 case 'w': process_options |= PO_WORD_MATCH; break;
01700 case 'x': process_options |= PO_LINE_MATCH; break;
01701
01702 case 'V':
01703 fprintf(stderr, "pcregrep version %s\n", pcre_version());
01704 exit(0);
01705 break;
01706
01707 default:
01708 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
01709 exit(usage(2));
01710 }
01711
01712 return options;
01713 }
01714
01715
01716
01717
01718
01719
01720
01721
01722
01723
/*************************************************
*          Construct an ordinal string           *
*************************************************/

/* Format a number followed by its English ordinal suffix: 1st, 2nd, 3rd,
4th, ... Numbers whose last two digits are 11, 12 or 13 take "th" (11th,
112th), which a test on the last digit alone gets wrong.

Argument:  n   the number
Returns:   pointer to a static buffer holding the text; the buffer is
           overwritten by the next call
*/

static char *
ordin(int n)
{
  /* Three characters per byte is enough for the decimal digits of any int;
  the extra four cover a minus sign, the two-letter suffix and the
  terminating zero. */

  static char buffer[sizeof(int) * 3 + 4];
  const char *suffix = "th";
  int last_two = n % 100;

  if (last_two < 11 || last_two > 13)
  {
    switch (n % 10)
    {
      case 1: suffix = "st"; break;
      case 2: suffix = "nd"; break;
      case 3: suffix = "rd"; break;
      default: break;
    }
  }

  sprintf(buffer, "%d%s", n, suffix);
  return buffer;
}
01740
01741
01742
01743
01744
01745
01746
01747
01748
01749
01750
01751
01752
01753
01754
01755
01756
01757
01758
01759
01760
01761 static BOOL
01762 compile_single_pattern(char *pattern, int options, char *filename, int count)
01763 {
01764 char buffer[MBUFTHIRD + 16];
01765 const char *error;
01766 int errptr;
01767
01768 if (pattern_count >= MAX_PATTERN_COUNT)
01769 {
01770 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
01771 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
01772 return FALSE;
01773 }
01774
01775 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
01776 suffix[process_options]);
01777 pattern_list[pattern_count] =
01778 pcre_compile(buffer, options, &error, &errptr, pcretables);
01779 if (pattern_list[pattern_count] != NULL)
01780 {
01781 pattern_count++;
01782 return TRUE;
01783 }
01784
01785
01786
01787 errptr -= (int)strlen(prefix[process_options]);
01788 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
01789
01790 if (filename == NULL)
01791 {
01792 if (count == 0)
01793 fprintf(stderr, "pcregrep: Error in command-line regex "
01794 "at offset %d: %s\n", errptr, error);
01795 else
01796 fprintf(stderr, "pcregrep: Error in %s command-line regex "
01797 "at offset %d: %s\n", ordin(count), errptr, error);
01798 }
01799 else
01800 {
01801 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
01802 "at offset %d: %s\n", count, filename, errptr, error);
01803 }
01804
01805 return FALSE;
01806 }
01807
01808
01809
01810
01811
01812
01813
01814
01815
01816
01817
01818
01819
01820
01821
01822
01823
01824
01825
01826
01827
01828 static BOOL
01829 compile_pattern(char *pattern, int options, char *filename, int count)
01830 {
01831 if ((process_options & PO_FIXED_STRINGS) != 0)
01832 {
01833 char *eop = pattern + strlen(pattern);
01834 char buffer[MBUFTHIRD];
01835 for(;;)
01836 {
01837 int ellength;
01838 char *p = end_of_line(pattern, eop, &ellength);
01839 if (ellength == 0)
01840 return compile_single_pattern(pattern, options, filename, count);
01841 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
01842 pattern = p;
01843 if (!compile_single_pattern(buffer, options, filename, count))
01844 return FALSE;
01845 }
01846 }
01847 else return compile_single_pattern(pattern, options, filename, count);
01848 }
01849
01850
01851
01852
01853
01854
01855
01856
01857
01858 int
01859 main(int argc, char **argv)
01860 {
01861 int i, j;
01862 int rc = 1;
01863 int pcre_options = 0;
01864 int cmd_pattern_count = 0;
01865 int hint_count = 0;
01866 int errptr;
01867 BOOL only_one_at_top;
01868 char *patterns[MAX_PATTERN_COUNT];
01869 const char *locale_from = "--locale";
01870 const char *error;
01871
01872
01873
01874
01875
01876
01877
01878 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
01879 switch(i)
01880 {
01881 default: newline = (char *)"lf"; break;
01882 case 13: newline = (char *)"cr"; break;
01883 case (13 << 8) | 10: newline = (char *)"crlf"; break;
01884 case -1: newline = (char *)"any"; break;
01885 case -2: newline = (char *)"anycrlf"; break;
01886 }
01887
01888
01889
01890 for (i = 1; i < argc; i++)
01891 {
01892 option_item *op = NULL;
01893 char *option_data = (char *)"";
01894 BOOL longop;
01895 BOOL longopwasequals = FALSE;
01896
01897 if (argv[i][0] != '-') break;
01898
01899
01900
01901
01902 if (argv[i][1] == 0)
01903 {
01904 if (pattern_filename != NULL || pattern_count > 0) break;
01905 else exit(usage(2));
01906 }
01907
01908
01909
01910 if (argv[i][1] == '-')
01911 {
01912 char *arg = argv[i] + 2;
01913 char *argequals = strchr(arg, '=');
01914
01915 if (*arg == 0)
01916 {
01917 i++;
01918 break;
01919 }
01920
01921 longop = TRUE;
01922
01923
01924
01925
01926
01927
01928
01929
01930 for (op = optionlist; op->one_char != 0; op++)
01931 {
01932 char *opbra = strchr(op->long_name, '(');
01933 char *equals = strchr(op->long_name, '=');
01934 if (opbra == NULL)
01935 {
01936 if (equals == NULL)
01937 {
01938 if (strcmp(arg, op->long_name) == 0) break;
01939 }
01940 else
01941 {
01942 int oplen = equals - op->long_name;
01943 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
01944 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
01945 {
01946 option_data = arg + arglen;
01947 if (*option_data == '=')
01948 {
01949 option_data++;
01950 longopwasequals = TRUE;
01951 }
01952 break;
01953 }
01954 }
01955 }
01956 else
01957 {
01958 char buff1[24];
01959 char buff2[24];
01960 int baselen = opbra - op->long_name;
01961 sprintf(buff1, "%.*s", baselen, op->long_name);
01962 sprintf(buff2, "%s%.*s", buff1,
01963 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
01964 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
01965 break;
01966 }
01967 }
01968
01969 if (op->one_char == 0)
01970 {
01971 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
01972 exit(usage(2));
01973 }
01974 }
01975
01976
01977
01978
01979
01980
01981
01982
01983 #ifdef JFRIEDL_DEBUG
01984 else if (strcmp(argv[i], "-pre") == 0) {
01985 jfriedl_prefix = argv[++i];
01986 continue;
01987 } else if (strcmp(argv[i], "-post") == 0) {
01988 jfriedl_postfix = argv[++i];
01989 continue;
01990 } else if (strcmp(argv[i], "-XT") == 0) {
01991 sscanf(argv[++i], "%d", &jfriedl_XT);
01992 continue;
01993 } else if (strcmp(argv[i], "-XR") == 0) {
01994 sscanf(argv[++i], "%d", &jfriedl_XR);
01995 continue;
01996 }
01997 #endif
01998
01999
02000
02001
02002
02003 else
02004 {
02005 char *s = argv[i] + 1;
02006 longop = FALSE;
02007 while (*s != 0)
02008 {
02009 for (op = optionlist; op->one_char != 0; op++)
02010 { if (*s == op->one_char) break; }
02011 if (op->one_char == 0)
02012 {
02013 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
02014 *s, argv[i]);
02015 exit(usage(2));
02016 }
02017 if (op->type != OP_NODATA || s[1] == 0)
02018 {
02019 option_data = s+1;
02020 break;
02021 }
02022 pcre_options = handle_option(*s++, pcre_options);
02023 }
02024 }
02025
02026
02027
02028
02029
02030 if (op->type == OP_NODATA)
02031 {
02032 pcre_options = handle_option(op->one_char, pcre_options);
02033 continue;
02034 }
02035
02036
02037
02038
02039
02040
02041 if (*option_data == 0 &&
02042 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
02043 {
02044 switch (op->one_char)
02045 {
02046 case N_COLOUR:
02047 colour_option = (char *)"auto";
02048 break;
02049 #ifdef JFRIEDL_DEBUG
02050 case 'S':
02051 S_arg = 0;
02052 break;
02053 #endif
02054 }
02055 continue;
02056 }
02057
02058
02059
02060 if (*option_data == 0)
02061 {
02062 if (i >= argc - 1 || longopwasequals)
02063 {
02064 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
02065 exit(usage(2));
02066 }
02067 option_data = argv[++i];
02068 }
02069
02070
02071
02072
02073 if (op->type == OP_PATLIST)
02074 {
02075 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
02076 {
02077 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
02078 MAX_PATTERN_COUNT);
02079 return 2;
02080 }
02081 patterns[cmd_pattern_count++] = option_data;
02082 }
02083
02084
02085
02086 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
02087 {
02088 *((char **)op->dataptr) = option_data;
02089 }
02090 else
02091 {
02092 char *endptr;
02093 int n = strtoul(option_data, &endptr, 10);
02094 if (*endptr != 0)
02095 {
02096 if (longop)
02097 {
02098 char *equals = strchr(op->long_name, '=');
02099 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
02100 equals - op->long_name;
02101 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
02102 option_data, nlen, op->long_name);
02103 }
02104 else
02105 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
02106 option_data, op->one_char);
02107 exit(usage(2));
02108 }
02109 *((int *)op->dataptr) = n;
02110 }
02111 }
02112
02113
02114
02115
02116 if (both_context > 0)
02117 {
02118 if (after_context == 0) after_context = both_context;
02119 if (before_context == 0) before_context = both_context;
02120 }
02121
02122
02123
02124
02125 if ((only_matching && (file_offsets || line_offsets)) ||
02126 (file_offsets && line_offsets))
02127 {
02128 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
02129 "and/or --line-offsets\n");
02130 exit(usage(2));
02131 }
02132
02133 if (file_offsets || line_offsets) only_matching = TRUE;
02134
02135
02136
02137
02138 if (locale == NULL)
02139 {
02140 locale = getenv("LC_ALL");
02141 locale_from = "LCC_ALL";
02142 }
02143
02144 if (locale == NULL)
02145 {
02146 locale = getenv("LC_CTYPE");
02147 locale_from = "LC_CTYPE";
02148 }
02149
02150
02151
02152
02153 if (locale != NULL)
02154 {
02155 if (setlocale(LC_CTYPE, locale) == NULL)
02156 {
02157 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
02158 locale, locale_from);
02159 return 2;
02160 }
02161 pcretables = pcre_maketables();
02162 }
02163
02164
02165
02166 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
02167 {
02168 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
02169 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
02170 else
02171 {
02172 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
02173 colour_option);
02174 return 2;
02175 }
02176 if (do_colour)
02177 {
02178 char *cs = getenv("PCREGREP_COLOUR");
02179 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
02180 if (cs != NULL) colour_string = cs;
02181 }
02182 }
02183
02184
02185
02186 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
02187 {
02188 pcre_options |= PCRE_NEWLINE_CR;
02189 endlinetype = EL_CR;
02190 }
02191 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
02192 {
02193 pcre_options |= PCRE_NEWLINE_LF;
02194 endlinetype = EL_LF;
02195 }
02196 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
02197 {
02198 pcre_options |= PCRE_NEWLINE_CRLF;
02199 endlinetype = EL_CRLF;
02200 }
02201 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
02202 {
02203 pcre_options |= PCRE_NEWLINE_ANY;
02204 endlinetype = EL_ANY;
02205 }
02206 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
02207 {
02208 pcre_options |= PCRE_NEWLINE_ANYCRLF;
02209 endlinetype = EL_ANYCRLF;
02210 }
02211 else
02212 {
02213 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
02214 return 2;
02215 }
02216
02217
02218
02219 if (dee_option != NULL)
02220 {
02221 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
02222 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
02223 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
02224 else
02225 {
02226 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
02227 return 2;
02228 }
02229 }
02230
02231 if (DEE_option != NULL)
02232 {
02233 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
02234 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
02235 else
02236 {
02237 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
02238 return 2;
02239 }
02240 }
02241
02242
02243
02244 #ifdef JFRIEDL_DEBUG
02245 if (S_arg > 9)
02246 {
02247 fprintf(stderr, "pcregrep: bad value for -S option\n");
02248 return 2;
02249 }
02250 if (jfriedl_XT != 0 || jfriedl_XR != 0)
02251 {
02252 if (jfriedl_XT == 0) jfriedl_XT = 1;
02253 if (jfriedl_XR == 0) jfriedl_XR = 1;
02254 }
02255 #endif
02256
02257
02258
02259 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
02260 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
02261
02262 if (pattern_list == NULL || hints_list == NULL)
02263 {
02264 fprintf(stderr, "pcregrep: malloc failed\n");
02265 goto EXIT2;
02266 }
02267
02268
02269
02270
02271 if (cmd_pattern_count == 0 && pattern_filename == NULL)
02272 {
02273 if (i >= argc) return usage(2);
02274 patterns[cmd_pattern_count++] = argv[i++];
02275 }
02276
02277
02278
02279
02280 for (j = 0; j < cmd_pattern_count; j++)
02281 {
02282 if (!compile_pattern(patterns[j], pcre_options, NULL,
02283 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
02284 goto EXIT2;
02285 }
02286
02287
02288
02289 if (pattern_filename != NULL)
02290 {
02291 int linenumber = 0;
02292 FILE *f;
02293 char *filename;
02294 char buffer[MBUFTHIRD];
02295
02296 if (strcmp(pattern_filename, "-") == 0)
02297 {
02298 f = stdin;
02299 filename = stdin_name;
02300 }
02301 else
02302 {
02303 f = fopen(pattern_filename, "r");
02304 if (f == NULL)
02305 {
02306 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
02307 strerror(errno));
02308 goto EXIT2;
02309 }
02310 filename = pattern_filename;
02311 }
02312
02313 while (fgets(buffer, MBUFTHIRD, f) != NULL)
02314 {
02315 char *s = buffer + (int)strlen(buffer);
02316 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
02317 *s = 0;
02318 linenumber++;
02319 if (buffer[0] == 0) continue;
02320 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
02321 goto EXIT2;
02322 }
02323
02324 if (f != stdin) fclose(f);
02325 }
02326
02327
02328
02329 for (j = 0; j < pattern_count; j++)
02330 {
02331 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
02332 if (error != NULL)
02333 {
02334 char s[16];
02335 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
02336 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
02337 goto EXIT2;
02338 }
02339 hint_count++;
02340 }
02341
02342
02343
02344 if (exclude_pattern != NULL)
02345 {
02346 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
02347 pcretables);
02348 if (exclude_compiled == NULL)
02349 {
02350 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
02351 errptr, error);
02352 goto EXIT2;
02353 }
02354 }
02355
02356 if (include_pattern != NULL)
02357 {
02358 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
02359 pcretables);
02360 if (include_compiled == NULL)
02361 {
02362 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
02363 errptr, error);
02364 goto EXIT2;
02365 }
02366 }
02367
02368 if (exclude_dir_pattern != NULL)
02369 {
02370 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
02371 pcretables);
02372 if (exclude_dir_compiled == NULL)
02373 {
02374 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
02375 errptr, error);
02376 goto EXIT2;
02377 }
02378 }
02379
02380 if (include_dir_pattern != NULL)
02381 {
02382 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
02383 pcretables);
02384 if (include_dir_compiled == NULL)
02385 {
02386 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
02387 errptr, error);
02388 goto EXIT2;
02389 }
02390 }
02391
02392
02393
02394 if (i >= argc)
02395 {
02396 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
02397 goto EXIT;
02398 }
02399
02400
02401
02402
02403
02404
02405 only_one_at_top = i == argc - 1;
02406
02407 for (; i < argc; i++)
02408 {
02409 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
02410 only_one_at_top);
02411 if (frc > 1) rc = frc;
02412 else if (frc == 0 && rc == 1) rc = 0;
02413 }
02414
02415 EXIT:
02416 if (pattern_list != NULL)
02417 {
02418 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
02419 free(pattern_list);
02420 }
02421 if (hints_list != NULL)
02422 {
02423 for (i = 0; i < hint_count; i++) free(hints_list[i]);
02424 free(hints_list);
02425 }
02426 return rc;
02427
02428 EXIT2:
02429 rc = 2;
02430 goto EXIT;
02431 }
02432
02433
02434
02435