src/util/regexp/pcregrep.c

Go to the documentation of this file.
00001 /*************************************************
00002 *               pcregrep program                 *
00003 *************************************************/
00004 
00005 /* This is a grep program that uses the PCRE regular expression library to do
00006 its pattern matching. On a Unix or Win32 system it can recurse into
00007 directories.
00008 
00009            Copyright (c) 1997-2009 University of Cambridge
00010 
00011 -----------------------------------------------------------------------------
00012 Redistribution and use in source and binary forms, with or without
00013 modification, are permitted provided that the following conditions are met:
00014 
00015     * Redistributions of source code must retain the above copyright notice,
00016       this list of conditions and the following disclaimer.
00017 
00018     * Redistributions in binary form must reproduce the above copyright
00019       notice, this list of conditions and the following disclaimer in the
00020       documentation and/or other materials provided with the distribution.
00021 
00022     * Neither the name of the University of Cambridge nor the names of its
00023       contributors may be used to endorse or promote products derived from
00024       this software without specific prior written permission.
00025 
00026 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00027 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00029 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00030 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00031 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00032 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00033 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00034 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00035 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00036 POSSIBILITY OF SUCH DAMAGE.
00037 -----------------------------------------------------------------------------
00038 */
00039 
00040 #ifdef HAVE_CONFIG_H
00041 #include "config.h"
00042 #endif
00043 
00044 #include <ctype.h>
00045 #include <locale.h>
00046 #include <stdio.h>
00047 #include <string.h>
00048 #include <stdlib.h>
00049 #include <errno.h>
00050 
00051 #include <sys/types.h>
00052 #include <sys/stat.h>
00053 
00054 #ifdef HAVE_UNISTD_H
00055 #include <unistd.h>
00056 #endif
00057 
00058 #ifdef SUPPORT_LIBZ
00059 #include <zlib.h>
00060 #endif
00061 
00062 #ifdef SUPPORT_LIBBZ2
00063 #include <bzlib.h>
00064 #endif
00065 
00066 #include "pcre.h"
00067 
00068 #define FALSE 0
00069 #define TRUE 1
00070 
00071 typedef int BOOL;
00072 
00073 #define MAX_PATTERN_COUNT 100
00074 #define OFFSET_SIZE 99
00075 
00076 #if BUFSIZ > 8192
00077 #define MBUFTHIRD BUFSIZ
00078 #else
00079 #define MBUFTHIRD 8192
00080 #endif
00081 
00082 /* Values for the "filenames" variable, which specifies options for file name
00083 output. The order is important; it is assumed that a file name is wanted for
00084 all values greater than FN_DEFAULT. */
00085 
00086 enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
00087 
00088 /* File reading styles */
00089 
00090 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
00091 
00092 /* Actions for the -d and -D options */
00093 
00094 enum { dee_READ, dee_SKIP, dee_RECURSE };
00095 enum { DEE_READ, DEE_SKIP };
00096 
00097 /* Actions for special processing options (flag bits) */
00098 
00099 #define PO_WORD_MATCH     0x0001
00100 #define PO_LINE_MATCH     0x0002
00101 #define PO_FIXED_STRINGS  0x0004
00102 
00103 /* Line ending types */
00104 
00105 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
00106 
00107 
00108 
00109 /*************************************************
00110 *               Global variables                 *
00111 *************************************************/
00112 
00113 /* Jeffrey Friedl has some debugging requirements that are not part of the
00114 regular code. */
00115 
00116 #ifdef JFRIEDL_DEBUG
00117 static int S_arg = -1;
00118 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
00119 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
00120 static const char *jfriedl_prefix = "";
00121 static const char *jfriedl_postfix = "";
00122 #endif
00123 
00124 static int  endlinetype;
00125 
00126 static char *colour_string = (char *)"1;31";
00127 static char *colour_option = NULL;
00128 static char *dee_option = NULL;
00129 static char *DEE_option = NULL;
00130 static char *newline = NULL;
00131 static char *pattern_filename = NULL;
00132 static char *stdin_name = (char *)"(standard input)";
00133 static char *locale = NULL;
00134 
00135 static const unsigned char *pcretables = NULL;
00136 
00137 static int  pattern_count = 0;
00138 static pcre **pattern_list = NULL;
00139 static pcre_extra **hints_list = NULL;
00140 
00141 static char *include_pattern = NULL;
00142 static char *exclude_pattern = NULL;
00143 static char *include_dir_pattern = NULL;
00144 static char *exclude_dir_pattern = NULL;
00145 
00146 static pcre *include_compiled = NULL;
00147 static pcre *exclude_compiled = NULL;
00148 static pcre *include_dir_compiled = NULL;
00149 static pcre *exclude_dir_compiled = NULL;
00150 
00151 static int after_context = 0;
00152 static int before_context = 0;
00153 static int both_context = 0;
00154 static int dee_action = dee_READ;
00155 static int DEE_action = DEE_READ;
00156 static int error_count = 0;
00157 static int filenames = FN_DEFAULT;
00158 static int process_options = 0;
00159 
00160 static BOOL count_only = FALSE;
00161 static BOOL do_colour = FALSE;
00162 static BOOL file_offsets = FALSE;
00163 static BOOL hyphenpending = FALSE;
00164 static BOOL invert = FALSE;
00165 static BOOL line_offsets = FALSE;
00166 static BOOL multiline = FALSE;
00167 static BOOL number = FALSE;
00168 static BOOL only_matching = FALSE;
00169 static BOOL quiet = FALSE;
00170 static BOOL silent = FALSE;
00171 static BOOL utf8 = FALSE;
00172 
00173 /* Structure for options and list of them */
00174 
00175 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
00176        OP_PATLIST };
00177 
/* One entry in the table of command line options. */

typedef struct option_item {
  int type;               /* One of the OP_xxx values */
  int one_char;           /* Single-letter form; negative (N_xxx) if none */
  void *dataptr;          /* Variable associated with the option, or NULL */
  const char *long_name;  /* Long option name, as shown in the help text */
  const char *help_text;  /* Description of the option */
} option_item;
00185 
00186 /* Options without a single-letter equivalent get a negative value. This can be
00187 used to identify them. */
00188 
00189 #define N_COLOUR       (-1)
00190 #define N_EXCLUDE      (-2)
00191 #define N_EXCLUDE_DIR  (-3)
00192 #define N_HELP         (-4)
00193 #define N_INCLUDE      (-5)
00194 #define N_INCLUDE_DIR  (-6)
00195 #define N_LABEL        (-7)
00196 #define N_LOCALE       (-8)
00197 #define N_NULL         (-9)
00198 #define N_LOFFSETS     (-10)
00199 #define N_FOFFSETS     (-11)
00200 
00201 static option_item optionlist[] = {
00202   { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
00203   { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
00204   { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
00205   { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
00206   { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
00207   { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
00208   { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
00209   { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
00210   { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
00211   { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
00212   { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
00213   { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
00214   { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
00215   { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
00216   { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
00217   { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
00218   { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
00219   { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
00220   { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
00221   { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
00222   { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
00223   { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
00224   { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
00225   { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
00226   { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
00227   { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
00228   { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
00229   { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
00230   { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
00231   { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
00232   { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
00233   { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
00234 #ifdef JFRIEDL_DEBUG
00235   { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
00236 #endif
00237   { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
00238   { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
00239   { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
00240   { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
00241   { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
00242   { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
00243   { OP_NODATA,    0,        NULL,               NULL,            NULL }
00244 };
00245 
00246 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
00247 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
00248 that the combination of -w and -x has the same effect as -x on its own, so we
00249 can treat them as the same. */
00250 
00251 static const char *prefix[] = {
00252   "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
00253 
00254 static const char *suffix[] = {
00255   "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
00256 
00257 /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
00258 
00259 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
00260 
00261 const char utf8_table4[] = {
00262   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00263   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00264   2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
00265   3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
00266 
00267 
00268 
00269 /*************************************************
00270 *            OS-specific functions               *
00271 *************************************************/
00272 
00273 /* These functions are defined so that they can be made system specific,
00274 although at present the only ones are for Unix, Win32, and for "no support". */
00275 
00276 
00277 /************* Directory scanning in Unix ***********/
00278 
00279 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
00280 #include <sys/types.h>
00281 #include <sys/stat.h>
00282 #include <dirent.h>
00283 
00284 typedef DIR directory_type;
00285 
/* Test whether a path names a directory.

Argument:   filename   the path to examine
Returns:    '/' if stat() succeeds and the path is a directory
            0 otherwise, including when stat() fails
*/

static int
isdirectory(char *filename)
{
struct stat info;
int result = 0;

/* A stat() failure yields 0, in the expectation that opening the path as a
file will fail later. */

if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode)) result = '/';
return result;
}
00294 
00295 static directory_type *
00296 opendirectory(char *filename)
00297 {
00298 return opendir(filename);
00299 }
00300 
00301 static char *
00302 readdirectory(directory_type *dir)
00303 {
00304 for (;;)
00305   {
00306   struct dirent *dent = readdir(dir);
00307   if (dent == NULL) return NULL;
00308   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
00309     return dent->d_name;
00310   }
00311 /* Control never reaches here */
00312 }
00313 
00314 static void
00315 closedirectory(directory_type *dir)
00316 {
00317 closedir(dir);
00318 }
00319 
00320 
00321 /************* Test for regular file in Unix **********/
00322 
/* Test whether a path names a regular file.

Argument:   filename   the path to examine
Returns:    1 if the path is a regular file, or if stat() fails
            0 if stat() succeeds and the path is anything else
*/

static int
isregfile(char *filename)
{
struct stat info;

/* When stat() fails the path is reported as regular, in the expectation that
opening it as a file will fail. */

if (stat(filename, &info) >= 0) return S_ISREG(info.st_mode)? 1 : 0;
return 1;
}
00331 
00332 
00333 /************* Test stdout for being a terminal in Unix **********/
00334 
00335 static BOOL
00336 is_stdout_tty(void)
00337 {
00338 return isatty(fileno(stdout));
00339 }
00340 
00341 
00342 /************* Directory scanning in Win32 ***********/
00343 
00344 /* I (Philip Hazel) have no means of testing this code. It was contributed by
00345 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
00346 when it did not exist. David Byron added a patch that moved the #include of
00347 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
00348 */
00349 
00350 #elif HAVE_WINDOWS_H
00351 
00352 #ifndef STRICT
00353 # define STRICT
00354 #endif
00355 #ifndef WIN32_LEAN_AND_MEAN
00356 # define WIN32_LEAN_AND_MEAN
00357 #endif
00358 
00359 #include <windows.h>
00360 
00361 #ifndef INVALID_FILE_ATTRIBUTES
00362 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
00363 #endif
00364 
00365 typedef struct directory_type
00366 {
00367 HANDLE handle;
00368 BOOL first;
00369 WIN32_FIND_DATA data;
00370 } directory_type;
00371 
00372 int
00373 isdirectory(char *filename)
00374 {
00375 DWORD attr = GetFileAttributes(filename);
00376 if (attr == INVALID_FILE_ATTRIBUTES)
00377   return 0;
00378 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
00379 }
00380 
00381 directory_type *
00382 opendirectory(char *filename)
00383 {
00384 size_t len;
00385 char *pattern;
00386 directory_type *dir;
00387 DWORD err;
00388 len = strlen(filename);
00389 pattern = (char *) malloc(len + 3);
00390 dir = (directory_type *) malloc(sizeof(*dir));
00391 if ((pattern == NULL) || (dir == NULL))
00392   {
00393   fprintf(stderr, "pcregrep: malloc failed\n");
00394   exit(2);
00395   }
00396 memcpy(pattern, filename, len);
00397 memcpy(&(pattern[len]), "\\*", 3);
00398 dir->handle = FindFirstFile(pattern, &(dir->data));
00399 if (dir->handle != INVALID_HANDLE_VALUE)
00400   {
00401   free(pattern);
00402   dir->first = TRUE;
00403   return dir;
00404   }
00405 err = GetLastError();
00406 free(pattern);
00407 free(dir);
00408 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
00409 return NULL;
00410 }
00411 
00412 char *
00413 readdirectory(directory_type *dir)
00414 {
00415 for (;;)
00416   {
00417   if (!dir->first)
00418     {
00419     if (!FindNextFile(dir->handle, &(dir->data)))
00420       return NULL;
00421     }
00422   else
00423     {
00424     dir->first = FALSE;
00425     }
00426   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
00427     return dir->data.cFileName;
00428   }
00429 #ifndef _MSC_VER
00430 return NULL;   /* Keep compiler happy; never executed */
00431 #endif
00432 }
00433 
00434 void
00435 closedirectory(directory_type *dir)
00436 {
00437 FindClose(dir->handle);
00438 free(dir);
00439 }
00440 
00441 
00442 /************* Test for regular file in Win32 **********/
00443 
00444 /* I don't know how to do this, or if it can be done; assume all paths are
00445 regular if they are not directories. */
00446 
/* Returns 1 when isdirectory() reports that the path is not a directory, and
0 when it is one. */

int
isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
00451 
00452 
00453 /************* Test stdout for being a terminal in Win32 **********/
00454 
00455 /* I don't know how to do this; assume never */
00456 
00457 static BOOL
00458 is_stdout_tty(void)
00459 {
00460 return FALSE;
00461 }
00462 
00463 
00464 /************* Directory scanning when we can't do it ***********/
00465 
00466 /* The type is void, and apart from isdirectory(), the functions do nothing. */
00467 
00468 #else
00469 
/* Stand-ins for use when directory scanning is not available. Nothing is ever
reported as a directory, and no directory can be opened or read. */

typedef void directory_type;

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type*)0;
}

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char*)0;
}

void
closedirectory(directory_type *dir)
{
(void)dir;
}
00476 
00477 
00478 /************* Test for regular when we can't do it **********/
00479 
00480 /* Assume all files are regular. */
00481 
/* Every path is reported as a regular file. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
00483 
00484 
00485 /************* Test stdout for being a terminal when we can't do it **********/
00486 
00487 static BOOL
00488 is_stdout_tty(void)
00489 {
00490 return FALSE;
00491 }
00492 
00493 
00494 #endif
00495 
00496 
00497 
00498 #ifndef HAVE_STRERROR
00499 /*************************************************
00500 *     Provide strerror() for non-ANSI libraries  *
00501 *************************************************/
00502 
00503 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
00504 in their libraries, but can provide the same facility by this simple
00505 alternative function. */
00506 
extern int   sys_nerr;
extern char *sys_errlist[];

/* Look up the text for an error number in the library's sys_errlist table.

Argument:   n   the error number
Returns:    the table entry for n, or a fixed string if n is outside the
            range 0 to sys_nerr - 1
*/

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
00516 #endif /* HAVE_STRERROR */
00517 
00518 
00519 
00520 /*************************************************
00521 *             Find end of line                   *
00522 *************************************************/
00523 
00524 /* The length of the endline sequence that is found is set via lenptr. This may
00525 be zero at the very end of the file if there is no line-ending sequence there.
00526 
00527 Arguments:
00528   p         current position in line
00529   endptr    end of available data
00530   lenptr    where to put the length of the eol sequence
00531 
00532 Returns:    pointer to the last byte of the line
00533 */
00534 
00535 static char *
00536 end_of_line(char *p, char *endptr, int *lenptr)
00537 {
00538 switch(endlinetype)
00539   {
00540   default:      /* Just in case */
00541   case EL_LF:
00542   while (p < endptr && *p != '\n') p++;
00543   if (p < endptr)
00544     {
00545     *lenptr = 1;
00546     return p + 1;
00547     }
00548   *lenptr = 0;
00549   return endptr;
00550 
00551   case EL_CR:
00552   while (p < endptr && *p != '\r') p++;
00553   if (p < endptr)
00554     {
00555     *lenptr = 1;
00556     return p + 1;
00557     }
00558   *lenptr = 0;
00559   return endptr;
00560 
00561   case EL_CRLF:
00562   for (;;)
00563     {
00564     while (p < endptr && *p != '\r') p++;
00565     if (++p >= endptr)
00566       {
00567       *lenptr = 0;
00568       return endptr;
00569       }
00570     if (*p == '\n')
00571       {
00572       *lenptr = 2;
00573       return p + 1;
00574       }
00575     }
00576   break;
00577 
00578   case EL_ANYCRLF:
00579   while (p < endptr)
00580     {
00581     int extra = 0;
00582     register int c = *((unsigned char *)p);
00583 
00584     if (utf8 && c >= 0xc0)
00585       {
00586       int gcii, gcss;
00587       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
00588       gcss = 6*extra;
00589       c = (c & utf8_table3[extra]) << gcss;
00590       for (gcii = 1; gcii <= extra; gcii++)
00591         {
00592         gcss -= 6;
00593         c |= (p[gcii] & 0x3f) << gcss;
00594         }
00595       }
00596 
00597     p += 1 + extra;
00598 
00599     switch (c)
00600       {
00601       case 0x0a:    /* LF */
00602       *lenptr = 1;
00603       return p;
00604 
00605       case 0x0d:    /* CR */
00606       if (p < endptr && *p == 0x0a)
00607         {
00608         *lenptr = 2;
00609         p++;
00610         }
00611       else *lenptr = 1;
00612       return p;
00613 
00614       default:
00615       break;
00616       }
00617     }   /* End of loop for ANYCRLF case */
00618 
00619   *lenptr = 0;  /* Must have hit the end */
00620   return endptr;
00621 
00622   case EL_ANY:
00623   while (p < endptr)
00624     {
00625     int extra = 0;
00626     register int c = *((unsigned char *)p);
00627 
00628     if (utf8 && c >= 0xc0)
00629       {
00630       int gcii, gcss;
00631       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
00632       gcss = 6*extra;
00633       c = (c & utf8_table3[extra]) << gcss;
00634       for (gcii = 1; gcii <= extra; gcii++)
00635         {
00636         gcss -= 6;
00637         c |= (p[gcii] & 0x3f) << gcss;
00638         }
00639       }
00640 
00641     p += 1 + extra;
00642 
00643     switch (c)
00644       {
00645       case 0x0a:    /* LF */
00646       case 0x0b:    /* VT */
00647       case 0x0c:    /* FF */
00648       *lenptr = 1;
00649       return p;
00650 
00651       case 0x0d:    /* CR */
00652       if (p < endptr && *p == 0x0a)
00653         {
00654         *lenptr = 2;
00655         p++;
00656         }
00657       else *lenptr = 1;
00658       return p;
00659 
00660       case 0x85:    /* NEL */
00661       *lenptr = utf8? 2 : 1;
00662       return p;
00663 
00664       case 0x2028:  /* LS */
00665       case 0x2029:  /* PS */
00666       *lenptr = 3;
00667       return p;
00668 
00669       default:
00670       break;
00671       }
00672     }   /* End of loop for ANY case */
00673 
00674   *lenptr = 0;  /* Must have hit the end */
00675   return endptr;
00676   }     /* End of overall switch */
00677 }
00678 
00679 
00680 
00681 /*************************************************
00682 *         Find start of previous line            *
00683 *************************************************/
00684 
00685 /* This is called when looking back for before lines to print.
00686 
00687 Arguments:
00688   p         start of the subsequent line
00689   startptr  start of available data
00690 
00691 Returns:    pointer to the start of the previous line
00692 */
00693 
00694 static char *
00695 previous_line(char *p, char *startptr)
00696 {
00697 switch(endlinetype)
00698   {
00699   default:      /* Just in case */
00700   case EL_LF:
00701   p--;
00702   while (p > startptr && p[-1] != '\n') p--;
00703   return p;
00704 
00705   case EL_CR:
00706   p--;
00707   while (p > startptr && p[-1] != '\n') p--;
00708   return p;
00709 
00710   case EL_CRLF:
00711   for (;;)
00712     {
00713     p -= 2;
00714     while (p > startptr && p[-1] != '\n') p--;
00715     if (p <= startptr + 1 || p[-2] == '\r') return p;
00716     }
00717   return p;   /* But control should never get here */
00718 
00719   case EL_ANY:
00720   case EL_ANYCRLF:
00721   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
00722   if (utf8) while ((*p & 0xc0) == 0x80) p--;
00723 
00724   while (p > startptr)
00725     {
00726     register int c;
00727     char *pp = p - 1;
00728 
00729     if (utf8)
00730       {
00731       int extra = 0;
00732       while ((*pp & 0xc0) == 0x80) pp--;
00733       c = *((unsigned char *)pp);
00734       if (c >= 0xc0)
00735         {
00736         int gcii, gcss;
00737         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
00738         gcss = 6*extra;
00739         c = (c & utf8_table3[extra]) << gcss;
00740         for (gcii = 1; gcii <= extra; gcii++)
00741           {
00742           gcss -= 6;
00743           c |= (pp[gcii] & 0x3f) << gcss;
00744           }
00745         }
00746       }
00747     else c = *((unsigned char *)pp);
00748 
00749     if (endlinetype == EL_ANYCRLF) switch (c)
00750       {
00751       case 0x0a:    /* LF */
00752       case 0x0d:    /* CR */
00753       return p;
00754 
00755       default:
00756       break;
00757       }
00758 
00759     else switch (c)
00760       {
00761       case 0x0a:    /* LF */
00762       case 0x0b:    /* VT */
00763       case 0x0c:    /* FF */
00764       case 0x0d:    /* CR */
00765       case 0x85:    /* NEL */
00766       case 0x2028:  /* LS */
00767       case 0x2029:  /* PS */
00768       return p;
00769 
00770       default:
00771       break;
00772       }
00773 
00774     p = pp;  /* Back one character */
00775     }        /* End of loop for ANY case */
00776 
00777   return startptr;  /* Hit start of data */
00778   }     /* End of overall switch */
00779 }
00780 
00781 
00782 
00783 
00784 
00785 /*************************************************
00786 *       Print the previous "after" lines         *
00787 *************************************************/
00788 
00789 /* This is called if we are about to lose said lines because of buffer filling,
00790 and at the end of the file. The data in the line is written using fwrite() so
00791 that a binary zero does not terminate it.
00792 
00793 Arguments:
00794   lastmatchnumber   the number of the last matching line, plus one
00795   lastmatchrestart  where we restarted after the last match
00796   endptr            end of available data
00797   printname         filename for printing
00798 
00799 Returns:            nothing
00800 */
00801 
00802 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
00803   char *endptr, char *printname)
00804 {
00805 if (after_context > 0 && lastmatchnumber > 0)
00806   {
00807   int count = 0;
00808   while (lastmatchrestart < endptr && count++ < after_context)
00809     {
00810     int ellength;
00811     char *pp = lastmatchrestart;
00812     if (printname != NULL) fprintf(stdout, "%s-", printname);
00813     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
00814     pp = end_of_line(pp, endptr, &ellength);
00815     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
00816     lastmatchrestart = pp;
00817     }
00818   hyphenpending = TRUE;
00819   }
00820 }
00821 
00822 
00823 
00824 /*************************************************
00825 *   Apply patterns to subject till one matches   *
00826 *************************************************/
00827 
00828 /* This function is called to run through all patterns, looking for a match. It
00829 is used multiple times for the same subject when colouring is enabled, in order
00830 to find all possible matches.
00831 
00832 Arguments:
00833   matchptr    the start of the subject
00834   length      the length of the subject to match
00835   offsets     the offets vector to fill in
00836   mrc         address of where to put the result of pcre_exec()
00837 
00838 Returns:      TRUE if there was a match
00839               FALSE if there was no match
00840               invert if there was a non-fatal error
00841 */
00842 
00843 static BOOL
00844 match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
00845 {
00846 int i;
00847 for (i = 0; i < pattern_count; i++)
00848   {
00849   *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,
00850     PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
00851   if (*mrc >= 0) return TRUE;
00852   if (*mrc == PCRE_ERROR_NOMATCH) continue;
00853   fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
00854   if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
00855   fprintf(stderr, "this text:\n");
00856   fwrite(matchptr, 1, length, stderr);  /* In case binary zero included */
00857   fprintf(stderr, "\n");
00858   if (error_count == 0 &&
00859       (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
00860     {
00861     fprintf(stderr, "pcregrep: error %d means that a resource limit "
00862       "was exceeded\n", *mrc);
00863     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
00864     }
00865   if (error_count++ > 20)
00866     {
00867     fprintf(stderr, "pcregrep: too many errors - abandoned\n");
00868     exit(2);
00869     }
00870   return invert;    /* No more matching; don't show the line again */
00871   }
00872 
00873 return FALSE;  /* No match, no errors */
00874 }
00875 
00876 
00877 
00878 /*************************************************
00879 *            Grep an individual file             *
00880 *************************************************/
00881 
/* This is called from grep_or_recurse() below. It uses a buffer that is three
times the value of MBUFTHIRD. The matching point is never allowed to stray into
the top third of the buffer, thus keeping more of the file available for
context printing or for multiline scanning. For large files, the pointer will
be in the middle third most of the time, so the bottom third is available for
"before" context printing.

Arguments:
  handle       the fopened FILE stream for a normal file
               the gzFile pointer when reading is via libz
               the BZFILE pointer when reading is via libbz2
  frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
  printname    the file name if it is to be printed for each match
               or NULL if the file name is not to be printed
               it cannot be NULL if filenames[_nomatch]_only is set

Returns:       0 if there was at least one match
               1 otherwise (no matches)
               2 if there is a read error on a .bz2 file

A line "qualifies" when its match result differs from "invert". The 0/1 sense
is reversed when filenames == FN_NOMATCH_ONLY: 1 is returned as soon as a line
qualifies, and 0 is returned (after printing the file name) when the end of the
file is reached without one. When filenames == FN_ONLY, or when "quiet" is set
(and count_only is not), 0 is returned at the first qualifying line without
scanning any further. The handle is not closed here.
*/

static int
pcregrep(void *handle, int frtype, char *printname)
{
int rc = 1;
int linenumber = 1;
int lastmatchnumber = 0;
int count = 0;
int filepos = 0;
int offsets[OFFSET_SIZE];
char *lastmatchrestart = NULL;
char buffer[3*MBUFTHIRD];
char *ptr = buffer;
char *endptr;
size_t bufflength;
BOOL endhyphenpending = FALSE;
FILE *in = NULL;                    /* Ensure initialized */

#ifdef SUPPORT_LIBZ
gzFile ingz = NULL;
#endif

#ifdef SUPPORT_LIBBZ2
BZFILE *inbz2 = NULL;
#endif


/* Do the first read into the start of the buffer and set up the pointer to end
of what we have. In the case of libz, a non-zipped .gz file will be read as a
plain file. However, if a .bz2 file isn't actually bzipped, the first read will
fail. */

/* NOTE(review): the gzread() result is stored in the unsigned bufflength with
no error test, unlike the bz2 branch that follows - confirm whether a failed
gzread() needs to be handled here. */

#ifdef SUPPORT_LIBZ
if (frtype == FR_LIBZ)
  {
  ingz = (gzFile)handle;
  bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
  }
else
#endif

#ifdef SUPPORT_LIBBZ2
if (frtype == FR_LIBBZ2)
  {
  inbz2 = (BZFILE *)handle;
  bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
  if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
  }                                    /* without the cast it is unsigned. */
else
#endif

  {
  in = (FILE *)handle;
  bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
  }

endptr = buffer + bufflength;

/* Loop while the current pointer is not at the end of the file. For large
files, endptr will be at the end of the buffer when we are in the middle of the
file, but ptr will never get there, because as soon as it gets over 2/3 of the
way, the buffer is shifted left and re-filled. */

while (ptr < endptr)
  {
  int endlinelength;
  int mrc = 0;
  BOOL match;
  char *matchptr = ptr;
  char *t = ptr;
  size_t length, linelength;

  /* At this point, ptr is at the start of a line. We need to find the length
  of the subject string to pass to pcre_exec(). In multiline mode, it is the
  length remainder of the data in the buffer. Otherwise, it is the length of
  the next line, excluding the terminating newline. After matching, we always
  advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
  option is used for compiling, so that any match is constrained to be in the
  first line. */

  t = end_of_line(t, endptr, &endlinelength);
  linelength = t - ptr - endlinelength;
  length = multiline? (size_t)(endptr - ptr) : linelength;

  /* Extra processing for Jeffrey Friedl's debugging. Note that this path
  times the first pattern only and then returns 0 from the whole function
  without looking at any further lines. */

#ifdef JFRIEDL_DEBUG
  if (jfriedl_XT || jfriedl_XR)
  {
      #include <sys/time.h>
      #include <time.h>
      struct timeval start_time, end_time;
      struct timezone dummy;
      int i;

      if (jfriedl_XT)
      {
          unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
          const char *orig = ptr;
          ptr = malloc(newlen + 1);
          if (!ptr) {
                  printf("out of memory");
                  exit(2);
          }
          endptr = ptr;
          strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
          for (i = 0; i < jfriedl_XT; i++) {
                  strncpy(endptr, orig,  length);
                  endptr += length;
          }
          strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
          length = newlen;
      }

      if (gettimeofday(&start_time, &dummy) != 0)
              perror("bad gettimeofday");


      for (i = 0; i < jfriedl_XR; i++)
          match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
              PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);

      if (gettimeofday(&end_time, &dummy) != 0)
              perror("bad gettimeofday");

      double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
                      -
                      (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));

      printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
      return 0;
  }
#endif

  /* We come back here after a match when the -o option (only_matching) is set,
  in order to find any further matches in the same line. */

  ONLY_MATCHING_RESTART:

  /* Run through all the patterns until one matches or there is an error other
  than NOMATCH. This code is in a subroutine so that it can be re-used for
  finding subsequent matches when colouring matched lines. */

  match = match_patterns(matchptr, length, offsets, &mrc);

  /* If it's a match or a not-match (as required), do what's wanted. */

  if (match != invert)
    {
    BOOL hyphenprinted = FALSE;

    /* We've failed if we want a file that doesn't have any matches. */

    if (filenames == FN_NOMATCH_ONLY) return 1;

    /* Just count if just counting is wanted. */

    if (count_only) count++;

    /* If all we want is a file name, there is no need to scan any more lines
    in the file. */

    else if (filenames == FN_ONLY)
      {
      fprintf(stdout, "%s\n", printname);
      return 0;
      }

    /* Likewise, if all we want is a yes/no answer. */

    else if (quiet) return 0;

    /* The --only-matching option prints just the substring that matched, and
    the --file-offsets and --line-offsets options output offsets for the
    matching substring (they both force --only-matching). None of these options
    prints any context. Afterwards, adjust the start and length, and then jump
    back to look for further matches in the same line. If we are in invert
    mode, however, nothing is printed - this could be still useful because the
    return code is set. */

    else if (only_matching)
      {
      if (!invert)
        {
        if (printname != NULL) fprintf(stdout, "%s:", printname);
        if (number) fprintf(stdout, "%d:", linenumber);
        if (line_offsets)
          fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
            offsets[1] - offsets[0]);
        else if (file_offsets)
          fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
            offsets[1] - offsets[0]);
        else
          {
          if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
          fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
          if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
          }
        fprintf(stdout, "\n");

        /* Step past the end of this match; offsets[] are relative to
        matchptr, so the remaining length shrinks by the same amount. */

        matchptr += offsets[1];
        length -= offsets[1];
        match = FALSE;
        goto ONLY_MATCHING_RESTART;
        }
      }

    /* This is the default case when none of the above options is set. We print
    the matching line(s), possibly preceded and/or followed by other lines of
    context. */

    else
      {
      /* See if there is a requirement to print some "after" lines from a
      previous match. We never print any overlaps. */

      if (after_context > 0 && lastmatchnumber > 0)
        {
        int ellength;
        int linecount = 0;
        char *p = lastmatchrestart;

        /* Find the end of the "after" lines: at most after_context lines, and
        never beyond the start of the current line. */

        while (p < ptr && linecount < after_context)
          {
          p = end_of_line(p, ptr, &ellength);
          linecount++;
          }

        /* It is important to advance lastmatchrestart during this printing so
        that it interacts correctly with any "before" printing below. Print
        each line's data using fwrite() in case there are binary zeroes. */

        while (lastmatchrestart < p)
          {
          char *pp = lastmatchrestart;
          if (printname != NULL) fprintf(stdout, "%s-", printname);
          if (number) fprintf(stdout, "%d-", lastmatchnumber++);
          pp = end_of_line(pp, endptr, &ellength);
          fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
          lastmatchrestart = pp;
          }
        if (lastmatchrestart != ptr) hyphenpending = TRUE;
        }

      /* If there were non-contiguous lines printed above, insert hyphens. */

      if (hyphenpending)
        {
        fprintf(stdout, "--\n");
        hyphenpending = FALSE;
        hyphenprinted = TRUE;
        }

      /* See if there is a requirement to print some "before" lines for this
      match. Again, don't print overlaps. */

      if (before_context > 0)
        {
        int linecount = 0;
        char *p = ptr;

        /* Walk backwards, stopping at the start of the buffer, at the restart
        point of the previous match, or after before_context lines. */

        while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
               linecount < before_context)
          {
          linecount++;
          p = previous_line(p, buffer);
          }

        if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
          fprintf(stdout, "--\n");

        while (p < ptr)
          {
          int ellength;
          char *pp = p;
          if (printname != NULL) fprintf(stdout, "%s-", printname);
          if (number) fprintf(stdout, "%d-", linenumber - linecount--);
          pp = end_of_line(pp, endptr, &ellength);
          fwrite(p, 1, pp - p, stdout);
          p = pp;
          }
        }

      /* Now print the matching line(s); ensure we set hyphenpending at the end
      of the file if any context lines are being output. */

      if (after_context > 0 || before_context > 0)
        endhyphenpending = TRUE;

      if (printname != NULL) fprintf(stdout, "%s:", printname);
      if (number) fprintf(stdout, "%d:", linenumber);

      /* In multiline mode, we want to print to the end of the line in which
      the end of the matched string is found, so we adjust linelength and the
      line number appropriately, but only when there actually was a match
      (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
      the match will always be before the first newline sequence. */

      if (multiline)
        {
        int ellength;
        char *endmatch = ptr;
        if (!invert)
          {
          endmatch += offsets[1];
          t = ptr;
          while (t < endmatch)
            {
            t = end_of_line(t, endptr, &ellength);
            if (t <= endmatch) linenumber++; else break;
            }
          }
        endmatch = end_of_line(endmatch, endptr, &ellength);
        linelength = endmatch - ptr - ellength;
        }

      /*** NOTE: Use only fwrite() to output the data line, so that binary
      zeroes are treated as just another data character. */

      /* This extra option, for Jeffrey Friedl's debugging requirements,
      replaces the matched string, or a specific captured string if it exists,
      with X. When this happens, colouring is ignored. */

#ifdef JFRIEDL_DEBUG
      if (S_arg >= 0 && S_arg < mrc)
        {
        int first = S_arg * 2;
        int last  = first + 1;
        fwrite(ptr, 1, offsets[first], stdout);
        fprintf(stdout, "X");
        fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
        }
      else
#endif

      /* We have to split the line(s) up if colouring, and search for further
      matches. last_offset accumulates how far into the line (relative to ptr)
      the output has progressed, so that the uncoloured tail can be written
      once no further match is found. */

      if (do_colour)
        {
        int last_offset = 0;
        fwrite(ptr, 1, offsets[0], stdout);
        fprintf(stdout, "%c[%sm", 0x1b, colour_string);
        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
        fprintf(stdout, "%c[00m", 0x1b);
        for (;;)
          {
          last_offset += offsets[1];
          matchptr += offsets[1];
          length -= offsets[1];
          if (!match_patterns(matchptr, length, offsets, &mrc)) break;
          fwrite(matchptr, 1, offsets[0], stdout);
          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
          fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
          fprintf(stdout, "%c[00m", 0x1b);
          }
        fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
          stdout);
        }

      /* Not colouring; no need to search for further matches */

      else fwrite(ptr, 1, linelength + endlinelength, stdout);
      }

    /* End of doing what has to be done for a match */

    rc = 0;    /* Had some success */

    /* Remember where the last match happened for after_context. We remember
    where we are about to restart, and that line's number. */

    lastmatchrestart = ptr + linelength + endlinelength;
    lastmatchnumber = linenumber + 1;
    }

  /* For a match in multiline inverted mode (which of course did not cause
  anything to be printed), we have to move on to the end of the match before
  proceeding. */

  if (multiline && invert && match)
    {
    int ellength;
    char *endmatch = ptr + offsets[1];
    t = ptr;
    while (t < endmatch)
      {
      t = end_of_line(t, endptr, &ellength);
      if (t <= endmatch) linenumber++; else break;
      }
    endmatch = end_of_line(endmatch, endptr, &ellength);
    linelength = endmatch - ptr - ellength;
    }

  /* Advance to after the newline and increment the line number. The file
  offset to the current line is maintained in filepos. */

  ptr += linelength + endlinelength;
  filepos += linelength + endlinelength;
  linenumber++;

  /* If we haven't yet reached the end of the file (the buffer is full), and
  the current point is in the top 1/3 of the buffer, slide the buffer down by
  1/3 and refill it. Before we do this, if some unprinted "after" lines are
  about to be lost, print them. */

  if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
    {
    if (after_context > 0 &&
        lastmatchnumber > 0 &&
        lastmatchrestart < buffer + MBUFTHIRD)
      {
      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
      lastmatchnumber = 0;
      }

    /* Now do the shuffle. The data moves down by MBUFTHIRD bytes, so ptr is
    moved down by the same amount to keep pointing at the same line. */

    memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
    ptr -= MBUFTHIRD;

    /* Refill the top third from whichever kind of handle is in use.
    NOTE(review): the gzread() and BZ2_bzread() results are added in without
    being tested for an error value - confirm whether that can matter here. */

#ifdef SUPPORT_LIBZ
    if (frtype == FR_LIBZ)
      bufflength = 2*MBUFTHIRD +
        gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
    else
#endif

#ifdef SUPPORT_LIBBZ2
    if (frtype == FR_LIBBZ2)
      bufflength = 2*MBUFTHIRD +
        BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
    else
#endif

    bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);

    endptr = buffer + bufflength;

    /* Adjust any last match point */

    if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
    }
  }     /* Loop through the whole file */

/* End of file; print final "after" lines if wanted; do_after_lines sets
hyphenpending if it prints something. */

if (!only_matching && !count_only)
  {
  do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
  hyphenpending |= endhyphenpending;
  }

/* Print the file name if we are looking for those without matches and there
were none. If we found a match, we won't have got this far. */

if (filenames == FN_NOMATCH_ONLY)
  {
  fprintf(stdout, "%s\n", printname);
  return 0;
  }

/* Print the match count if wanted */

if (count_only)
  {
  if (printname != NULL) fprintf(stdout, "%s:", printname);
  fprintf(stdout, "%d\n", count);
  }

return rc;
}
01374 
01375 
01376 
01377 /*************************************************
01378 *     Grep a file or recurse into a directory    *
01379 *************************************************/
01380 
01381 /* Given a path name, if it's a directory, scan all the files if we are
01382 recursing; if it's a file, grep it.
01383 
01384 Arguments:
01385   pathname          the path to investigate
01386   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
01387   only_one_at_top   TRUE if the path is the only one at toplevel
01388 
01389 Returns:   0 if there was at least one match
01390            1 if there were no matches
01391            2 there was some kind of error
01392 
01393 However, file opening failures are suppressed if "silent" is set.
01394 */
01395 
01396 static int
01397 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
01398 {
01399 int rc = 1;
01400 int sep;
01401 int frtype;
01402 int pathlen;
01403 void *handle;
01404 FILE *in = NULL;           /* Ensure initialized */
01405 
01406 #ifdef SUPPORT_LIBZ
01407 gzFile ingz = NULL;
01408 #endif
01409 
01410 #ifdef SUPPORT_LIBBZ2
01411 BZFILE *inbz2 = NULL;
01412 #endif
01413 
01414 /* If the file name is "-" we scan stdin */
01415 
01416 if (strcmp(pathname, "-") == 0)
01417   {
01418   return pcregrep(stdin, FR_PLAIN,
01419     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
01420       stdin_name : NULL);
01421   }
01422 
01423 /* If the file is a directory, skip if skipping or if we are recursing, scan
01424 each file and directory within it, subject to any include or exclude patterns
01425 that were set. The scanning code is localized so it can be made
01426 system-specific. */
01427 
01428 if ((sep = isdirectory(pathname)) != 0)
01429   {
01430   if (dee_action == dee_SKIP) return 1;
01431   if (dee_action == dee_RECURSE)
01432     {
01433     char buffer[1024];
01434     char *nextfile;
01435     directory_type *dir = opendirectory(pathname);
01436 
01437     if (dir == NULL)
01438       {
01439       if (!silent)
01440         fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
01441           strerror(errno));
01442       return 2;
01443       }
01444 
01445     while ((nextfile = readdirectory(dir)) != NULL)
01446       {
01447       int frc, nflen;
01448       sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
01449       nflen = strlen(nextfile);
01450 
01451       if (isdirectory(buffer))
01452         {
01453         if (exclude_dir_compiled != NULL &&
01454             pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
01455           continue;
01456 
01457         if (include_dir_compiled != NULL &&
01458             pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
01459           continue;
01460         }
01461       else
01462         {
01463         if (exclude_compiled != NULL &&
01464             pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
01465           continue;
01466 
01467         if (include_compiled != NULL &&
01468             pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
01469           continue;
01470         }
01471 
01472       frc = grep_or_recurse(buffer, dir_recurse, FALSE);
01473       if (frc > 1) rc = frc;
01474        else if (frc == 0 && rc == 1) rc = 0;
01475       }
01476 
01477     closedirectory(dir);
01478     return rc;
01479     }
01480   }
01481 
01482 /* If the file is not a directory and not a regular file, skip it if that's
01483 been requested. */
01484 
01485 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
01486 
01487 /* Control reaches here if we have a regular file, or if we have a directory
01488 and recursion or skipping was not requested, or if we have anything else and
01489 skipping was not requested. The scan proceeds. If this is the first and only
01490 argument at top level, we don't show the file name, unless we are only showing
01491 the file name, or the filename was forced (-H). */
01492 
01493 pathlen = strlen(pathname);
01494 
01495 /* Open using zlib if it is supported and the file name ends with .gz. */
01496 
01497 #ifdef SUPPORT_LIBZ
01498 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
01499   {
01500   ingz = gzopen(pathname, "rb");
01501   if (ingz == NULL)
01502     {
01503     if (!silent)
01504       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
01505         strerror(errno));
01506     return 2;
01507     }
01508   handle = (void *)ingz;
01509   frtype = FR_LIBZ;
01510   }
01511 else
01512 #endif
01513 
01514 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
01515 
01516 #ifdef SUPPORT_LIBBZ2
01517 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
01518   {
01519   inbz2 = BZ2_bzopen(pathname, "rb");
01520   handle = (void *)inbz2;
01521   frtype = FR_LIBBZ2;
01522   }
01523 else
01524 #endif
01525 
01526 /* Otherwise use plain fopen(). The label is so that we can come back here if
01527 an attempt to read a .bz2 file indicates that it really is a plain file. */
01528 
01529 #ifdef SUPPORT_LIBBZ2
01530 PLAIN_FILE:
01531 #endif
01532   {
01533   in = fopen(pathname, "r");
01534   handle = (void *)in;
01535   frtype = FR_PLAIN;
01536   }
01537 
01538 /* All the opening methods return errno when they fail. */
01539 
01540 if (handle == NULL)
01541   {
01542   if (!silent)
01543     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
01544       strerror(errno));
01545   return 2;
01546   }
01547 
01548 /* Now grep the file */
01549 
01550 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
01551   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
01552 
01553 /* Close in an appropriate manner. */
01554 
01555 #ifdef SUPPORT_LIBZ
01556 if (frtype == FR_LIBZ)
01557   gzclose(ingz);
01558 else
01559 #endif
01560 
01561 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
01562 read failed. If the error indicates that the file isn't in fact bzipped, try
01563 again as a normal file. */
01564 
01565 #ifdef SUPPORT_LIBBZ2
01566 if (frtype == FR_LIBBZ2)
01567   {
01568   if (rc == 2)
01569     {
01570     int errnum;
01571     const char *err = BZ2_bzerror(inbz2, &errnum);
01572     if (errnum == BZ_DATA_ERROR_MAGIC)
01573       {
01574       BZ2_bzclose(inbz2);
01575       goto PLAIN_FILE;
01576       }
01577     else if (!silent)
01578       fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
01579         pathname, err);
01580     }
01581   BZ2_bzclose(inbz2);
01582   }
01583 else
01584 #endif
01585 
01586 /* Normal file close */
01587 
01588 fclose(in);
01589 
01590 /* Pass back the yield from pcregrep(). */
01591 
01592 return rc;
01593 }
01594 
01595 
01596 
01597 
01598 /*************************************************
01599 *                Usage function                  *
01600 *************************************************/
01601 
01602 static int
01603 usage(int rc)
01604 {
01605 option_item *op;
01606 fprintf(stderr, "Usage: pcregrep [-");
01607 for (op = optionlist; op->one_char != 0; op++)
01608   {
01609   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
01610   }
01611 fprintf(stderr, "] [long options] [pattern] [files]\n");
01612 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
01613   "options.\n");
01614 return rc;
01615 }
01616 
01617 
01618 
01619 
01620 /*************************************************
01621 *                Help function                   *
01622 *************************************************/
01623 
01624 static void
01625 help(void)
01626 {
01627 option_item *op;
01628 
01629 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
01630 printf("Search for PATTERN in each FILE or standard input.\n");
01631 printf("PATTERN must be present if neither -e nor -f is used.\n");
01632 printf("\"-\" can be used as a file name to mean STDIN.\n");
01633 
01634 #ifdef SUPPORT_LIBZ
01635 printf("Files whose names end in .gz are read using zlib.\n");
01636 #endif
01637 
01638 #ifdef SUPPORT_LIBBZ2
01639 printf("Files whose names end in .bz2 are read using bzlib2.\n");
01640 #endif
01641 
01642 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
01643 printf("Other files and the standard input are read as plain files.\n\n");
01644 #else
01645 printf("All files are read as plain files, without any interpretation.\n\n");
01646 #endif
01647 
01648 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
01649 printf("Options:\n");
01650 
01651 for (op = optionlist; op->one_char != 0; op++)
01652   {
01653   int n;
01654   char s[4];
01655   if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
01656   n = 30 - printf("  %s --%s", s, op->long_name);
01657   if (n < 1) n = 1;
01658   printf("%.*s%s\n", n, "                    ", op->help_text);
01659   }
01660 
01661 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
01662 printf("trailing white space is removed and blank lines are ignored.\n");
01663 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
01664 
01665 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
01666 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
01667 }
01668 
01669 
01670 
01671 
01672 /*************************************************
01673 *    Handle a single-letter, no data option      *
01674 *************************************************/
01675 
01676 static int
01677 handle_option(int letter, int options)
01678 {
01679 switch(letter)
01680   {
01681   case N_FOFFSETS: file_offsets = TRUE; break;
01682   case N_HELP: help(); exit(0);
01683   case N_LOFFSETS: line_offsets = number = TRUE; break;
01684   case 'c': count_only = TRUE; break;
01685   case 'F': process_options |= PO_FIXED_STRINGS; break;
01686   case 'H': filenames = FN_FORCE; break;
01687   case 'h': filenames = FN_NONE; break;
01688   case 'i': options |= PCRE_CASELESS; break;
01689   case 'l': filenames = FN_ONLY; break;
01690   case 'L': filenames = FN_NOMATCH_ONLY; break;
01691   case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
01692   case 'n': number = TRUE; break;
01693   case 'o': only_matching = TRUE; break;
01694   case 'q': quiet = TRUE; break;
01695   case 'r': dee_action = dee_RECURSE; break;
01696   case 's': silent = TRUE; break;
01697   case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
01698   case 'v': invert = TRUE; break;
01699   case 'w': process_options |= PO_WORD_MATCH; break;
01700   case 'x': process_options |= PO_LINE_MATCH; break;
01701 
01702   case 'V':
01703   fprintf(stderr, "pcregrep version %s\n", pcre_version());
01704   exit(0);
01705   break;
01706 
01707   default:
01708   fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
01709   exit(usage(2));
01710   }
01711 
01712 return options;
01713 }
01714 
01715 
01716 
01717 
01718 /*************************************************
01719 *          Construct printed ordinal             *
01720 *************************************************/
01721 
/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12, or 13 take "th" ("11th", "112th") rather than following the final
digit. Zero and negative numbers always get "th".

Argument:   n    the number
Returns:    a pointer to a static buffer holding the text; the contents are
            overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[24];    /* Ample room for the digits, a sign, and suffix */
char *p = buffer;
int last_two = n % 100;

p += sprintf(p, "%d", n);  /* Leaves p at the terminating zero */

if (last_two >= 11 && last_two <= 13) strcpy(p, "th");
else switch (n%10)
  {
  case 1: strcpy(p, "st"); break;
  case 2: strcpy(p, "nd"); break;
  case 3: strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }
return buffer;
}
01740 
01741 
01742 
01743 /*************************************************
01744 *          Compile a single pattern              *
01745 *************************************************/
01746 
01747 /* When the -F option has been used, this is called for each substring.
01748 Otherwise it's called for each supplied pattern.
01749 
01750 Arguments:
01751   pattern        the pattern string
01752   options        the PCRE options
01753   filename       the file name, or NULL for a command-line pattern
01754   count          0 if this is the only command line pattern, or
01755                  number of the command line pattern, or
01756                  linenumber for a pattern from a file
01757 
01758 Returns:         TRUE on success, FALSE after an error
01759 */
01760 
01761 static BOOL
01762 compile_single_pattern(char *pattern, int options, char *filename, int count)
01763 {
01764 char buffer[MBUFTHIRD + 16];
01765 const char *error;
01766 int errptr;
01767 
01768 if (pattern_count >= MAX_PATTERN_COUNT)
01769   {
01770   fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
01771     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
01772   return FALSE;
01773   }
01774 
01775 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
01776   suffix[process_options]);
01777 pattern_list[pattern_count] =
01778   pcre_compile(buffer, options, &error, &errptr, pcretables);
01779 if (pattern_list[pattern_count] != NULL)
01780   {
01781   pattern_count++;
01782   return TRUE;
01783   }
01784 
01785 /* Handle compile errors */
01786 
01787 errptr -= (int)strlen(prefix[process_options]);
01788 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
01789 
01790 if (filename == NULL)
01791   {
01792   if (count == 0)
01793     fprintf(stderr, "pcregrep: Error in command-line regex "
01794       "at offset %d: %s\n", errptr, error);
01795   else
01796     fprintf(stderr, "pcregrep: Error in %s command-line regex "
01797       "at offset %d: %s\n", ordin(count), errptr, error);
01798   }
01799 else
01800   {
01801   fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
01802     "at offset %d: %s\n", count, filename, errptr, error);
01803   }
01804 
01805 return FALSE;
01806 }
01807 
01808 
01809 
01810 /*************************************************
01811 *           Compile one supplied pattern         *
01812 *************************************************/
01813 
01814 /* When the -F option has been used, each string may be a list of strings,
01815 separated by line breaks. They will be matched literally.
01816 
01817 Arguments:
01818   pattern        the pattern string
01819   options        the PCRE options
01820   filename       the file name, or NULL for a command-line pattern
01821   count          0 if this is the only command line pattern, or
01822                  number of the command line pattern, or
01823                  linenumber for a pattern from a file
01824 
01825 Returns:         TRUE on success, FALSE after an error
01826 */
01827 
01828 static BOOL
01829 compile_pattern(char *pattern, int options, char *filename, int count)
01830 {
01831 if ((process_options & PO_FIXED_STRINGS) != 0)
01832   {
01833   char *eop = pattern + strlen(pattern);
01834   char buffer[MBUFTHIRD];
01835   for(;;)
01836     {
01837     int ellength;
01838     char *p = end_of_line(pattern, eop, &ellength);
01839     if (ellength == 0)
01840       return compile_single_pattern(pattern, options, filename, count);
01841     sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
01842     pattern = p;
01843     if (!compile_single_pattern(buffer, options, filename, count))
01844       return FALSE;
01845     }
01846   }
01847 else return compile_single_pattern(pattern, options, filename, count);
01848 }
01849 
01850 
01851 
01852 /*************************************************
01853 *                Main program                    *
01854 *************************************************/
01855 
01856 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
01857 
01858 int
01859 main(int argc, char **argv)
01860 {
01861 int i, j;
01862 int rc = 1;
01863 int pcre_options = 0;
01864 int cmd_pattern_count = 0;
01865 int hint_count = 0;
01866 int errptr;
01867 BOOL only_one_at_top;
01868 char *patterns[MAX_PATTERN_COUNT];
01869 const char *locale_from = "--locale";
01870 const char *error;
01871 
01872 /* Set the default line ending value from the default in the PCRE library;
01873 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
01874 Note that the return values from pcre_config(), though derived from the ASCII
01875 codes, are the same in EBCDIC environments, so we must use the actual values
01876 rather than escapes such as as '\r'. */
01877 
01878 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
01879 switch(i)
01880   {
01881   default:               newline = (char *)"lf"; break;
01882   case 13:               newline = (char *)"cr"; break;
01883   case (13 << 8) | 10:   newline = (char *)"crlf"; break;
01884   case -1:               newline = (char *)"any"; break;
01885   case -2:               newline = (char *)"anycrlf"; break;
01886   }
01887 
01888 /* Process the options */
01889 
01890 for (i = 1; i < argc; i++)
01891   {
01892   option_item *op = NULL;
01893   char *option_data = (char *)"";    /* default to keep compiler happy */
01894   BOOL longop;
01895   BOOL longopwasequals = FALSE;
01896 
01897   if (argv[i][0] != '-') break;
01898 
01899   /* If we hit an argument that is just "-", it may be a reference to STDIN,
01900   but only if we have previously had -e or -f to define the patterns. */
01901 
01902   if (argv[i][1] == 0)
01903     {
01904     if (pattern_filename != NULL || pattern_count > 0) break;
01905       else exit(usage(2));
01906     }
01907 
01908   /* Handle a long name option, or -- to terminate the options */
01909 
01910   if (argv[i][1] == '-')
01911     {
01912     char *arg = argv[i] + 2;
01913     char *argequals = strchr(arg, '=');
01914 
01915     if (*arg == 0)    /* -- terminates options */
01916       {
01917       i++;
01918       break;                /* out of the options-handling loop */
01919       }
01920 
01921     longop = TRUE;
01922 
01923     /* Some long options have data that follows after =, for example file=name.
01924     Some options have variations in the long name spelling: specifically, we
01925     allow "regexp" because GNU grep allows it, though I personally go along
01926     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
01927     These options are entered in the table as "regex(p)". No option is in both
01928     these categories, fortunately. */
01929 
01930     for (op = optionlist; op->one_char != 0; op++)
01931       {
01932       char *opbra = strchr(op->long_name, '(');
01933       char *equals = strchr(op->long_name, '=');
01934       if (opbra == NULL)     /* Not a (p) case */
01935         {
01936         if (equals == NULL)  /* Not thing=data case */
01937           {
01938           if (strcmp(arg, op->long_name) == 0) break;
01939           }
01940         else                 /* Special case xxx=data */
01941           {
01942           int oplen = equals - op->long_name;
01943           int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
01944           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
01945             {
01946             option_data = arg + arglen;
01947             if (*option_data == '=')
01948               {
01949               option_data++;
01950               longopwasequals = TRUE;
01951               }
01952             break;
01953             }
01954           }
01955         }
01956       else                   /* Special case xxxx(p) */
01957         {
01958         char buff1[24];
01959         char buff2[24];
01960         int baselen = opbra - op->long_name;
01961         sprintf(buff1, "%.*s", baselen, op->long_name);
01962         sprintf(buff2, "%s%.*s", buff1,
01963           (int)strlen(op->long_name) - baselen - 2, opbra + 1);
01964         if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
01965           break;
01966         }
01967       }
01968 
01969     if (op->one_char == 0)
01970       {
01971       fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
01972       exit(usage(2));
01973       }
01974     }
01975 
01976 
01977   /* Jeffrey Friedl's debugging harness uses these additional options which
01978   are not in the right form for putting in the option table because they use
01979   only one hyphen, yet are more than one character long. By putting them
01980   separately here, they will not get displayed as part of the help() output,
01981   but I don't think Jeffrey will care about that. */
01982 
01983 #ifdef JFRIEDL_DEBUG
01984   else if (strcmp(argv[i], "-pre") == 0) {
01985           jfriedl_prefix = argv[++i];
01986           continue;
01987   } else if (strcmp(argv[i], "-post") == 0) {
01988           jfriedl_postfix = argv[++i];
01989           continue;
01990   } else if (strcmp(argv[i], "-XT") == 0) {
01991           sscanf(argv[++i], "%d", &jfriedl_XT);
01992           continue;
01993   } else if (strcmp(argv[i], "-XR") == 0) {
01994           sscanf(argv[++i], "%d", &jfriedl_XR);
01995           continue;
01996   }
01997 #endif
01998 
01999 
02000   /* One-char options; many that have no data may be in a single argument; we
02001   continue till we hit the last one or one that needs data. */
02002 
02003   else
02004     {
02005     char *s = argv[i] + 1;
02006     longop = FALSE;
02007     while (*s != 0)
02008       {
02009       for (op = optionlist; op->one_char != 0; op++)
02010         { if (*s == op->one_char) break; }
02011       if (op->one_char == 0)
02012         {
02013         fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
02014           *s, argv[i]);
02015         exit(usage(2));
02016         }
02017       if (op->type != OP_NODATA || s[1] == 0)
02018         {
02019         option_data = s+1;
02020         break;
02021         }
02022       pcre_options = handle_option(*s++, pcre_options);
02023       }
02024     }
02025 
02026   /* At this point we should have op pointing to a matched option. If the type
02027   is NO_DATA, it means that there is no data, and the option might set
02028   something in the PCRE options. */
02029 
02030   if (op->type == OP_NODATA)
02031     {
02032     pcre_options = handle_option(op->one_char, pcre_options);
02033     continue;
02034     }
02035 
02036   /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
02037   either has a value or defaults to something. It cannot have data in a
02038   separate item. At the moment, the only such options are "colo(u)r" and
02039   Jeffrey Friedl's special -S debugging option. */
02040 
02041   if (*option_data == 0 &&
02042       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
02043     {
02044     switch (op->one_char)
02045       {
02046       case N_COLOUR:
02047       colour_option = (char *)"auto";
02048       break;
02049 #ifdef JFRIEDL_DEBUG
02050       case 'S':
02051       S_arg = 0;
02052       break;
02053 #endif
02054       }
02055     continue;
02056     }
02057 
02058   /* Otherwise, find the data string for the option. */
02059 
02060   if (*option_data == 0)
02061     {
02062     if (i >= argc - 1 || longopwasequals)
02063       {
02064       fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
02065       exit(usage(2));
02066       }
02067     option_data = argv[++i];
02068     }
02069 
02070   /* If the option type is OP_PATLIST, it's the -e option, which can be called
02071   multiple times to create a list of patterns. */
02072 
02073   if (op->type == OP_PATLIST)
02074     {
02075     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
02076       {
02077       fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
02078         MAX_PATTERN_COUNT);
02079       return 2;
02080       }
02081     patterns[cmd_pattern_count++] = option_data;
02082     }
02083 
02084   /* Otherwise, deal with single string or numeric data values. */
02085 
02086   else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
02087     {
02088     *((char **)op->dataptr) = option_data;
02089     }
02090   else
02091     {
02092     char *endptr;
02093     int n = strtoul(option_data, &endptr, 10);
02094     if (*endptr != 0)
02095       {
02096       if (longop)
02097         {
02098         char *equals = strchr(op->long_name, '=');
02099         int nlen = (equals == NULL)? (int)strlen(op->long_name) :
02100           equals - op->long_name;
02101         fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
02102           option_data, nlen, op->long_name);
02103         }
02104       else
02105         fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
02106           option_data, op->one_char);
02107       exit(usage(2));
02108       }
02109     *((int *)op->dataptr) = n;
02110     }
02111   }
02112 
02113 /* Options have been decoded. If -C was used, its value is used as a default
02114 for -A and -B. */
02115 
02116 if (both_context > 0)
02117   {
02118   if (after_context == 0) after_context = both_context;
02119   if (before_context == 0) before_context = both_context;
02120   }
02121 
02122 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
02123 However, the latter two set the only_matching flag. */
02124 
02125 if ((only_matching && (file_offsets || line_offsets)) ||
02126     (file_offsets && line_offsets))
02127   {
02128   fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
02129     "and/or --line-offsets\n");
02130   exit(usage(2));
02131   }
02132 
02133 if (file_offsets || line_offsets) only_matching = TRUE;
02134 
02135 /* If a locale has not been provided as an option, see if the LC_CTYPE or
02136 LC_ALL environment variable is set, and if so, use it. */
02137 
02138 if (locale == NULL)
02139   {
02140   locale = getenv("LC_ALL");
02141   locale_from = "LCC_ALL";
02142   }
02143 
02144 if (locale == NULL)
02145   {
02146   locale = getenv("LC_CTYPE");
02147   locale_from = "LC_CTYPE";
02148   }
02149 
02150 /* If a locale has been provided, set it, and generate the tables the PCRE
02151 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
02152 
02153 if (locale != NULL)
02154   {
02155   if (setlocale(LC_CTYPE, locale) == NULL)
02156     {
02157     fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
02158       locale, locale_from);
02159     return 2;
02160     }
02161   pcretables = pcre_maketables();
02162   }
02163 
02164 /* Sort out colouring */
02165 
02166 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
02167   {
02168   if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
02169   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
02170   else
02171     {
02172     fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
02173       colour_option);
02174     return 2;
02175     }
02176   if (do_colour)
02177     {
02178     char *cs = getenv("PCREGREP_COLOUR");
02179     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
02180     if (cs != NULL) colour_string = cs;
02181     }
02182   }
02183 
02184 /* Interpret the newline type; the default settings are Unix-like. */
02185 
02186 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
02187   {
02188   pcre_options |= PCRE_NEWLINE_CR;
02189   endlinetype = EL_CR;
02190   }
02191 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
02192   {
02193   pcre_options |= PCRE_NEWLINE_LF;
02194   endlinetype = EL_LF;
02195   }
02196 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
02197   {
02198   pcre_options |= PCRE_NEWLINE_CRLF;
02199   endlinetype = EL_CRLF;
02200   }
02201 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
02202   {
02203   pcre_options |= PCRE_NEWLINE_ANY;
02204   endlinetype = EL_ANY;
02205   }
02206 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
02207   {
02208   pcre_options |= PCRE_NEWLINE_ANYCRLF;
02209   endlinetype = EL_ANYCRLF;
02210   }
02211 else
02212   {
02213   fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
02214   return 2;
02215   }
02216 
02217 /* Interpret the text values for -d and -D */
02218 
02219 if (dee_option != NULL)
02220   {
02221   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
02222   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
02223   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
02224   else
02225     {
02226     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
02227     return 2;
02228     }
02229   }
02230 
02231 if (DEE_option != NULL)
02232   {
02233   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
02234   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
02235   else
02236     {
02237     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
02238     return 2;
02239     }
02240   }
02241 
02242 /* Check the values for Jeffrey Friedl's debugging options. */
02243 
02244 #ifdef JFRIEDL_DEBUG
02245 if (S_arg > 9)
02246   {
02247   fprintf(stderr, "pcregrep: bad value for -S option\n");
02248   return 2;
02249   }
02250 if (jfriedl_XT != 0 || jfriedl_XR != 0)
02251   {
02252   if (jfriedl_XT == 0) jfriedl_XT = 1;
02253   if (jfriedl_XR == 0) jfriedl_XR = 1;
02254   }
02255 #endif
02256 
02257 /* Get memory to store the pattern and hints lists. */
02258 
02259 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
02260 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
02261 
02262 if (pattern_list == NULL || hints_list == NULL)
02263   {
02264   fprintf(stderr, "pcregrep: malloc failed\n");
02265   goto EXIT2;
02266   }
02267 
02268 /* If no patterns were provided by -e, and there is no file provided by -f,
02269 the first argument is the one and only pattern, and it must exist. */
02270 
02271 if (cmd_pattern_count == 0 && pattern_filename == NULL)
02272   {
02273   if (i >= argc) return usage(2);
02274   patterns[cmd_pattern_count++] = argv[i++];
02275   }
02276 
02277 /* Compile the patterns that were provided on the command line, either by
02278 multiple uses of -e or as a single unkeyed pattern. */
02279 
02280 for (j = 0; j < cmd_pattern_count; j++)
02281   {
02282   if (!compile_pattern(patterns[j], pcre_options, NULL,
02283        (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
02284     goto EXIT2;
02285   }
02286 
02287 /* Compile the regular expressions that are provided in a file. */
02288 
02289 if (pattern_filename != NULL)
02290   {
02291   int linenumber = 0;
02292   FILE *f;
02293   char *filename;
02294   char buffer[MBUFTHIRD];
02295 
02296   if (strcmp(pattern_filename, "-") == 0)
02297     {
02298     f = stdin;
02299     filename = stdin_name;
02300     }
02301   else
02302     {
02303     f = fopen(pattern_filename, "r");
02304     if (f == NULL)
02305       {
02306       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
02307         strerror(errno));
02308       goto EXIT2;
02309       }
02310     filename = pattern_filename;
02311     }
02312 
02313   while (fgets(buffer, MBUFTHIRD, f) != NULL)
02314     {
02315     char *s = buffer + (int)strlen(buffer);
02316     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
02317     *s = 0;
02318     linenumber++;
02319     if (buffer[0] == 0) continue;   /* Skip blank lines */
02320     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
02321       goto EXIT2;
02322     }
02323 
02324   if (f != stdin) fclose(f);
02325   }
02326 
02327 /* Study the regular expressions, as we will be running them many times */
02328 
02329 for (j = 0; j < pattern_count; j++)
02330   {
02331   hints_list[j] = pcre_study(pattern_list[j], 0, &error);
02332   if (error != NULL)
02333     {
02334     char s[16];
02335     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
02336     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
02337     goto EXIT2;
02338     }
02339   hint_count++;
02340   }
02341 
02342 /* If there are include or exclude patterns, compile them. */
02343 
02344 if (exclude_pattern != NULL)
02345   {
02346   exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
02347     pcretables);
02348   if (exclude_compiled == NULL)
02349     {
02350     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
02351       errptr, error);
02352     goto EXIT2;
02353     }
02354   }
02355 
02356 if (include_pattern != NULL)
02357   {
02358   include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
02359     pcretables);
02360   if (include_compiled == NULL)
02361     {
02362     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
02363       errptr, error);
02364     goto EXIT2;
02365     }
02366   }
02367 
02368 if (exclude_dir_pattern != NULL)
02369   {
02370   exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
02371     pcretables);
02372   if (exclude_dir_compiled == NULL)
02373     {
02374     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
02375       errptr, error);
02376     goto EXIT2;
02377     }
02378   }
02379 
02380 if (include_dir_pattern != NULL)
02381   {
02382   include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
02383     pcretables);
02384   if (include_dir_compiled == NULL)
02385     {
02386     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
02387       errptr, error);
02388     goto EXIT2;
02389     }
02390   }
02391 
02392 /* If there are no further arguments, do the business on stdin and exit. */
02393 
02394 if (i >= argc)
02395   {
02396   rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
02397   goto EXIT;
02398   }
02399 
02400 /* Otherwise, work through the remaining arguments as files or directories.
02401 Pass in the fact that there is only one argument at top level - this suppresses
02402 the file name if the argument is not a directory and filenames are not
02403 otherwise forced. */
02404 
02405 only_one_at_top = i == argc - 1;   /* Catch initial value of i */
02406 
02407 for (; i < argc; i++)
02408   {
02409   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
02410     only_one_at_top);
02411   if (frc > 1) rc = frc;
02412     else if (frc == 0 && rc == 1) rc = 0;
02413   }
02414 
02415 EXIT:
02416 if (pattern_list != NULL)
02417   {
02418   for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
02419   free(pattern_list);
02420   }
02421 if (hints_list != NULL)
02422   {
02423   for (i = 0; i < hint_count; i++) free(hints_list[i]);
02424   free(hints_list);
02425   }
02426 return rc;
02427 
02428 EXIT2:
02429 rc = 2;
02430 goto EXIT;
02431 }
02432 
02433 /* End of pcregrep */
02434 
02435 

Generated on Sun Dec 6 22:44:22 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Dec 07 16:21:15 2009 by modify_doxy.py rev. 173732