NCBI C++ Toolkit Cross Reference

C++/src/util/regexp/pcre_printint.src


  1 /*************************************************
  2 *      Perl-Compatible Regular Expressions       *
  3 *************************************************/
  4 
  5 /* PCRE is a library of functions to support regular expressions whose syntax
  6 and semantics are as close as possible to those of the Perl 5 language.
  7 
  8                        Written by Philip Hazel
  9            Copyright (c) 1997-2009 University of Cambridge
 10 
 11 -----------------------------------------------------------------------------
 12 Redistribution and use in source and binary forms, with or without
 13 modification, are permitted provided that the following conditions are met:
 14 
 15     * Redistributions of source code must retain the above copyright notice,
 16       this list of conditions and the following disclaimer.
 17 
 18     * Redistributions in binary form must reproduce the above copyright
 19       notice, this list of conditions and the following disclaimer in the
 20       documentation and/or other materials provided with the distribution.
 21 
 22     * Neither the name of the University of Cambridge nor the names of its
 23       contributors may be used to endorse or promote products derived from
 24       this software without specific prior written permission.
 25 
 26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 36 POSSIBILITY OF SUCH DAMAGE.
 37 -----------------------------------------------------------------------------
 38 */
 39 
 40 
 41 /* This module contains a PCRE private debugging function for printing out the
 42 internal form of a compiled regular expression, along with some supporting
 43 local functions. This source file is used in two places:
 44 
 45 (1) It is #included by pcre_compile.c when it is compiled in debugging mode
 46 (DEBUG defined in pcre_internal.h). It is not included in production compiles.
 47 
 48 (2) It is always #included by pcretest.c, which can be asked to print out a
 49 compiled regex for debugging purposes. */
 50 
 51 
 52 /* Macro that decides whether a character should be output as a literal or in
 53 hexadecimal. We don't use isprint() because that can vary from system to system
 54 (even without the use of locales) and we want the output always to be the same,
 55 for testing purposes. This macro is used in pcretest as well as in this file. */
 56 
/* PRINTABLE(c) accepts the fixed code ranges 64..254 in EBCDIC builds and
32..126 in all other builds. The argument is expanded twice, so it must not
have side effects. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* The table of operator names, built by expanding OP_NAME_LIST. The code in
this file indexes it directly with opcode bytes, for example OP_names[*code]. */

static const char *OP_names[] = { OP_NAME_LIST };
 66 
 67 
 68 
 69 /*************************************************
 70 *       Print single- or multi-byte character    *
 71 *************************************************/
 72 
 73 static int
 74 print_char(FILE *f, uschar *ptr, BOOL utf8)
 75 {
 76 int c = *ptr;
 77 
 78 #ifndef SUPPORT_UTF8
 79 utf8 = utf8;  /* Avoid compiler warning */
 80 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
 81 return 0;
 82 
 83 #else
 84 if (!utf8 || (c & 0xc0) != 0xc0)
 85   {
 86   if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
 87   return 0;
 88   }
 89 else
 90   {
 91   int i;
 92   int a = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */
 93   int s = 6*a;
 94   c = (c & _pcre_utf8_table3[a]) << s;
 95   for (i = 1; i <= a; i++)
 96     {
 97     /* This is a check for malformed UTF-8; it should only occur if the sanity
 98     check has been turned off. Rather than swallow random bytes, just stop if
 99     we hit a bad one. Print it with \X instead of \x as an indication. */
100 
101     if ((ptr[i] & 0xc0) != 0x80)
102       {
103       fprintf(f, "\\X{%x}", c);
104       return i - 1;
105       }
106 
107     /* The byte is OK */
108 
109     s -= 6;
110     c |= (ptr[i] & 0x3f) << s;
111     }
112   if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
113   return a;
114   }
115 #endif
116 }
117 
118 
119 
120 /*************************************************
121 *          Find Unicode property name            *
122 *************************************************/
123 
/* Finds the name of a Unicode property, for use in diagnostic output.

Arguments:
  ptype     the property type
  pvalue    the property value

Returns:    when SUPPORT_UCP is defined, the string within _pcre_utt_names for
            the entry of _pcre_utt whose type and value both match; the table
            is scanned from its last entry downwards, so if several entries
            match, the one with the highest index is used. If nothing matches,
            or if SUPPORT_UCP is not defined, the result is "??". */

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;
for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value)
    return _pcre_utt_names + _pcre_utt[i].name_offset;
  }
return "??";
#else
/* The arguments are unused in this configuration. Casting them to void
silences unused-parameter warnings without doing arithmetic on them (a signed
multiplication could overflow for arbitrary int arguments). */
(void)ptype;
(void)pvalue;
return "??";
#endif
}
140 
141 
142 
143 /*************************************************
144 *         Print compiled regex                   *
145 *************************************************/
146 
/* Prints a listing of the internal (compiled) form of a regular expression,
one item per output line, ending with the OP_END item and a line of dashes.

This function works for a regex whose integers are in either byte order.
However, we assume that what we are passed is a compiled regex. The
print_lengths flag controls whether offsets and lengths of items are printed.
They can be turned off from pcretest so that automatic tests on bytecode can be
written that do not depend on the value of LINK_SIZE.

Arguments:
  external_re     the compiled regex; it is accessed as a real_pcre
  f               the stream to write to
  print_lengths   TRUE to print item offsets and link values; when FALSE,
                    blanks are written in their place

Returns:          nothing */

static void
pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
{
real_pcre *re = (real_pcre *)external_re;
uschar *codestart, *code;
BOOL utf8;

unsigned int options = re->options;
int offset = re->name_table_offset;
int count = re->name_count;
int size = re->name_entry_size;

/* A magic number that does not match is taken to mean that the header fields
are in the opposite byte order. The three name-table fields are swapped as
16-bit values and the options as a 32-bit value. */

if (re->magic_number != MAGIC_NUMBER)
  {
  offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
  count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
  size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
  options = ((options << 24) & 0xff000000) |
            ((options <<  8) & 0x00ff0000) |
            ((options >>  8) & 0x0000ff00) |
            ((options >> 24) & 0x000000ff);
  }

/* The listing starts offset + count*size bytes from the start of the block,
that is, just past the name table as described by those three fields. */

code = codestart = (uschar *)re + offset + count * size;
utf8 = (options & PCRE_UTF8) != 0;

/* Each pass round this loop prints one item. The only exit is the return in
the OP_END case. */

for(;;)
  {
  uschar *ccode;
  int c;
  int extra = 0;   /* Bytes in this item beyond _pcre_OP_lengths[*code] */

  /* First column: the offset of this item from the start of the code. */

  if (print_lengths)
    fprintf(f, "%3d ", (int)(code - codestart));
  else
    fprintf(f, "    ");

  switch(*code)
    {
    case OP_END:
    fprintf(f, "    %s\n", OP_names[*code]);
    fprintf(f, "------------------------------------------------------------------\n");
    return;

    /* The byte following the opcode is shown as two hex digits. */

    case OP_OPT:
    fprintf(f, " %.2x %s", code[1], OP_names[*code]);
    break;

    /* A run of consecutive OP_CHAR items is printed on a single line. The
    code pointer is advanced inside the loop, so "continue" is used to bypass
    the common advance at the bottom of the main loop. */

    case OP_CHAR:
    fprintf(f, "    ");
    do
      {
      code++;
      code += 1 + print_char(f, code, utf8);
      }
    while (*code == OP_CHAR);
    fprintf(f, "\n");
    continue;

    /* As for OP_CHAR, but the line is flagged with "NC". */

    case OP_CHARNC:
    fprintf(f, " NC ");
    do
      {
      code++;
      code += 1 + print_char(f, code, utf8);
      }
    while (*code == OP_CHARNC);
    fprintf(f, "\n");
    continue;

    /* These print the link value that follows the opcode, then the opcode
    name, then the 2-byte number that follows the link. */

    case OP_CBRA:
    case OP_SCBRA:
    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
      else fprintf(f, "    ");
    fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
    break;

    /* These print the link value that follows the opcode, then the name. */

    case OP_BRA:
    case OP_SBRA:
    case OP_KETRMAX:
    case OP_KETRMIN:
    case OP_ALT:
    case OP_KET:
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_COND:
    case OP_SCOND:
    case OP_REVERSE:
    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
      else fprintf(f, "    ");
    fprintf(f, "%s", OP_names[*code]);
    break;

    /* The 2-byte number is printed in the first column, whatever the setting
    of print_lengths. */

    case OP_CREF:
    fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
    break;

    case OP_RREF:
    c = GET2(code, 1);
    if (c == RREF_ANY)
      fprintf(f, "    Cond recurse any");
    else
      fprintf(f, "    Cond recurse %d", c);
    break;

    case OP_DEF:
    fprintf(f, "    Cond def");
    break;

    /* Single-item repeats. The test against OP_TYPESTAR separates the TYPE
    forms, whose operand is another opcode, from the forms whose operand is a
    character; this relies on the OP_TYPExxx values being the higher-numbered
    ones within this list. A property operand has two more bytes (type and
    value), which are counted in "extra". */

    case OP_STAR:
    case OP_MINSTAR:
    case OP_POSSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEPOSPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEPOSQUERY:
    fprintf(f, "    ");
    if (*code >= OP_TYPESTAR)
      {
      fprintf(f, "%s", OP_names[code[1]]);
      if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
        {
        fprintf(f, " %s ", get_ucpname(code[2], code[3]));
        extra = 2;
        }
      }
    else extra = print_char(f, code+1, utf8);
    fprintf(f, "%s", OP_names[*code]);
    break;

    /* Counted repeats of a character. The 2-byte count follows the opcode and
    the character follows the count. Everything except OP_EXACT is shown with
    a lower bound of zero. */

    case OP_EXACT:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_POSUPTO:
    fprintf(f, "    ");
    extra = print_char(f, code+3, utf8);
    fprintf(f, "{");
    if (*code != OP_EXACT) fprintf(f, "0,");
    fprintf(f, "%d}", GET2(code,1));
    if (*code == OP_MINUPTO) fprintf(f, "?");
      else if (*code == OP_POSUPTO) fprintf(f, "+");
    break;

    /* Counted repeats of a type: the operand after the count is an opcode,
    with two more bytes if it is a property. */

    case OP_TYPEEXACT:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEPOSUPTO:
    fprintf(f, "    %s", OP_names[code[3]]);
    if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
      {
      fprintf(f, " %s ", get_ucpname(code[4], code[5]));
      extra = 2;
      }
    fprintf(f, "{");
    if (*code != OP_TYPEEXACT) fprintf(f, "0,");
    fprintf(f, "%d}", GET2(code,1));
    if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
      else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
    break;

    /* A negated character is shown as a one-character negated class. Only the
    single byte after the opcode is examined here. */

    case OP_NOT:
    c = code[1];
    if (PRINTABLE(c)) fprintf(f, "    [^%c]", c);
      else fprintf(f, "    [^\\x%02x]", c);
    break;

    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPOSSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTPOSPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTPOSQUERY:
    c = code[1];
    if (PRINTABLE(c)) fprintf(f, "    [^%c]", c);
      else fprintf(f, "    [^\\x%02x]", c);
    fprintf(f, "%s", OP_names[*code]);
    break;

    /* Counted repeats of a negated character; the byte follows the 2-byte
    count. */

    case OP_NOTEXACT:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTPOSUPTO:
    c = code[3];
    if (PRINTABLE(c)) fprintf(f, "    [^%c]{", c);
      else fprintf(f, "    [^\\x%02x]{", c);
    if (*code != OP_NOTEXACT) fprintf(f, "0,");
    fprintf(f, "%d}", GET2(code,1));
    if (*code == OP_NOTMINUPTO) fprintf(f, "?");
      else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
    break;

    case OP_RECURSE:
    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
      else fprintf(f, "    ");
    fprintf(f, "%s", OP_names[*code]);
    break;

    /* A back reference may be followed by a repeat item. Point ccode at
    whatever follows and jump into the class-handling block below, so that the
    repeat-printing code is shared. */

    case OP_REF:
    fprintf(f, "    \\%d", GET2(code,1));
    ccode = code + _pcre_OP_lengths[*code];
    goto CLASS_REF_REPEAT;

    /* The byte after the opcode is followed by two link-sized values. */

    case OP_CALLOUT:
    fprintf(f, "    %s %d %d %d", OP_names[*code], code[1], GET(code,2),
      GET(code, 2 + LINK_SIZE));
    break;

    case OP_PROP:
    case OP_NOTPROP:
    fprintf(f, "    %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
    break;

    /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
    having this code always here, and it makes it less messy without all those
    #ifdefs. */

    case OP_CLASS:
    case OP_NCLASS:
    case OP_XCLASS:
      {
      int i, min, max;
      BOOL printmap;

      fprintf(f, "    [");

      /* For OP_XCLASS, "extra" is set from the link-sized value that follows
      the opcode, and the flags byte after that value says whether a bit map
      is present and whether the class is negated. The other two class types
      always have a bit map, starting immediately after the opcode. */

      if (*code == OP_XCLASS)
        {
        extra = GET(code, 1);
        ccode = code + LINK_SIZE + 1;
        printmap = (*ccode & XCL_MAP) != 0;
        if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
        }
      else
        {
        printmap = TRUE;
        ccode = code + 1;
        }

      /* Print a bit map. It has one bit for each of the 256 byte values (32
      bytes in all). For each set bit at i, j is advanced to the end of the
      run of set bits that starts there. A run of exactly two is printed as
      the two characters; a longer run is printed as first-last. A hyphen or
      closing square bracket is preceded by a backslash. */

      if (printmap)
        {
        for (i = 0; i < 256; i++)
          {
          if ((ccode[i/8] & (1 << (i&7))) != 0)
            {
            int j;
            for (j = i+1; j < 256; j++)
              if ((ccode[j/8] & (1 << (j&7))) == 0) break;
            if (i == '-' || i == ']') fprintf(f, "\\");
            if (PRINTABLE(i)) fprintf(f, "%c", i);
              else fprintf(f, "\\x%02x", i);
            if (--j > i)
              {
              if (j != i + 1) fprintf(f, "-");
              if (j == '-' || j == ']') fprintf(f, "\\");
              if (PRINTABLE(j)) fprintf(f, "%c", j);
                else fprintf(f, "\\x%02x", j);
              }
            i = j;   /* Resume the scan after this run */
            }
          }
        ccode += 32;
        }

      /* For an XCLASS there is always some additional data: a list of items
      terminated by XCL_END. A property item has a type byte and a value byte;
      any other item is one character, or two characters for XCL_RANGE. The
      characters are always printed as UTF-8. */

      if (*code == OP_XCLASS)
        {
        int ch;
        while ((ch = *ccode++) != XCL_END)
          {
          if (ch == XCL_PROP)
            {
            int ptype = *ccode++;
            int pvalue = *ccode++;
            fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
            }
          else if (ch == XCL_NOTPROP)
            {
            int ptype = *ccode++;
            int pvalue = *ccode++;
            fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
            }
          else
            {
            ccode += 1 + print_char(f, ccode, TRUE);
            if (ch == XCL_RANGE)
              {
              fprintf(f, "-");
              ccode += 1 + print_char(f, ccode, TRUE);
              }
            }
          }
        }

      /* Indicate a non-UTF8 class which was created by negation */

      fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");

      /* Handle repeats after a class or a back reference. At this point ccode
      points to the byte that follows the class or reference. If it is a
      repeat opcode, it is printed on the same line and its length is added to
      "extra" so that the main loop steps over it as well. */

      CLASS_REF_REPEAT:
      switch(*ccode)
        {
        case OP_CRSTAR:
        case OP_CRMINSTAR:
        case OP_CRPLUS:
        case OP_CRMINPLUS:
        case OP_CRQUERY:
        case OP_CRMINQUERY:
        fprintf(f, "%s", OP_names[*ccode]);
        extra += _pcre_OP_lengths[*ccode];
        break;

        /* A maximum of zero is printed as an open-ended range. */

        case OP_CRRANGE:
        case OP_CRMINRANGE:
        min = GET2(ccode,1);
        max = GET2(ccode,3);
        if (max == 0) fprintf(f, "{%d,}", min);
        else fprintf(f, "{%d,%d}", min, max);
        if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
        extra += _pcre_OP_lengths[*ccode];
        break;

        /* Do nothing if it's not a repeat; this code stops picky compilers
        warning about the lack of a default code path. */

        default:
        break;
        }
      }
    break;

    /* Anything else is just an item with no data. */

    default:
    fprintf(f, "    %s", OP_names[*code]);
    break;
    }

  /* Move on to the next item: the table length for this opcode plus any
  additional bytes that were noted while printing it. */

  code += _pcre_OP_lengths[*code] + extra;
  fprintf(f, "\n");
  }
}
515 
516 /* End of pcre_printint.src */

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.