NCBI C++ Toolkit Cross Reference

C++/src/util/regexp/pcre_exec.c


  1 /*************************************************
  2 *      Perl-Compatible Regular Expressions       *
  3 *************************************************/
  4 
  5 /* PCRE is a library of functions to support regular expressions whose syntax
  6 and semantics are as close as possible to those of the Perl 5 language.
  7 
  8                        Written by Philip Hazel
  9            Copyright (c) 1997-2009 University of Cambridge
 10 
 11 -----------------------------------------------------------------------------
 12 Redistribution and use in source and binary forms, with or without
 13 modification, are permitted provided that the following conditions are met:
 14 
 15     * Redistributions of source code must retain the above copyright notice,
 16       this list of conditions and the following disclaimer.
 17 
 18     * Redistributions in binary form must reproduce the above copyright
 19       notice, this list of conditions and the following disclaimer in the
 20       documentation and/or other materials provided with the distribution.
 21 
 22     * Neither the name of the University of Cambridge nor the names of its
 23       contributors may be used to endorse or promote products derived from
 24       this software without specific prior written permission.
 25 
 26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 36 POSSIBILITY OF SUCH DAMAGE.
 37 -----------------------------------------------------------------------------
 38 */
 39 
 40 
 41 /* This module contains pcre_exec(), the externally visible function that does
 42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
 43 possible. There are also some static supporting functions. */
 44 
 45 #ifdef HAVE_CONFIG_H
 46 #include "config.h"
 47 #endif
 48 
 49 #define NLBLOCK md             /* Block containing newline information */
 50 #define PSSTART start_subject  /* Field containing processed string start */
 51 #define PSEND   end_subject    /* Field containing processed string end */
 52 /* NOTE(review): these three are defined ahead of the #include, presumably
    because macros in pcre_internal.h expand them - confirm against that header. */
 53 #include "pcre_internal.h"
 54 
 55 /* Undefine some potentially clashing cpp symbols: match() below uses min and
    max as names for its own locals (or, under NO_RECURSE, frame-field macros). */
 56 
 57 #undef min
 58 #undef max
 59 
 60 /* Flag bits for the match() function */
 61 
 62 #define match_condassert     0x01  /* Called to check a condition assertion */
 63 #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
 64 
 65 /* Non-error returns from the match() function. Error returns are externally
 66 defined PCRE_ERROR_xxx codes, which are all negative. */
 67 
 68 #define MATCH_MATCH        1
 69 #define MATCH_NOMATCH      0
 70 
 71 /* Special internal returns from the match() function. Make them sufficiently
 72 negative to avoid the external error codes. */
 73 /* Each is produced by the like-named OP_COMMIT/OP_PRUNE/OP_SKIP/OP_THEN case of
    match() once the remainder of the pattern has returned MATCH_NOMATCH. */
 74 #define MATCH_COMMIT       (-999)
 75 #define MATCH_PRUNE        (-998)
 76 #define MATCH_SKIP         (-997)
 77 #define MATCH_THEN         (-996)
 78 
 79 /* Maximum number of ints of offset to save on the stack for recursive calls.
 80 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
 81 because the offset vector is always a multiple of 3 long. */
 82 /* It sizes the stacksave[] array in match() (Xstacksave in a heapframe). */
 83 #define REC_STACK_SAVE_MAX 30
 84 
 85 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
 86 /* NOTE(review): the six entries presumably follow the repeat-opcode order
    *, *?, +, +?, ?, ?? - confirm at the use sites, which are outside this excerpt. */
 87 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
 88 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
 89 
 90 
 91 
 92 #ifdef DEBUG
 93 /*************************************************
 94 *        Debugging function to print chars       *
 95 *************************************************/
 96 
 97 /* Print a sequence of chars in printable format, stopping at the end of the
 98 subject if requested. Bytes that fail isprint() are written as \xhh escapes.
 99 
100 Arguments:
101   p           points to characters
102   length      number to print
103   is_subject  TRUE if printing from within the subject (clamp at end_subject)
104   md          pointer to matching data block; only read if is_subject is TRUE
105 
106 Returns:     nothing
107 */
108 
109 static void
110 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
111 {
112 unsigned int c;
113 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  /* clamp to the bytes that remain */
114 while (length-- > 0)
115   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
116 }
117 #endif
118 
119 
120 
121 /*************************************************
122 *          Match a back-reference                *
123 *************************************************/
124 
125 /* Compare "length" bytes at eptr against previously captured text. If a back
126 reference hasn't been set, the length that is passed is greater than the number
    of characters left in the string, so the match fails.
127 
128 Arguments:
129   offset      index into the offset vector (holds the capture's start offset)
130   eptr        points into the subject
131   length      length to be matched, in bytes
132   md          points to match data block
133   ims         the ims flags; only PCRE_CASELESS is examined here
134 
135 Returns:      TRUE if matched
136 */
137 
138 static BOOL
139 match_ref(int offset, register USPTR eptr, int length, match_data *md,
140   unsigned long int ims)
141 {
142 USPTR p = md->start_subject + md->offset_vector[offset];  /* start of the referenced text */
143 
144 #ifdef DEBUG
145 if (eptr >= md->end_subject)
146   printf("matching subject <null>");
147 else
148   {
149   printf("matching subject ");
150   pchars(eptr, length, TRUE, md);
151   }
152 printf(" against backref ");
153 pchars(p, length, FALSE, md);
154 printf("\n");
155 #endif
156 
157 /* Always fail if not enough characters left */
158 
159 if (length > md->end_subject - eptr) return FALSE;
160 
161 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162 properly if Unicode properties are supported. Otherwise, we can check only
163 ASCII characters. */
164 
165 if ((ims & PCRE_CASELESS) != 0)
166   {
167 #ifdef SUPPORT_UTF8
168 #ifdef SUPPORT_UCP
169   if (md->utf8)
170     {
171     USPTR endptr = eptr + length;
172     while (eptr < endptr)  /* NOTE(review): only eptr is bounded; assumes both sides decode to the same number of bytes - confirm */
173       {
174       int c, d;
175       GETCHARINC(c, eptr);
176       GETCHARINC(d, p);
177       if (c != d && c != UCD_OTHERCASE(d)) return FALSE;  /* accept identical chars or c == UCD_OTHERCASE(d) */
178       }
179     }
180   else
181 #endif
182 #endif
183 
184   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185   is no UCP support. */
186 
187   while (length-- > 0)
188     { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }  /* compare through the md->lcc byte table (presumably lower-casing - confirm) */
189   }
190 
191 /* In the caseful case, we can just compare the bytes, whether or not we
192 are in UTF-8 mode. */
193 
194 else
195   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196 
197 return TRUE;
198 }
199 
200 
201 
202 /***************************************************************************
203 ****************************************************************************
204                    RECURSION IN THE match() FUNCTION
205 
206 The match() function is highly recursive, though not every recursive call
207 increases the recursive depth. Nevertheless, some regular expressions can cause
208 it to recurse to a great depth. I was writing for Unix, so I just let it call
209 itself recursively. This uses the stack for saving everything that has to be
210 saved for a recursive call. On Unix, the stack can be large, and this works
211 fine.
212 
213 It turns out that on some non-Unix-like systems there are problems with
214 programs that use a lot of stack. (This despite the fact that every last chip
215 has oodles of memory these days, and techniques for extending the stack have
216 been known for decades.) So....
217 
218 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
219 calls by keeping local variables that need to be preserved in blocks of memory
 220 obtained from malloc() instead of on the stack. Macros are used to
221 achieve this so that the actual code doesn't look very different to what it
222 always used to.
223 
224 The original heap-recursive code used longjmp(). However, it seems that this
225 can be very slow on some operating systems. Following a suggestion from Stan
226 Switzer, the use of longjmp() has been abolished, at the cost of having to
227 provide a unique number for each call to RMATCH. There is no way of generating
228 a sequence of numbers at compile time in C. I have given them names, to make
229 them stand out more clearly.
230 
231 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
232 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
233 tests. Furthermore, not using longjmp() means that local dynamic variables
234 don't have indeterminate values; this has meant that the frame size can be
235 reduced because the result can be "passed back" by straight setting of the
236 variable instead of being passed in the frame.
237 ****************************************************************************
238 ***************************************************************************/
239 
 240 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
 241 below must be updated in sync.  */
 242 /* Each RMn is the "rw" argument of an RMATCH call. The NO_RECURSE form of RMATCH
     saves it in the frame's Xwhere field and pastes it into the resume label L_RMn;
     the recursive form ignores it. */
 243 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
 244        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
 245        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
 246        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
 247        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
 248        RM51,  RM52, RM53, RM54 };
249 
 250 /* These versions of the macros use the stack, as normal. There are debugging
 251 versions and production versions. Note that the "rw" argument of RMATCH isn't
 252 actually used in this definition. */
 253 /* Both RMATCH forms leave the callee's result in rrc and implicitly pass mstart
     and rdepth+1 from the invoking scope. */
 254 #ifndef NO_RECURSE
 255 #define REGISTER register
 256 /* DEBUG forms trace every call and return with printf(); note that the DEBUG
     RRETURN mentions ra twice, so its argument is evaluated twice there. */
 257 #ifdef DEBUG
 258 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
 259   { \
 260   printf("match() called in line %d\n", __LINE__); \
 261   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
 262   printf("to line %d\n", __LINE__); \
 263   }
 264 #define RRETURN(ra) \
 265   { \
 266   printf("match() returned %d from line %d ", ra, __LINE__); \
 267   return ra; \
 268   }
 269 #else
 270 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
 271   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
 272 #define RRETURN(ra) return ra
 273 #endif
274 
275 #else
276 
277 
278 /* These versions of the macros manage a private stack on the heap. Note that
279 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
280 argument of match(), which never changes. */
281 
282 #define REGISTER
283 
284 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
285   {\
286   heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
287   frame->Xwhere = rw; \
288   newframe->Xeptr = ra;\
289   newframe->Xecode = rb;\
290   newframe->Xmstart = mstart;\
291   newframe->Xoffset_top = rc;\
292   newframe->Xims = re;\
293   newframe->Xeptrb = rf;\
294   newframe->Xflags = rg;\
295   newframe->Xrdepth = frame->Xrdepth + 1;\
296   newframe->Xprevframe = frame;\
297   frame = newframe;\
298   DPRINTF(("restarting from line %d\n", __LINE__));\
299   goto HEAP_RECURSE;\
300   L_##rw:\
301   DPRINTF(("jumped back to line %d\n", __LINE__));\
302   }
 303 /* RRETURN(ra): pop and free the current frame. If a parent frame exists, put ra
     in rrc and jump to HEAP_RETURN (presumably a dispatch on the parent's Xwhere -
     that code is outside this excerpt); otherwise this was the top-level frame, so
     return ra from match(). NOTE(review): ra is evaluated after "frame" has been
     switched to the parent, so it must not mention frame-backed names. */
 304 #define RRETURN(ra)\
 305   {\
 306   heapframe *newframe = frame;\
 307   frame = newframe->Xprevframe;\
 308   (pcre_stack_free)(newframe);\
 309   if (frame != NULL)\
 310     {\
 311     rrc = ra;\
 312     goto HEAP_RETURN;\
 313     }\
 314   return ra;\
 315   }
316 
317 
 318 /* Structure for remembering the local variables in a private frame. Used only
     when NO_RECURSE is defined: one frame stands in for one match() activation,
     and match() reaches most fields through a same-named macro without the X. */
 319 
 320 typedef struct heapframe {
 321   struct heapframe *Xprevframe;  /* Invoking frame; NULL marks the top level */
 322 
 323   /* Function arguments that may change */
 324 
 325   USPTR Xeptr;
 326   const uschar *Xecode;
 327   USPTR Xmstart;
 328   int Xoffset_top;
 329   long int Xims;  /* NOTE(review): signed here, but match()'s ims parameter and Xoriginal_ims are unsigned long int */
 330   eptrblock *Xeptrb;
 331   int Xflags;
 332   unsigned int Xrdepth;  /* RMATCH sets this to the invoking frame's depth + 1 */
 333 
 334   /* Function local variables */
 335 
 336   USPTR Xcallpat;
 337 #ifdef SUPPORT_UTF8
 338   USPTR Xcharptr;
 339 #endif
 340   USPTR Xdata;
 341   USPTR Xnext;
 342   USPTR Xpp;
 343   USPTR Xprev;
 344   USPTR Xsaved_eptr;
 345 
 346   recursion_info Xnew_recursive;
 347 
 348   BOOL Xcur_is_word;
 349   BOOL Xcondition;
 350   BOOL Xprev_is_word;
 351 
 352   unsigned long int Xoriginal_ims;
 353 
 354 #ifdef SUPPORT_UCP
 355   int Xprop_type;
 356   int Xprop_value;
 357   int Xprop_fail_result;
 358   int Xprop_category;
 359   int Xprop_chartype;
 360   int Xprop_script;
 361   int Xoclength;
 362   uschar Xocchars[8];
 363 #endif
 364 
 365   int Xcodelink;
 366   int Xctype;
 367   unsigned int Xfc;
 368   int Xfi;
 369   int Xlength;
 370   int Xmax;
 371   int Xmin;
 372   int Xnumber;
 373   int Xoffset;
 374   int Xop;
 375   int Xsave_capture_last;
 376   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
 377   int Xstacksave[REC_STACK_SAVE_MAX];
 378 
 379   eptrblock Xnewptrb;
 380 
 381   /* Where to jump back to */
 382 
 383   int Xwhere;  /* RMn value stored by RMATCH in the invoking frame before "recursing" */
 384 
 385 } heapframe;
386 
387 #endif
388 
389 
390 /***************************************************************************
391 ***************************************************************************/
392 
393 
394 
395 /*************************************************
396 *         Match from current position            *
397 *************************************************/
398 
399 /* This function is called recursively in many circumstances. Whenever it
400 returns a negative (error) response, the outer incarnation must also return the
401 same response.
402 
403 Performance note: It might be tempting to extract commonly used fields from the
404 md structure (e.g. utf8, end_subject) into individual variables to improve
405 performance. Tests using gcc on a SPARC disproved this; in the first case, it
406 made performance worse.
407 
408 Arguments:
409    eptr        pointer to current character in subject
410    ecode       pointer to current position in compiled code
411    mstart      pointer to the current match start position (can be modified
412                  by encountering \K)
413    offset_top  current top pointer
414    md          pointer to "static" info for the match
415    ims         current /i, /m, and /s options
416    eptrb       pointer to chain of blocks containing eptr at start of
417                  brackets - for testing for empty matches
418    flags       can contain
419                  match_condassert - this is an assertion condition
420                  match_cbegroup - this is the start of an unlimited repeat
421                    group that can match an empty string
422    rdepth      the recursion depth
423 
424 Returns:       MATCH_MATCH if matched            )  these values are >= 0
425                MATCH_NOMATCH if failed to match  )
426                a negative PCRE_ERROR_xxx value if aborted by an error condition
427                  (e.g. stopped by repeated call or recursion limit)
428 */
429 
430 static int
431 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
432   int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
433   int flags, unsigned int rdepth)
434 {
435 /* These variables do not need to be preserved over recursion in this function,
436 so they can be ordinary variables in all cases. Mark some of them with
437 "register" because they are used a lot in loops. */
438 
439 register int  rrc;         /* Returns from recursive calls */
440 register int  i;           /* Used for loops not involving calls to RMATCH() */
441 register unsigned int c;   /* Character values not kept over RMATCH() calls */
442 register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
443 
444 BOOL minimize, possessive; /* Quantifier options */
445 int condcode;
446 
447 /* When recursion is not being used, all "local" variables that have to be
448 preserved over calls to RMATCH() are part of a "frame" which is obtained from
449 heap storage. Set up the top-level frame here; others are obtained from the
450 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
451 
452 #ifdef NO_RECURSE
453 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
454 frame->Xprevframe = NULL;            /* Marks the top level */
455 
456 /* Copy in the original argument variables */
457 
458 frame->Xeptr = eptr;
459 frame->Xecode = ecode;
460 frame->Xmstart = mstart;
461 frame->Xoffset_top = offset_top;
462 frame->Xims = ims;
463 frame->Xeptrb = eptrb;
464 frame->Xflags = flags;
465 frame->Xrdepth = rdepth;
466 
467 /* This is where control jumps back to to effect "recursion" */
468 
469 HEAP_RECURSE:
470 
471 /* Macros make the argument variables come from the current frame */
472 
473 #define eptr               frame->Xeptr
474 #define ecode              frame->Xecode
475 #define mstart             frame->Xmstart
476 #define offset_top         frame->Xoffset_top
477 #define ims                frame->Xims
478 #define eptrb              frame->Xeptrb
479 #define flags              frame->Xflags
480 #define rdepth             frame->Xrdepth
481 
482 /* Ditto for the local variables */
483 
484 #ifdef SUPPORT_UTF8
485 #define charptr            frame->Xcharptr
486 #endif
487 #define callpat            frame->Xcallpat
488 #define codelink           frame->Xcodelink
489 #define data               frame->Xdata
490 #define next               frame->Xnext
491 #define pp                 frame->Xpp
492 #define prev               frame->Xprev
493 #define saved_eptr         frame->Xsaved_eptr
494 
495 #define new_recursive      frame->Xnew_recursive
496 
497 #define cur_is_word        frame->Xcur_is_word
498 #define condition          frame->Xcondition
499 #define prev_is_word       frame->Xprev_is_word
500 
501 #define original_ims       frame->Xoriginal_ims
502 
503 #ifdef SUPPORT_UCP
504 #define prop_type          frame->Xprop_type
505 #define prop_value         frame->Xprop_value
506 #define prop_fail_result   frame->Xprop_fail_result
507 #define prop_category      frame->Xprop_category
508 #define prop_chartype      frame->Xprop_chartype
509 #define prop_script        frame->Xprop_script
510 #define oclength           frame->Xoclength
511 #define occhars            frame->Xocchars
512 #endif
513 
514 #define ctype              frame->Xctype
515 #define fc                 frame->Xfc
516 #define fi                 frame->Xfi
517 #define length             frame->Xlength
518 #define max                frame->Xmax
519 #define min                frame->Xmin
520 #define number             frame->Xnumber
521 #define offset             frame->Xoffset
522 #define op                 frame->Xop
523 #define save_capture_last  frame->Xsave_capture_last
524 #define save_offset1       frame->Xsave_offset1
525 #define save_offset2       frame->Xsave_offset2
526 #define save_offset3       frame->Xsave_offset3
527 #define stacksave          frame->Xstacksave
528 
529 #define newptrb            frame->Xnewptrb
530 
531 /* When recursion is being used, local variables are allocated on the stack and
532 get preserved during recursion in the normal way. In this environment, fi and
533 i, and fc and c, can be the same variables. */
534 
535 #else         /* NO_RECURSE not defined */
536 #define fi i
537 #define fc c
538 
539 
540 #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
541 const uschar *charptr;             /* in small blocks of the code. My normal */
542 #endif                             /* style of coding would have declared    */
543 const uschar *callpat;             /* them within each of those blocks.      */
544 const uschar *data;                /* However, in order to accommodate the   */
545 const uschar *next;                /* version of this code that uses an      */
546 USPTR         pp;                  /* external "stack" implemented on the    */
547 const uschar *prev;                /* heap, it is easier to declare them all */
548 USPTR         saved_eptr;          /* here, so the declarations can be cut   */
549                                    /* out in a block. The only declarations  */
550 recursion_info new_recursive;      /* within blocks below are for variables  */
551                                    /* that do not have to be preserved over  */
552 BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
553 BOOL condition;
554 BOOL prev_is_word;
555 
556 unsigned long int original_ims;
557 
558 #ifdef SUPPORT_UCP
559 int prop_type;
560 int prop_value;
561 int prop_fail_result;
562 int prop_category;
563 int prop_chartype;
564 int prop_script;
565 int oclength;
566 uschar occhars[8];
567 #endif
568 
569 int codelink;
570 int ctype;
571 int length;
572 int max;
573 int min;
574 int number;
575 int offset;
576 int op;
577 int save_capture_last;
578 int save_offset1, save_offset2, save_offset3;
579 int stacksave[REC_STACK_SAVE_MAX];
580 
581 eptrblock newptrb;
582 #endif     /* NO_RECURSE */
583 
 584 /* These statements are here to stop the compiler complaining about uninitialized
585 variables. */
586 
587 #ifdef SUPPORT_UCP
588 prop_value = 0;
589 prop_fail_result = 0;
590 #endif
591 
592 
593 /* This label is used for tail recursion, which is used in a few cases even
594 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
595 used. Thanks to Ian Taylor for noticing this possibility and sending the
596 original patch. */
597 
598 TAIL_RECURSE:
599 
600 /* OK, now we can get on with the real code of the function. Recursive calls
601 are specified by the macro RMATCH and RRETURN is used to return. When
602 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
603 and a "return", respectively (possibly with some debugging if DEBUG is
604 defined). However, RMATCH isn't like a function call because it's quite a
605 complicated macro. It has to be used in one particular way. This shouldn't,
606 however, impact performance when true recursion is being used. */
607 
608 #ifdef SUPPORT_UTF8
609 utf8 = md->utf8;       /* Local copy of the flag */
610 #else
611 utf8 = FALSE;
612 #endif
613 
614 /* First check that we haven't called match() too many times, or that we
615 haven't exceeded the recursive call limit. */
616 
617 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
618 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
619 
620 original_ims = ims;    /* Save for resetting on ')' */
621 
622 /* At the start of a group with an unlimited repeat that may match an empty
623 string, the match_cbegroup flag is set. When this is the case, add the current
624 subject pointer to the chain of such remembered pointers, to be checked when we
625 hit the closing ket, in order to break infinite loops that match no characters.
626 When match() is called in other circumstances, don't add to the chain. The
627 match_cbegroup flag must NOT be used with tail recursion, because the memory
628 block that is used is on the stack, so a new one may be required for each
629 match(). */
630 
631 if ((flags & match_cbegroup) != 0)
632   {
633   newptrb.epb_saved_eptr = eptr;
634   newptrb.epb_prev = eptrb;
635   eptrb = &newptrb;
636   }
637 
638 /* Now start processing the opcodes. */
639 
640 for (;;)
641   {
642   minimize = possessive = FALSE;
643   op = *ecode;
644 
645   /* For partial matching, remember if we ever hit the end of the subject after
646   matching at least one subject character. */
647 
648   if (md->partial &&
649       eptr >= md->end_subject &&
650       eptr > mstart)
651     md->hitend = TRUE;
652 
653   switch(op)
654     {
655     case OP_FAIL:
656     RRETURN(MATCH_NOMATCH);
657 
658     case OP_PRUNE:
659     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
660       ims, eptrb, flags, RM51);
661     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
662     RRETURN(MATCH_PRUNE);
663 
664     case OP_COMMIT:
665     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
666       ims, eptrb, flags, RM52);
667     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
668     RRETURN(MATCH_COMMIT);
669 
670     case OP_SKIP:
671     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
672       ims, eptrb, flags, RM53);
673     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
674     md->start_match_ptr = eptr;   /* Pass back current position */
675     RRETURN(MATCH_SKIP);
676 
677     case OP_THEN:
678     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
679       ims, eptrb, flags, RM54);
680     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
681     RRETURN(MATCH_THEN);
682 
683     /* Handle a capturing bracket. If there is space in the offset vector, save
684     the current subject position in the working slot at the top of the vector.
685     We mustn't change the current values of the data slot, because they may be
686     set from a previous iteration of this group, and be referred to by a
687     reference inside the group.
688 
689     If the bracket fails to match, we need to restore this value and also the
690     values of the final offsets, in case they were set by a previous iteration
691     of the same bracket.
692 
693     If there isn't enough space in the offset vector, treat this as if it were
694     a non-capturing bracket. Don't worry about setting the flag for the error
695     case here; that is handled in the code for KET. */
696 
697     case OP_CBRA:
698     case OP_SCBRA:
699     number = GET2(ecode, 1+LINK_SIZE);
700     offset = number << 1;
701 
702 #ifdef DEBUG
703     printf("start bracket %d\n", number);
704     printf("subject=");
705     pchars(eptr, 16, TRUE, md);
706     printf("\n");
707 #endif
708 
709     if (offset < md->offset_max)
710       {
711       save_offset1 = md->offset_vector[offset];
712       save_offset2 = md->offset_vector[offset+1];
713       save_offset3 = md->offset_vector[md->offset_end - number];
714       save_capture_last = md->capture_last;
715 
716       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
717       md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
718 
719       flags = (op == OP_SCBRA)? match_cbegroup : 0;
720       do
721         {
722         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
723           ims, eptrb, flags, RM1);
724         if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
725         md->capture_last = save_capture_last;
726         ecode += GET(ecode, 1);
727         }
728       while (*ecode == OP_ALT);
729 
730       DPRINTF(("bracket %d failed\n", number));
731 
732       md->offset_vector[offset] = save_offset1;
733       md->offset_vector[offset+1] = save_offset2;
734       md->offset_vector[md->offset_end - number] = save_offset3;
735 
736       RRETURN(MATCH_NOMATCH);
737       }
738 
739     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
740     as a non-capturing bracket. */
741 
742     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
743     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
744 
745     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
746 
747     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
748     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
749 
750     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
751     final alternative within the brackets, we would return the result of a
752     recursive call to match() whatever happened. We can reduce stack usage by
753     turning this into a tail recursion, except in the case when match_cbegroup
754     is set.*/
755 
756     case OP_BRA:
757     case OP_SBRA:
758     DPRINTF(("start non-capturing bracket\n"));
759     flags = (op >= OP_SBRA)? match_cbegroup : 0;
760     for (;;)
761       {
762       if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
763         {
764         if (flags == 0)    /* Not a possibly empty group */
765           {
766           ecode += _pcre_OP_lengths[*ecode];
767           DPRINTF(("bracket 0 tail recursion\n"));
768           goto TAIL_RECURSE;
769           }
770 
771         /* Possibly empty group; can't use tail recursion. */
772 
773         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
774           eptrb, flags, RM48);
775         RRETURN(rrc);
776         }
777 
778       /* For non-final alternatives, continue the loop for a NOMATCH result;
779       otherwise return. */
780 
781       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
782         eptrb, flags, RM2);
783       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
784       ecode += GET(ecode, 1);
785       }
786     /* Control never reaches here. */
787 
788     /* Conditional group: compilation checked that there are no more than
789     two branches. If the condition is false, skipping the first branch takes us
790     past the end if there is only one branch, but that's OK because that is
791     exactly what going to the ket would do. As there is only one branch to be
792     obeyed, we can use tail recursion to avoid using another stack frame. */
793 
794     case OP_COND:
795     case OP_SCOND:
796     codelink= GET(ecode, 1);
797 
798     /* Because of the way auto-callout works during compile, a callout item is
799     inserted between OP_COND and an assertion condition. */
800 
801     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
802       {
803       if (pcre_callout != NULL)
804         {
805         pcre_callout_block cb;
806         cb.version          = 1;   /* Version 1 of the callout block */
807         cb.callout_number   = ecode[LINK_SIZE+2];
808         cb.offset_vector    = md->offset_vector;
809         cb.subject          = (PCRE_SPTR)md->start_subject;
810         cb.subject_length   = md->end_subject - md->start_subject;
811         cb.start_match      = mstart - md->start_subject;
812         cb.current_position = eptr - md->start_subject;
813         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
814         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
815         cb.capture_top      = offset_top/2;
816         cb.capture_last     = md->capture_last;
817         cb.callout_data     = md->callout_data;
818         if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
819         if (rrc < 0) RRETURN(rrc);
820         }
821       ecode += _pcre_OP_lengths[OP_CALLOUT];
822       }
823 
824     condcode = ecode[LINK_SIZE+1];
825 
826     /* Now see what the actual condition is */
827 
828     if (condcode == OP_RREF)         /* Recursion test */
829       {
830       offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
831       condition = md->recursive != NULL &&
832         (offset == RREF_ANY || offset == md->recursive->group_num);
833       ecode += condition? 3 : GET(ecode, 1);
834       }
835 
836     else if (condcode == OP_CREF)    /* Group used test */
837       {
838       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
839       condition = offset < offset_top && md->offset_vector[offset] >= 0;
840       ecode += condition? 3 : GET(ecode, 1);
841       }
842 
843     else if (condcode == OP_DEF)     /* DEFINE - always false */
844       {
845       condition = FALSE;
846       ecode += GET(ecode, 1);
847       }
848 
849     /* The condition is an assertion. Call match() to evaluate it - setting
850     the final argument match_condassert causes it to stop at the end of an
851     assertion. */
852 
853     else
854       {
855       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
856           match_condassert, RM3);
857       if (rrc == MATCH_MATCH)
858         {
859         condition = TRUE;
860         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
861         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
862         }
863       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
864         {
865         RRETURN(rrc);         /* Need braces because of following else */
866         }
867       else
868         {
869         condition = FALSE;
870         ecode += codelink;
871         }
872       }
873 
874     /* We are now at the branch that is to be obeyed. As there is only one,
875     we can use tail recursion to avoid using another stack frame, except when
876     match_cbegroup is required for an unlimited repeat of a possibly empty
877     group. If the second alternative doesn't exist, we can just plough on. */
878 
879     if (condition || *ecode == OP_ALT)
880       {
881       ecode += 1 + LINK_SIZE;
882       if (op == OP_SCOND)        /* Possibly empty group */
883         {
884         RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
885         RRETURN(rrc);
886         }
887       else                       /* Group must match something */
888         {
889         flags = 0;
890         goto TAIL_RECURSE;
891         }
892       }
893     else                         /* Condition false & no alternative */
894       {
895       ecode += 1 + LINK_SIZE;
896       }
897     break;
898 
899 
900     /* End of the pattern, either real or forced. If we are in a top-level
901     recursion, we should restore the offsets appropriately and continue from
902     after the call. */
903 
904     case OP_ACCEPT:
905     case OP_END:
906     if (md->recursive != NULL && md->recursive->group_num == 0)
907       {
908       recursion_info *rec = md->recursive;
909       DPRINTF(("End of pattern in a (?0) recursion\n"));
910       md->recursive = rec->prevrec;
911       memmove(md->offset_vector, rec->offset_save,
912         rec->saved_max * sizeof(int));
913       mstart = rec->save_start;
914       ims = original_ims;
915       ecode = rec->after_call;
916       break;
917       }
918 
919     /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
920     string - backtracking will then try other alternatives, if any. */
921 
922     if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
923     md->end_match_ptr = eptr;           /* Record where we ended */
924     md->end_offset_top = offset_top;    /* and how many extracts were taken */
925     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
926     RRETURN(MATCH_MATCH);
927 
928     /* Change option settings */
929 
930     case OP_OPT:
931     ims = ecode[1];
932     ecode += 2;
933     DPRINTF(("ims set to %02lx\n", ims));
934     break;
935 
936     /* Assertion brackets. Check the alternative branches in turn - the
937     matching won't pass the KET for an assertion. If any one branch matches,
938     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
939     start of each branch to move the current point backwards, so the code at
940     this level is identical to the lookahead case. */
941 
942     case OP_ASSERT:
943     case OP_ASSERTBACK:
944     do
945       {
946       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
947         RM4);
948       if (rrc == MATCH_MATCH) break;
949       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
950       ecode += GET(ecode, 1);
951       }
952     while (*ecode == OP_ALT);
953     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
954 
955     /* If checking an assertion for a condition, return MATCH_MATCH. */
956 
957     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
958 
959     /* Continue from after the assertion, updating the offsets high water
960     mark, since extracts may have been taken during the assertion. */
961 
962     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
963     ecode += 1 + LINK_SIZE;
964     offset_top = md->end_offset_top;
965     continue;
966 
967     /* Negative assertion: all branches must fail to match */
968 
969     case OP_ASSERT_NOT:
970     case OP_ASSERTBACK_NOT:
971     do
972       {
973       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
974         RM5);
975       if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
976       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
977       ecode += GET(ecode,1);
978       }
979     while (*ecode == OP_ALT);
980 
981     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
982 
983     ecode += 1 + LINK_SIZE;
984     continue;
985 
986     /* Move the subject pointer back. This occurs only at the start of
987     each branch of a lookbehind assertion. If we are too close to the start to
988     move back, this match function fails. When working with UTF-8 we move
989     back a number of characters, not bytes. */
990 
991     case OP_REVERSE:
992 #ifdef SUPPORT_UTF8
993     if (utf8)
994       {
995       i = GET(ecode, 1);
996       while (i-- > 0)
997         {
998         eptr--;
999         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1000         BACKCHAR(eptr);
1001         }
1002       }
1003     else
1004 #endif
1005 
1006     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1007 
1008       {
1009       eptr -= GET(ecode, 1);
1010       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1011       }
1012 
1013     /* Skip to next op code */
1014 
1015     ecode += 1 + LINK_SIZE;
1016     break;
1017 
1018     /* The callout item calls an external function, if one is provided, passing
1019     details of the match so far. This is mainly for debugging, though the
1020     function is able to force a failure. */
1021 
1022     case OP_CALLOUT:
1023     if (pcre_callout != NULL)
1024       {
1025       pcre_callout_block cb;
1026       cb.version          = 1;   /* Version 1 of the callout block */
1027       cb.callout_number   = ecode[1];
1028       cb.offset_vector    = md->offset_vector;
1029       cb.subject          = (PCRE_SPTR)md->start_subject;
1030       cb.subject_length   = md->end_subject - md->start_subject;
1031       cb.start_match      = mstart - md->start_subject;
1032       cb.current_position = eptr - md->start_subject;
1033       cb.pattern_position = GET(ecode, 2);
1034       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1035       cb.capture_top      = offset_top/2;
1036       cb.capture_last     = md->capture_last;
1037       cb.callout_data     = md->callout_data;
1038       if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1039       if (rrc < 0) RRETURN(rrc);
1040       }
1041     ecode += 2 + 2*LINK_SIZE;
1042     break;
1043 
1044     /* Recursion either matches the current regex, or some subexpression. The
1045     offset data is the offset to the starting bracket from the start of the
1046     whole pattern. (This is so that it works from duplicated subpatterns.)
1047 
1048     If there are any capturing brackets started but not finished, we have to
1049     save their starting points and reinstate them after the recursion. However,
1050     we don't know how many such there are (offset_top records the completed
1051     total) so we just have to save all the potential data. There may be up to
1052     65535 such values, which is too large to put on the stack, but using malloc
1053     for small numbers seems expensive. As a compromise, the stack is used when
1054     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1055     is used. If the malloc fails, no partial save of the offsets is attempted:
1056     the code below gives up on the recursion and returns PCRE_ERROR_NOMEMORY
1057     to its caller.
1058 
1059     There are also other values that have to be saved. We use a chained
1060     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1061     for the original version of this logic. */
1062 
1063     case OP_RECURSE:
1064       {
1065       callpat = md->start_code + GET(ecode, 1);
1066       new_recursive.group_num = (callpat == md->start_code)? 0 :
1067         GET2(callpat, 1 + LINK_SIZE);
1068 
1069       /* Add to "recursing stack" */
1070 
1071       new_recursive.prevrec = md->recursive;
1072       md->recursive = &new_recursive;
1073 
1074       /* Find where to continue from afterwards */
1075 
1076       ecode += 1 + LINK_SIZE;
1077       new_recursive.after_call = ecode;
1078 
1079       /* Now save the offset data. */
1080 
1081       new_recursive.saved_max = md->offset_end;
1082       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1083         new_recursive.offset_save = stacksave;
1084       else
1085         {
1086         new_recursive.offset_save =
1087           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1088         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1089         }
1090 
1091       memcpy(new_recursive.offset_save, md->offset_vector,
1092             new_recursive.saved_max * sizeof(int));
1093       new_recursive.save_start = mstart;
1094       mstart = eptr;
1095 
1096       /* OK, now we can do the recursion. For each top-level alternative we
1097       restore the offset and recursion data. */
1098 
1099       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1100       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1101       do
1102         {
1103         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1104           md, ims, eptrb, flags, RM6);
1105         if (rrc == MATCH_MATCH)
1106           {
1107           DPRINTF(("Recursion matched\n"));
1108           md->recursive = new_recursive.prevrec;
1109           if (new_recursive.offset_save != stacksave)
1110             (pcre_free)(new_recursive.offset_save);
1111           RRETURN(MATCH_MATCH);
1112           }
1113         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1114           {
1115           DPRINTF(("Recursion gave error %d\n", rrc));
1116           if (new_recursive.offset_save != stacksave)
1117             (pcre_free)(new_recursive.offset_save);
1118           RRETURN(rrc);
1119           }
1120 
1121         md->recursive = &new_recursive;
1122         memcpy(md->offset_vector, new_recursive.offset_save,
1123             new_recursive.saved_max * sizeof(int));
1124         callpat += GET(callpat, 1);
1125         }
1126       while (*callpat == OP_ALT);
1127 
1128       DPRINTF(("Recursion didn't match\n"));
1129       md->recursive = new_recursive.prevrec;
1130       if (new_recursive.offset_save != stacksave)
1131         (pcre_free)(new_recursive.offset_save);
1132       RRETURN(MATCH_NOMATCH);
1133       }
1134     /* Control never reaches here */
1135 
1136     /* "Once" brackets are like assertion brackets except that after a match,
1137     the point in the subject string is not moved back. Thus there can never be
1138     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1139     Check the alternative branches in turn - the matching won't pass the KET
1140     for this kind of subpattern. If any one branch matches, we carry on as at
1141     the end of a normal bracket, leaving the subject pointer. */
1142 
1143     case OP_ONCE:
1144     prev = ecode;
1145     saved_eptr = eptr;
1146 
1147     do
1148       {
1149       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1150       if (rrc == MATCH_MATCH) break;
1151       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1152       ecode += GET(ecode,1);
1153       }
1154     while (*ecode == OP_ALT);
1155 
1156     /* If we hit the end of the group (which could be repeated), fail */
1157 
1158     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1159 
1160     /* Continue as from after the assertion, updating the offsets high water
1161     mark, since extracts may have been taken. */
1162 
1163     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1164 
1165     offset_top = md->end_offset_top;
1166     eptr = md->end_match_ptr;
1167 
1168     /* For a non-repeating ket, just continue at this level. This also
1169     happens for a repeating ket if no characters were matched in the group.
1170     This is the forcible breaking of infinite loops as implemented in Perl
1171     5.005. If there is an options reset, it will get obeyed in the normal
1172     course of events. */
1173 
1174     if (*ecode == OP_KET || eptr == saved_eptr)
1175       {
1176       ecode += 1+LINK_SIZE;
1177       break;
1178       }
1179 
1180     /* The repeating kets try the rest of the pattern or restart from the
1181     preceding bracket, in the appropriate order. The second "call" of match()
1182     uses tail recursion, to avoid using another stack frame. We need to reset
1183     any options that changed within the bracket before re-running it, so
1184     check the next opcode. */
1185 
1186     if (ecode[1+LINK_SIZE] == OP_OPT)
1187       {
1188       ims = (ims & ~PCRE_IMS) | ecode[4];
1189       DPRINTF(("ims set to %02lx at group repeat\n", ims));
1190       }
1191 
1192     if (*ecode == OP_KETRMIN)
1193       {
1194       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1195       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1196       ecode = prev;
1197       flags = 0;
1198       goto TAIL_RECURSE;
1199       }
1200     else  /* OP_KETRMAX */
1201       {
1202       RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1203       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1204       ecode += 1 + LINK_SIZE;
1205       flags = 0;
1206       goto TAIL_RECURSE;
1207       }
1208     /* Control never gets here */
1209 
1210     /* An alternation is the end of a branch; scan along to find the end of the
1211     bracketed group and go to there. */
1212 
1213     case OP_ALT:
1214     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1215     break;
1216 
1217     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1218     indicating that it may occur zero times. It may repeat infinitely, or not
1219     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1220     with fixed upper repeat limits are compiled as a number of copies, with the
1221     optional ones preceded by BRAZERO or BRAMINZERO. */
1222 
1223     case OP_BRAZERO:
1224       {
1225       next = ecode+1;
1226       RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1227       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1228       do next += GET(next,1); while (*next == OP_ALT);
1229       ecode = next + 1 + LINK_SIZE;
1230       }
1231     break;
1232 
1233     case OP_BRAMINZERO:
1234       {
1235       next = ecode+1;
1236       do next += GET(next, 1); while (*next == OP_ALT);
1237       RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1238       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1239       ecode++;
1240       }
1241     break;
1242 
1243     case OP_SKIPZERO:
1244       {
1245       next = ecode+1;
1246       do next += GET(next,1); while (*next == OP_ALT);
1247       ecode = next + 1 + LINK_SIZE;
1248       }
1249     break;
1250 
1251     /* End of a group, repeated or non-repeating. */
1252 
1253     case OP_KET:
1254     case OP_KETRMIN:
1255     case OP_KETRMAX:
1256     prev = ecode - GET(ecode, 1);
1257 
1258     /* If this was a group that remembered the subject start, in order to break
1259     infinite repeats of empty string matches, retrieve the subject start from
1260     the chain. Otherwise, set it NULL. */
1261 
1262     if (*prev >= OP_SBRA)
1263       {
1264       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1265       eptrb = eptrb->epb_prev;              /* Backup to previous group */
1266       }
1267     else saved_eptr = NULL;
1268 
1269     /* If we are at the end of an assertion group, stop matching and return
1270     MATCH_MATCH, but record the current high water mark for use by positive
1271     assertions. Do this also for the "once" (atomic) groups. */
1272 
1273     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1274         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1275         *prev == OP_ONCE)
1276       {
1277       md->end_match_ptr = eptr;      /* For ONCE */
1278       md->end_offset_top = offset_top;
1279       RRETURN(MATCH_MATCH);
1280       }
1281 
1282     /* For capturing groups we have to check the group number back at the start
1283     and if necessary complete handling an extraction by setting the offsets and
1284     bumping the high water mark. Note that whole-pattern recursion is coded as
1285     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1286     when the OP_END is reached. Other recursion is handled here. */
1287 
1288     if (*prev == OP_CBRA || *prev == OP_SCBRA)
1289       {
1290       number = GET2(prev, 1+LINK_SIZE);
1291       offset = number << 1;
1292 
1293 #ifdef DEBUG
1294       printf("end bracket %d", number);
1295       printf("\n");
1296 #endif
1297 
1298       md->capture_last = number;
1299       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1300         {
1301         md->offset_vector[offset] =
1302           md->offset_vector[md->offset_end - number];
1303         md->offset_vector[offset+1] = eptr - md->start_subject;
1304         if (offset_top <= offset) offset_top = offset + 2;
1305         }
1306 
1307       /* Handle a recursively called group. Restore the offsets
1308       appropriately and continue from after the call. */
1309 
1310       if (md->recursive != NULL && md->recursive->group_num == number)
1311         {
1312         recursion_info *rec = md->recursive;
1313         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1314         md->recursive = rec->prevrec;
1315         mstart = rec->save_start;
1316         memcpy(md->offset_vector, rec->offset_save,
1317           rec->saved_max * sizeof(int));
1318         ecode = rec->after_call;
1319         ims = original_ims;
1320         break;
1321         }
1322       }
1323 
1324     /* For both capturing and non-capturing groups, reset the value of the ims
1325     flags, in case they got changed during the group. */
1326 
1327     ims = original_ims;
1328     DPRINTF(("ims reset to %02lx\n", ims));
1329 
1330     /* For a non-repeating ket, just continue at this level. This also
1331     happens for a repeating ket if no characters were matched in the group.
1332     This is the forcible breaking of infinite loops as implemented in Perl
1333     5.005. If there is an options reset, it will get obeyed in the normal
1334     course of events. */
1335 
1336     if (*ecode == OP_KET || eptr == saved_eptr)
1337       {
1338       ecode += 1 + LINK_SIZE;
1339       break;
1340       }
1341 
1342     /* The repeating kets try the rest of the pattern or restart from the
1343     preceding bracket, in the appropriate order. In the second case, we can use
1344     tail recursion to avoid using another stack frame, unless we have an
1345     unlimited repeat of a group that can match an empty string. */
1346 
1347     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1348 
1349     if (*ecode == OP_KETRMIN)
1350       {
1351       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1352       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1353       if (flags != 0)    /* Could match an empty string */
1354         {
1355         RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1356         RRETURN(rrc);
1357         }
1358       ecode = prev;
1359       goto TAIL_RECURSE;
1360       }
1361     else  /* OP_KETRMAX */
1362       {
1363       RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1364       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1365       ecode += 1 + LINK_SIZE;
1366       flags = 0;
1367       goto TAIL_RECURSE;
1368       }
1369     /* Control never gets here */
1370 
1371     /* Start of subject unless notbol, or after internal newline if multiline */
1372 
1373     case OP_CIRC:
1374     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1375     if ((ims & PCRE_MULTILINE) != 0)
1376       {
1377       if (eptr != md->start_subject &&
1378           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1379         RRETURN(MATCH_NOMATCH);
1380       ecode++;
1381       break;
1382       }
1383     /* ... else fall through */
1384 
1385     /* Start of subject assertion */
1386 
1387     case OP_SOD:
1388     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1389     ecode++;
1390     break;
1391 
1392     /* Start of match assertion */
1393 
1394     case OP_SOM:
1395     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1396     ecode++;
1397     break;
1398 
1399     /* Reset the start of match point */
1400 
1401     case OP_SET_SOM:
1402     mstart = eptr;
1403     ecode++;
1404     break;
1405 
1406     /* Assert before internal newline if multiline, or before a terminating
1407     newline unless endonly is set, else end of subject unless noteol is set. */
1408 
1409     case OP_DOLL:
1410     if ((ims & PCRE_MULTILINE) != 0)
1411       {
1412       if (eptr < md->end_subject)
1413         { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1414       else
1415         { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1416       ecode++;
1417       break;
1418       }
1419     else
1420       {
1421       if (md->noteol) RRETURN(MATCH_NOMATCH);
1422       if (!md->endonly)
1423         {
1424         if (eptr != md->end_subject &&
1425             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1426           RRETURN(MATCH_NOMATCH);
1427         ecode++;
1428         break;
1429         }
1430       }
1431     /* ... else fall through for endonly */
1432 
1433     /* End of subject assertion (\z) */
1434 
1435     case OP_EOD:
1436     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1437     ecode++;
1438     break;
1439 
1440     /* End of subject or ending \n assertion (\Z) */
1441 
1442     case OP_EODN:
1443     if (eptr != md->end_subject &&
1444         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1445       RRETURN(MATCH_NOMATCH);
1446     ecode++;
1447     break;
1448 
1449     /* Word boundary assertions */
1450 
1451     case OP_NOT_WORD_BOUNDARY:
1452     case OP_WORD_BOUNDARY:
1453       {
1454 
1455       /* Find out if the previous and current characters are "word" characters.
1456       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1457       be "non-word" characters. */
1458 
1459 #ifdef SUPPORT_UTF8
1460       if (utf8)
1461         {
1462         if (eptr == md->start_subject) prev_is_word = FALSE; else
1463           {
1464           USPTR lastptr = eptr - 1;
1465           while((*lastptr & 0xc0) == 0x80) lastptr--;
1466           GETCHAR(c, lastptr);
1467           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1468           }
1469         if (eptr >= md->end_subject) cur_is_word = FALSE; else
1470           {
1471           GETCHAR(c, eptr);
1472           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1473           }
1474         }
1475       else
1476 #endif
1477 
1478       /* More streamlined when not in UTF-8 mode */
1479 
1480         {
1481         prev_is_word = (eptr != md->start_subject) &&
1482           ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1483         cur_is_word = (eptr < md->end_subject) &&
1484           ((md->ctypes[*eptr] & ctype_word) != 0);
1485         }
1486 
1487       /* Now see if the situation is what we want */
1488 
1489       if ((*ecode++ == OP_WORD_BOUNDARY)?
1490            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1491         RRETURN(MATCH_NOMATCH);
1492       }
1493     break;
1494 
1495     /* Match a single character type; inline for speed */
1496 
1497     case OP_ANY:
1498     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1499     /* Fall through */
1500 
1501     case OP_ALLANY:
1502     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1503     if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1504     ecode++;
1505     break;
1506 
1507     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1508     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1509 
1510     case OP_ANYBYTE:
1511     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1512     ecode++;
1513     break;
1514 
1515     case OP_NOT_DIGIT:
1516     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1517     GETCHARINCTEST(c, eptr);
1518     if (
1519 #ifdef SUPPORT_UTF8
1520        c < 256 &&
1521 #endif
1522        (md->ctypes[c] & ctype_digit) != 0
1523        )
1524       RRETURN(MATCH_NOMATCH);
1525     ecode++;
1526     break;
1527 
1528     case OP_DIGIT:
1529     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1530     GETCHARINCTEST(c, eptr);
1531     if (
1532 #ifdef SUPPORT_UTF8
1533        c >= 256 ||
1534 #endif
1535        (md->ctypes[c] & ctype_digit) == 0
1536        )
1537       RRETURN(MATCH_NOMATCH);
1538     ecode++;
1539     break;
1540 
1541     case OP_NOT_WHITESPACE:
1542     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1543     GETCHARINCTEST(c, eptr);
1544     if (
1545 #ifdef SUPPORT_UTF8
1546        c < 256 &&
1547 #endif
1548        (md->ctypes[c] & ctype_space) != 0
1549        )
1550       RRETURN(MATCH_NOMATCH);
1551     ecode++;
1552     break;
1553 
1554     case OP_WHITESPACE:
1555     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1556     GETCHARINCTEST(c, eptr);
1557     if (
1558 #ifdef SUPPORT_UTF8
1559        c >= 256 ||
1560 #endif
1561        (md->ctypes[c] & ctype_space) == 0
1562        )
1563       RRETURN(MATCH_NOMATCH);
1564     ecode++;
1565     break;
1566 
1567     case OP_NOT_WORDCHAR:
1568     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1569     GETCHARINCTEST(c, eptr);
1570     if (
1571 #ifdef SUPPORT_UTF8
1572        c < 256 &&
1573 #endif
1574        (md->ctypes[c] & ctype_word) != 0
1575        )
1576       RRETURN(MATCH_NOMATCH);
1577     ecode++;
1578     break;
1579 
1580     case OP_WORDCHAR:
1581     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1582     GETCHARINCTEST(c, eptr);
1583     if (
1584 #ifdef SUPPORT_UTF8
1585        c >= 256 ||
1586 #endif
1587        (md->ctypes[c] & ctype_word) == 0
1588        )
1589       RRETURN(MATCH_NOMATCH);
1590     ecode++;
1591     break;
1592 
1593     case OP_ANYNL:
1594     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1595     GETCHARINCTEST(c, eptr);
1596     switch(c)
1597       {
1598       default: RRETURN(MATCH_NOMATCH);
1599       case 0x000d:
1600       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1601       break;
1602 
1603       case 0x000a:
1604       break;
1605 
1606       case 0x000b:
1607       case 0x000c:
1608       case 0x0085:
1609       case 0x2028:
1610       case 0x2029:
1611       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1612       break;
1613       }
1614     ecode++;
1615     break;
1616 
1617     case OP_NOT_HSPACE:
1618     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1619     GETCHARINCTEST(c, eptr);
1620     switch(c)
1621       {
1622       default: break;
1623       case 0x09:      /* HT */
1624       case 0x20:      /* SPACE */
1625       case 0xa0:      /* NBSP */
1626       case 0x1680:    /* OGHAM SPACE MARK */
1627       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1628       case 0x2000:    /* EN QUAD */
1629       case 0x2001:    /* EM QUAD */
1630       case 0x2002:    /* EN SPACE */
1631       case 0x2003:    /* EM SPACE */
1632       case 0x2004:    /* THREE-PER-EM SPACE */
1633       case 0x2005:    /* FOUR-PER-EM SPACE */
1634       case 0x2006:    /* SIX-PER-EM SPACE */
1635       case 0x2007:    /* FIGURE SPACE */
1636       case 0x2008:    /* PUNCTUATION SPACE */
1637       case 0x2009:    /* THIN SPACE */
1638       case 0x200A:    /* HAIR SPACE */
1639       case 0x202f:    /* NARROW NO-BREAK SPACE */
1640       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1641       case 0x3000:    /* IDEOGRAPHIC SPACE */
1642       RRETURN(MATCH_NOMATCH);
1643       }
1644     ecode++;
1645     break;
1646 
1647     case OP_HSPACE:
1648     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1649     GETCHARINCTEST(c, eptr);
1650     switch(c)
1651       {
1652       default: RRETURN(MATCH_NOMATCH);
1653       case 0x09:      /* HT */
1654       case 0x20:      /* SPACE */
1655       case 0xa0:      /* NBSP */
1656       case 0x1680:    /* OGHAM SPACE MARK */
1657       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1658       case 0x2000:    /* EN QUAD */
1659       case 0x2001:    /* EM QUAD */
1660       case 0x2002:    /* EN SPACE */
1661       case 0x2003:    /* EM SPACE */
1662       case 0x2004:    /* THREE-PER-EM SPACE */
1663       case 0x2005:    /* FOUR-PER-EM SPACE */
1664       case 0x2006:    /* SIX-PER-EM SPACE */
1665       case 0x2007:    /* FIGURE SPACE */
1666       case 0x2008:    /* PUNCTUATION SPACE */
1667       case 0x2009:    /* THIN SPACE */
1668       case 0x200A:    /* HAIR SPACE */
1669       case 0x202f:    /* NARROW NO-BREAK SPACE */
1670       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1671       case 0x3000:    /* IDEOGRAPHIC SPACE */
1672       break;
1673       }
1674     ecode++;
1675     break;
1676 
1677     case OP_NOT_VSPACE:
1678     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1679     GETCHARINCTEST(c, eptr);
1680     switch(c)
1681       {
1682       default: break;
1683       case 0x0a:      /* LF */
1684       case 0x0b:      /* VT */
1685       case 0x0c:      /* FF */
1686       case 0x0d:      /* CR */
1687       case 0x85:      /* NEL */
1688       case 0x2028:    /* LINE SEPARATOR */
1689       case 0x2029:    /* PARAGRAPH SEPARATOR */
1690       RRETURN(MATCH_NOMATCH);
1691       }
1692     ecode++;
1693     break;
1694 
1695     case OP_VSPACE:
1696     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1697     GETCHARINCTEST(c, eptr);
1698     switch(c)
1699       {
1700       default: RRETURN(MATCH_NOMATCH);
1701       case 0x0a:      /* LF */
1702       case 0x0b:      /* VT */
1703       case 0x0c:      /* FF */
1704       case 0x0d:      /* CR */
1705       case 0x85:      /* NEL */
1706       case 0x2028:    /* LINE SEPARATOR */
1707       case 0x2029:    /* PARAGRAPH SEPARATOR */
1708       break;
1709       }
1710     ecode++;
1711     break;
1712 
1713 #ifdef SUPPORT_UCP
1714     /* Check the next character by Unicode property. We will get here only
1715     if the support is in the binary; otherwise a compile-time error occurs. */
1716 
1717     case OP_PROP:
1718     case OP_NOTPROP:
1719     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1720     GETCHARINCTEST(c, eptr);
1721       {
1722       const ucd_record *prop = GET_UCD(c);
1723 
1724       switch(ecode[1])
1725         {
1726         case PT_ANY:
1727         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1728         break;
1729 
1730         case PT_LAMP:
1731         if ((prop->chartype == ucp_Lu ||
1732              prop->chartype == ucp_Ll ||
1733              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1734           RRETURN(MATCH_NOMATCH);
1735          break;
1736 
1737         case PT_GC:
1738         if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1739           RRETURN(MATCH_NOMATCH);
1740         break;
1741 
1742         case PT_PC:
1743         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1744           RRETURN(MATCH_NOMATCH);
1745         break;
1746 
1747         case PT_SC:
1748         if ((ecode[2] != prop->script) == (op == OP_PROP))
1749           RRETURN(MATCH_NOMATCH);
1750         break;
1751 
1752         default:
1753         RRETURN(PCRE_ERROR_INTERNAL);
1754         }
1755 
1756       ecode += 3;
1757       }
1758     break;
1759 
1760     /* Match an extended Unicode sequence. We will get here only if the support
1761     is in the binary; otherwise a compile-time error occurs. */
1762 
1763     case OP_EXTUNI:
1764     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1765     GETCHARINCTEST(c, eptr);
1766       {
1767       int category = UCD_CATEGORY(c);
1768       if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1769       while (eptr < md->end_subject)
1770         {
1771         int len = 1;
1772         if (!utf8) c = *eptr; else
1773           {
1774           GETCHARLEN(c, eptr, len);
1775           }
1776         category = UCD_CATEGORY(c);
1777         if (category != ucp_M) break;
1778         eptr += len;
1779         }
1780       }
1781     ecode++;
1782     break;
1783 #endif
1784 
1785 
1786     /* Match a back reference, possibly repeatedly. Look past the end of the
1787     item to see if there is repeat information following. The code is similar
1788     to that for character classes, but repeated for efficiency. Then obey
1789     similar code to character type repeats - written out again for speed.
1790     However, if the referenced string is the empty string, always treat
1791     it as matched, any number of times (otherwise there could be infinite
1792     loops). */
1793 
1794     case OP_REF:
1795       {
1796       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1797       ecode += 3;
1798 
1799       /* If the reference is unset, there are two possibilities:
1800 
1801       (a) In the default, Perl-compatible state, set the length to be longer
1802       than the amount of subject left; this ensures that every attempt at a
1803       match fails. We can't just fail here, because of the possibility of
1804       quantifiers with zero minima.
1805 
1806       (b) If the JavaScript compatibility flag is set, set the length to zero
1807       so that the back reference matches an empty string.
1808 
1809       Otherwise, set the length to the length of what was matched by the
1810       referenced subpattern. */
1811 
1812       if (offset >= offset_top || md->offset_vector[offset] < 0)
1813         length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1814       else
1815         length = md->offset_vector[offset+1] - md->offset_vector[offset];
1816 
1817       /* Set up for repetition, or handle the non-repeated case */
1818 
1819       switch (*ecode)
1820         {
1821         case OP_CRSTAR:
1822         case OP_CRMINSTAR:
1823         case OP_CRPLUS:
1824         case OP_CRMINPLUS:
1825         case OP_CRQUERY:
1826         case OP_CRMINQUERY:
1827         c = *ecode++ - OP_CRSTAR;
1828         minimize = (c & 1) != 0;
1829         min = rep_min[c];                 /* Pick up values from tables; */
1830         max = rep_max[c];                 /* zero for max => infinity */
1831         if (max == 0) max = INT_MAX;
1832         break;
1833 
1834         case OP_CRRANGE:
1835         case OP_CRMINRANGE:
1836         minimize = (*ecode == OP_CRMINRANGE);
1837         min = GET2(ecode, 1);
1838         max = GET2(ecode, 3);
1839         if (max == 0) max = INT_MAX;
1840         ecode += 5;
1841         break;
1842 
1843         default:               /* No repeat follows */
1844         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1845         eptr += length;
1846         continue;              /* With the main loop */
1847         }
1848 
1849       /* If the length of the reference is zero, just continue with the
1850       main loop. */
1851 
1852       if (length == 0) continue;
1853 
1854       /* First, ensure the minimum number of matches are present. We get back
1855       the length of the reference string explicitly rather than passing the
1856       address of eptr, so that eptr can be a register variable. */
1857 
1858       for (i = 1; i <= min; i++)
1859         {
1860         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1861         eptr += length;
1862         }
1863 
1864       /* If min = max, continue at the same level without recursion.
1865       They are not both allowed to be zero. */
1866 
1867       if (min == max) continue;
1868 
1869       /* If minimizing, keep trying and advancing the pointer */
1870 
1871       if (minimize)
1872         {
1873         for (fi = min;; fi++)
1874           {
1875           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1876           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1877           if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1878             RRETURN(MATCH_NOMATCH);
1879           eptr += length;
1880           }
1881         /* Control never gets here */
1882         }
1883 
1884       /* If maximizing, find the longest string and work backwards */
1885 
1886       else
1887         {
1888         pp = eptr;
1889         for (i = min; i < max; i++)
1890           {
1891           if (!match_ref(offset, eptr, length, md, ims)) break;
1892           eptr += length;
1893           }
1894         while (eptr >= pp)
1895           {
1896           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1897           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1898           eptr -= length;
1899           }
1900         RRETURN(MATCH_NOMATCH);
1901         }
1902       }
1903     /* Control never gets here */
1904 
1905 
1906 
1907     /* Match a bit-mapped character class, possibly repeatedly. This op code is
1908     used when all the characters in the class have values in the range 0-255,
1909     and either the matching is caseful, or the characters are in the range
1910     0-127 when UTF-8 processing is enabled. The only difference between
1911     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1912     encountered.
1913 
1914     First, look past the end of the item to see if there is repeat information
1915     following. Then obey similar code to character type repeats - written out
1916     again for speed. */
1917 
1918     case OP_NCLASS:
1919     case OP_CLASS:
1920       {
1921       data = ecode + 1;                /* Save for matching */
1922       ecode += 33;                     /* Advance past the item */
1923 
1924       switch (*ecode)
1925         {
1926         case OP_CRSTAR:
1927         case OP_CRMINSTAR:
1928         case OP_CRPLUS:
1929         case OP_CRMINPLUS:
1930         case OP_CRQUERY:
1931         case OP_CRMINQUERY:
1932         c = *ecode++ - OP_CRSTAR;
1933         minimize = (c & 1) != 0;
1934         min = rep_min[c];                 /* Pick up values from tables; */
1935         max = rep_max[c];                 /* zero for max => infinity */
1936         if (max == 0) max = INT_MAX;
1937         break;
1938 
1939         case OP_CRRANGE:
1940         case OP_CRMINRANGE:
1941         minimize = (*ecode == OP_CRMINRANGE);
1942         min = GET2(ecode, 1);
1943         max = GET2(ecode, 3);
1944         if (max == 0) max = INT_MAX;
1945         ecode += 5;
1946         break;
1947 
1948         default:               /* No repeat follows */
1949         min = max = 1;
1950         break;
1951         }
1952 
1953       /* First, ensure the minimum number of matches are present. */
1954 
1955 #ifdef SUPPORT_UTF8
1956       /* UTF-8 mode */
1957       if (utf8)
1958         {
1959         for (i = 1; i <= min; i++)
1960           {
1961           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1962           GETCHARINC(c, eptr);
1963           if (c > 255)
1964             {
1965             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1966             }
1967           else
1968             {
1969             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1970             }
1971           }
1972         }
1973       else
1974 #endif
1975       /* Not UTF-8 mode */
1976         {
1977         for (i = 1; i <= min; i++)
1978           {
1979           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1980           c = *eptr++;
1981           if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1982           }
1983         }
1984 
1985       /* If max == min we can continue with the main loop without the
1986       need to recurse. */
1987 
1988       if (min == max) continue;
1989 
1990       /* If minimizing, keep testing the rest of the expression and advancing
1991       the pointer while it matches the class. */
1992 
1993       if (minimize)
1994         {
1995 #ifdef SUPPORT_UTF8
1996         /* UTF-8 mode */
1997         if (utf8)
1998           {
1999           for (fi = min;; fi++)
2000             {
2001             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2002             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2003             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2004             GETCHARINC(c, eptr);
2005             if (c > 255)
2006               {
2007               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2008               }
2009             else
2010               {
2011               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2012               }
2013             }
2014           }
2015         else
2016 #endif
2017         /* Not UTF-8 mode */
2018           {
2019           for (fi = min;; fi++)
2020             {
2021             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2022             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2023             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2024             c = *eptr++;
2025             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2026             }
2027           }
2028         /* Control never gets here */
2029         }
2030 
2031       /* If maximizing, find the longest possible run, then work backwards. */
2032 
2033       else
2034         {
2035         pp = eptr;
2036 
2037 #ifdef SUPPORT_UTF8
2038         /* UTF-8 mode */
2039         if (utf8)
2040           {
2041           for (i = min; i < max; i++)
2042             {
2043             int len = 1;
2044             if (eptr >= md->end_subject) break;
2045             GETCHARLEN(c, eptr, len);
2046             if (c > 255)
2047               {
2048               if (op == OP_CLASS) break;
2049               }
2050             else
2051               {
2052               if ((data[c/8] & (1 << (c&7))) == 0) break;
2053               }
2054             eptr += len;
2055             }
2056           for (;;)
2057             {
2058             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2059             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2060             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2061             BACKCHAR(eptr);
2062             }
2063           }
2064         else
2065 #endif
2066           /* Not UTF-8 mode */
2067           {
2068           for (i = min; i < max; i++)
2069             {
2070             if (eptr >= md->end_subject) break;
2071             c = *eptr;
2072             if ((data[c/8] & (1 << (c&7))) == 0) break;
2073             eptr++;
2074             }
2075           while (eptr >= pp)
2076             {
2077             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2078             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2079             eptr--;
2080             }
2081           }
2082 
2083         RRETURN(MATCH_NOMATCH);
2084         }
2085       }
2086     /* Control never gets here */
2087 
2088 
2089     /* Match an extended character class. This opcode is encountered only
2090     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2091     mode, because Unicode properties are supported in non-UTF-8 mode. */
2092 
2093 #ifdef SUPPORT_UTF8
2094     case OP_XCLASS:
2095       {
2096       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
2097       ecode += GET(ecode, 1);                      /* Advance past the item */
2098 
2099       switch (*ecode)
2100         {
2101         case OP_CRSTAR:
2102         case OP_CRMINSTAR:
2103         case OP_CRPLUS:
2104         case OP_CRMINPLUS:
2105         case OP_CRQUERY:
2106         case OP_CRMINQUERY:
2107         c = *ecode++ - OP_CRSTAR;
2108         minimize = (c & 1) != 0;
2109         min = rep_min[c];                 /* Pick up values from tables; */
2110         max = rep_max[c];                 /* zero for max => infinity */
2111         if (max == 0) max = INT_MAX;
2112         break;
2113 
2114         case OP_CRRANGE:
2115         case OP_CRMINRANGE:
2116         minimize = (*ecode == OP_CRMINRANGE);
2117         min = GET2(ecode, 1);
2118         max = GET2(ecode, 3);
2119         if (max == 0) max = INT_MAX;
2120         ecode += 5;
2121         break;
2122 
2123         default:               /* No repeat follows */
2124         min = max = 1;
2125         break;
2126         }
2127 
2128       /* First, ensure the minimum number of matches are present. */
2129 
2130       for (i = 1; i <= min; i++)
2131         {
2132         if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2133         GETCHARINCTEST(c, eptr);
2134         if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2135         }
2136 
2137       /* If max == min we can continue with the main loop without the
2138       need to recurse. */
2139 
2140       if (min == max) continue;
2141 
2142       /* If minimizing, keep testing the rest of the expression and advancing
2143       the pointer while it matches the class. */
2144 
2145       if (minimize)
2146         {
2147         for (fi = min;; fi++)
2148           {
2149           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2150           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2151           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2152           GETCHARINCTEST(c, eptr);
2153           if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2154           }
2155         /* Control never gets here */
2156         }
2157 
2158       /* If maximizing, find the longest possible run, then work backwards. */
2159 
2160       else
2161         {
2162         pp = eptr;
2163         for (i = min; i < max; i++)
2164           {
2165           int len = 1;
2166           if (eptr >= md->end_subject) break;
2167           GETCHARLENTEST(c, eptr, len);
2168           if (!_pcre_xclass(c, data)) break;
2169           eptr += len;
2170           }
2171         for(;;)
2172           {
2173           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2174           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2175           if (eptr-- == pp) break;        /* Stop if tried at original pos */
2176           if (utf8) BACKCHAR(eptr);
2177           }
2178         RRETURN(MATCH_NOMATCH);
2179         }
2180 
2181       /* Control never gets here */
2182       }
2183 #endif    /* End of XCLASS */
2184 
2185     /* Match a single character, casefully */
2186 
2187     case OP_CHAR:
2188 #ifdef SUPPORT_UTF8
2189     if (utf8)
2190       {
2191       length = 1;
2192       ecode++;
2193       GETCHARLEN(fc, ecode, length);
2194       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2195       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2196       }
2197     else
2198 #endif
2199 
2200     /* Non-UTF-8 mode */
2201       {
2202       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2203       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2204       ecode += 2;
2205       }
2206     break;
2207 
2208     /* Match a single character, caselessly */
2209 
2210     case OP_CHARNC:
2211 #ifdef SUPPORT_UTF8
2212     if (utf8)
2213       {
2214       length = 1;
2215       ecode++;
2216       GETCHARLEN(fc, ecode, length);
2217 
2218       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2219 
2220       /* If the pattern character's value is < 128, we have only one byte, and
2221       can use the fast lookup table. */
2222 
2223       if (fc < 128)
2224         {
2225         if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2226         }
2227 
2228       /* Otherwise we must pick up the subject character */
2229 
2230       else
2231         {
2232         unsigned int dc;
2233         GETCHARINC(dc, eptr);
2234         ecode += length;
2235 
2236         /* If we have Unicode property support, we can use it to test the other
2237         case of the character, if there is one. */
2238 
2239         if (fc != dc)
2240           {
2241 #ifdef SUPPORT_UCP
2242           if (dc != UCD_OTHERCASE(fc))
2243 #endif
2244             RRETURN(MATCH_NOMATCH);
2245           }
2246         }
2247       }
2248     else
2249 #endif   /* SUPPORT_UTF8 */
2250 
2251     /* Non-UTF-8 mode */
2252       {
2253       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2254       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2255       ecode += 2;
2256       }
2257     break;
2258 
2259     /* Match a single character repeatedly. */
2260 
2261     case OP_EXACT:
2262     min = max = GET2(ecode, 1);
2263     ecode += 3;
2264     goto REPEATCHAR;
2265 
2266     case OP_POSUPTO:
2267     possessive = TRUE;
2268     /* Fall through */
2269 
2270     case OP_UPTO:
2271     case OP_MINUPTO:
2272     min = 0;
2273     max = GET2(ecode, 1);
2274     minimize = *ecode == OP_MINUPTO;
2275     ecode += 3;
2276     goto REPEATCHAR;
2277 
2278     case OP_POSSTAR:
2279     possessive = TRUE;
2280     min = 0;
2281     max = INT_MAX;
2282     ecode++;
2283     goto REPEATCHAR;
2284 
2285     case OP_POSPLUS:
2286     possessive = TRUE;
2287     min = 1;
2288     max = INT_MAX;
2289     ecode++;
2290     goto REPEATCHAR;
2291 
2292     case OP_POSQUERY:
2293     possessive = TRUE;
2294     min = 0;
2295     max = 1;
2296     ecode++;
2297     goto REPEATCHAR;
2298 
2299     case OP_STAR:
2300     case OP_MINSTAR:
2301     case OP_PLUS:
2302     case OP_MINPLUS:
2303     case OP_QUERY:
2304     case OP_MINQUERY:
2305     c = *ecode++ - OP_STAR;
2306     minimize = (c & 1) != 0;
2307     min = rep_min[c];                 /* Pick up values from tables; */
2308     max = rep_max[c];                 /* zero for max => infinity */
2309     if (max == 0) max = INT_MAX;
2310 
2311     /* Common code for all repeated single-character matches. We can give
2312     up quickly if there are fewer than the minimum number of characters left in
2313     the subject. */
2314 
2315     REPEATCHAR:
2316 #ifdef SUPPORT_UTF8
2317     if (utf8)
2318       {
2319       length = 1;
2320       charptr = ecode;
2321       GETCHARLEN(fc, ecode, length);
2322       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2323       ecode += length;
2324 
2325       /* Handle multibyte character matching specially here. There is
2326       support for caseless matching if UCP support is present. */
2327 
2328       if (length > 1)
2329         {
2330 #ifdef SUPPORT_UCP
2331         unsigned int othercase;
2332         if ((ims & PCRE_CASELESS) != 0 &&
2333             (othercase = UCD_OTHERCASE(fc)) != fc)
2334           oclength = _pcre_ord2utf8(othercase, occhars);
2335         else oclength = 0;
2336 #endif  /* SUPPORT_UCP */
2337 
2338         for (i = 1; i <= min; i++)
2339           {
2340           if (memcmp(eptr, charptr, length) == 0) eptr += length;
2341 #ifdef SUPPORT_UCP
2342           /* Need braces because of following else */
2343           else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2344           else
2345             {
2346             if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2347             eptr += oclength;
2348             }
2349 #else   /* without SUPPORT_UCP */
2350           else { RRETURN(MATCH_NOMATCH); }
2351 #endif  /* SUPPORT_UCP */
2352           }
2353 
2354         if (min == max) continue;
2355 
2356         if (minimize)
2357           {
2358           for (fi = min;; fi++)
2359             {
2360             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2361             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2362             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2363             if (memcmp(eptr, charptr, length) == 0) eptr += length;
2364 #ifdef SUPPORT_UCP
2365             /* Need braces because of following else */
2366             else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2367             else
2368               {
2369               if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2370               eptr += oclength;
2371               }
2372 #else   /* without SUPPORT_UCP */
2373             else { RRETURN (MATCH_NOMATCH); }
2374 #endif  /* SUPPORT_UCP */
2375             }
2376           /* Control never gets here */
2377           }
2378 
2379         else  /* Maximize */
2380           {
2381           pp = eptr;
2382           for (i = min; i < max; i++)
2383             {
2384             if (eptr > md->end_subject - length) break;
2385             if (memcmp(eptr, charptr, length) == 0) eptr += length;
2386 #ifdef SUPPORT_UCP
2387             else if (oclength == 0) break;
2388             else
2389               {
2390               if (memcmp(eptr, occhars, oclength) != 0) break;
2391               eptr += oclength;
2392               }
2393 #else   /* without SUPPORT_UCP */
2394             else break;
2395 #endif  /* SUPPORT_UCP */
2396             }
2397 
2398           if (possessive) continue;
2399           for(;;)
2400            {
2401            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2402            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2403            if (eptr == pp) RRETURN(MATCH_NOMATCH);
2404 #ifdef SUPPORT_UCP
2405            eptr--;
2406            BACKCHAR(eptr);
2407 #else   /* without SUPPORT_UCP */
2408            eptr -= length;
2409 #endif  /* SUPPORT_UCP */
2410            }
2411           }
2412         /* Control never gets here */
2413         }
2414 
2415       /* If the length of a UTF-8 character is 1, we fall through here, and
2416       obey the code as for non-UTF-8 characters below, though in this case the
2417       value of fc will always be < 128. */
2418       }
2419     else
2420 #endif  /* SUPPORT_UTF8 */
2421 
2422     /* When not in UTF-8 mode, load a single-byte character. */
2423       {
2424       if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2425       fc = *ecode++;
2426       }
2427 
2428     /* The value of fc at this point is always less than 256, though we may or
2429     may not be in UTF-8 mode. The code is duplicated for the caseless and
2430     caseful cases, for speed, since matching characters is likely to be quite
2431     common. First, ensure the minimum number of matches are present. If min =
2432     max, continue at the same level without recursing. Otherwise, if
2433     minimizing, keep trying the rest of the expression and advancing one
2434     matching character if failing, up to the maximum. Alternatively, if
2435     maximizing, find the maximum number of characters and work backwards. */
2436 
2437     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2438       max, eptr));
2439 
2440     if ((ims & PCRE_CASELESS) != 0)
2441       {
2442       fc = md->lcc[fc];
2443       for (i = 1; i <= min; i++)
2444         if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2445       if (min == max) continue;
2446       if (minimize)
2447         {
2448         for (fi = min;; fi++)
2449           {
2450           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2451           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2452           if (fi >= max || eptr >= md->end_subject ||
2453               fc != md->lcc[*eptr++])
2454             RRETURN(MATCH_NOMATCH);
2455           }
2456         /* Control never gets here */
2457         }
2458       else  /* Maximize */
2459         {
2460         pp = eptr;
2461         for (i = min; i < max; i++)
2462           {
2463           if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2464           eptr++;
2465           }
2466         if (possessive) continue;
2467         while (eptr >= pp)
2468           {
2469           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2470           eptr--;
2471           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2472           }
2473         RRETURN(MATCH_NOMATCH);
2474         }
2475       /* Control never gets here */
2476       }
2477 
2478     /* Caseful comparisons (includes all multi-byte characters) */
2479 
2480     else
2481       {
2482       for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2483       if (min == max) continue;
2484       if (minimize)
2485         {
2486         for (fi = min;; fi++)
2487           {
2488           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2489           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2490           if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2491             RRETURN(MATCH_NOMATCH);
2492           }
2493         /* Control never gets here */
2494         }
2495       else  /* Maximize */
2496         {
2497         pp = eptr;
2498         for (i = min; i < max; i++)
2499           {
2500           if (eptr >= md->end_subject || fc != *eptr) break;
2501           eptr++;
2502           }
2503         if (possessive) continue;
2504         while (eptr >= pp)
2505           {
2506           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2507           eptr--;
2508           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2509           }
2510         RRETURN(MATCH_NOMATCH);
2511         }
2512       }
2513     /* Control never gets here */
2514 
2515     /* Match a negated single one-byte character. The character we are
2516     checking can be multibyte. */
2517 
2518     case OP_NOT:
2519     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2520     ecode++;
2521     GETCHARINCTEST(c, eptr);
2522     if ((ims & PCRE_CASELESS) != 0)
2523       {
2524 #ifdef SUPPORT_UTF8
2525       if (c < 256)
2526 #endif
2527       c = md->lcc[c];
2528       if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2529       }
2530     else
2531       {
2532       if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2533       }
2534     break;
2535 
2536     /* Match a negated single one-byte character repeatedly. This is almost a
2537     repeat of the code for a repeated single character, but I haven't found a
2538     nice way of commoning these up that doesn't require a test of the
2539     positive/negative option for each character match. Maybe that wouldn't add
2540     very much to the time taken, but character matching *is* what this is all
2541     about... */
2542 
2543     case OP_NOTEXACT:
2544     min = max = GET2(ecode, 1);
2545     ecode += 3;
2546     goto REPEATNOTCHAR;
2547 
2548     case OP_NOTUPTO:
2549     case OP_NOTMINUPTO:
2550     min = 0;
2551     max = GET2(ecode, 1);
2552     minimize = *ecode == OP_NOTMINUPTO;
2553     ecode += 3;
2554     goto REPEATNOTCHAR;
2555 
2556     case OP_NOTPOSSTAR:
2557     possessive = TRUE;
2558     min = 0;
2559     max = INT_MAX;
2560     ecode++;
2561     goto REPEATNOTCHAR;
2562 
2563     case OP_NOTPOSPLUS:
2564     possessive = TRUE;
2565     min = 1;
2566     max = INT_MAX;
2567     ecode++;
2568     goto REPEATNOTCHAR;
2569 
2570     case OP_NOTPOSQUERY:
2571     possessive = TRUE;
2572     min = 0;
2573     max = 1;
2574     ecode++;
2575     goto REPEATNOTCHAR;
2576 
2577     case OP_NOTPOSUPTO:
2578     possessive = TRUE;
2579     min = 0;
2580     max = GET2(ecode, 1);
2581     ecode += 3;
2582     goto REPEATNOTCHAR;
2583 
2584     case OP_NOTSTAR:
2585     case OP_NOTMINSTAR:
2586     case OP_NOTPLUS:
2587     case OP_NOTMINPLUS:
2588     case OP_NOTQUERY:
2589     case OP_NOTMINQUERY:
2590     c = *ecode++ - OP_NOTSTAR;
2591     minimize = (c & 1) != 0;
2592     min = rep_min[c];                 /* Pick up values from tables; */
2593     max = rep_max[c];                 /* zero for max => infinity */
2594     if (max == 0) max = INT_MAX;
2595 
2596     /* Common code for all repeated single-byte matches. We can give up quickly
2597     if there are fewer than the minimum number of bytes left in the
2598     subject. */
2599 
2600     REPEATNOTCHAR:
2601     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2602     fc = *ecode++;
2603 
2604     /* The code is duplicated for the caseless and caseful cases, for speed,
2605     since matching characters is likely to be quite common. First, ensure the
2606     minimum number of matches are present. If min = max, continue at the same
2607     level without recursing. Otherwise, if minimizing, keep trying the rest of
2608     the expression and advancing one matching character if failing, up to the
2609     maximum. Alternatively, if maximizing, find the maximum number of
2610     characters and work backwards. */
2611 
2612     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2613       max, eptr));
2614 
2615     if ((ims & PCRE_CASELESS) != 0)
2616       {
2617       fc = md->lcc[fc];
2618 
2619 #ifdef SUPPORT_UTF8
2620       /* UTF-8 mode */
2621       if (utf8)
2622         {
2623         register unsigned int d;
2624         for (i = 1; i <= min; i++)
2625           {
2626           GETCHARINC(d, eptr);
2627           if (d < 256) d = md->lcc[d];
2628           if (fc == d) RRETURN(MATCH_NOMATCH);
2629           }
2630         }
2631       else
2632 #endif
2633 
2634       /* Not UTF-8 mode */
2635         {
2636         for (i = 1; i <= min; i++)
2637           if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2638         }
2639 
2640       if (min == max) continue;
2641 
2642       if (minimize)
2643         {
2644 #ifdef SUPPORT_UTF8
2645         /* UTF-8 mode */
2646         if (utf8)
2647           {
2648           register unsigned int d;
2649           for (fi = min;; fi++)
2650             {
2651             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2652             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2653             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2654             GETCHARINC(d, eptr);
2655             if (d < 256) d = md->lcc[d];
2656             if (fc == d) RRETURN(MATCH_NOMATCH);
2657 
2658             }
2659           }
2660         else
2661 #endif
2662         /* Not UTF-8 mode */
2663           {
2664           for (fi = min;; fi++)
2665             {
2666             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2667             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2668             if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2669               RRETURN(MATCH_NOMATCH);
2670             }
2671           }
2672         /* Control never gets here */
2673         }
2674 
2675       /* Maximize case */
2676 
2677       else
2678         {
2679         pp = eptr;
2680 
2681 #ifdef SUPPORT_UTF8
2682         /* UTF-8 mode */
2683         if (utf8)
2684           {
2685           register unsigned int d;
2686           for (i = min; i < max; i++)
2687             {
2688             int len = 1;
2689             if (eptr >= md->end_subject) break;
2690             GETCHARLEN(d, eptr, len);
2691             if (d < 256) d = md->lcc[d];
2692             if (fc == d) break;
2693             eptr += len;
2694             }
2695         if (possessive) continue;
2696         for(;;)
2697             {
2698             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2699             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2700             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2701             BACKCHAR(eptr);
2702             }
2703           }
2704         else
2705 #endif
2706         /* Not UTF-8 mode */
2707           {
2708           for (i = min; i < max; i++)
2709             {
2710             if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2711             eptr++;
2712             }
2713           if (possessive) continue;
2714           while (eptr >= pp)
2715             {
2716             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2717             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2718             eptr--;
2719             }
2720           }
2721 
2722         RRETURN(MATCH_NOMATCH);
2723         }
2724       /* Control never gets here */
2725       }
2726 
2727     /* Caseful comparisons */
2728 
2729     else
2730       {
2731 #ifdef SUPPORT_UTF8
2732       /* UTF-8 mode */
2733       if (utf8)
2734         {
2735         register unsigned int d;
2736         for (i = 1; i <= min; i++)
2737           {
2738           GETCHARINC(d, eptr);
2739           if (fc == d) RRETURN(MATCH_NOMATCH);
2740           }
2741         }
2742       else
2743 #endif
2744       /* Not UTF-8 mode */
2745         {
2746         for (i = 1; i <= min; i++)
2747           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2748         }
2749 
2750       if (min == max) continue;
2751 
2752       if (minimize)
2753         {
2754 #ifdef SUPPORT_UTF8
2755         /* UTF-8 mode */
2756         if (utf8)
2757           {
2758           register unsigned int d;
2759           for (fi = min;; fi++)
2760             {
2761             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2762             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2763             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2764             GETCHARINC(d, eptr);
2765             if (fc == d) RRETURN(MATCH_NOMATCH);
2766             }
2767           }
2768         else
2769 #endif
2770         /* Not UTF-8 mode */
2771           {
2772           for (fi = min;; fi++)
2773             {
2774             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2775             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2776             if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2777               RRETURN(MATCH_NOMATCH);
2778             }
2779           }
2780         /* Control never gets here */
2781         }
2782 
2783       /* Maximize case */
2784 
2785       else
2786         {
2787         pp = eptr;
2788 
2789 #ifdef SUPPORT_UTF8
2790         /* UTF-8 mode */
2791         if (utf8)
2792           {
2793           register unsigned int d;
2794           for (i = min; i < max; i++)
2795             {
2796             int len = 1;
2797             if (eptr >= md->end_subject) break;
2798             GETCHARLEN(d, eptr, len);
2799             if (fc == d) break;
2800             eptr += len;
2801             }
2802           if (possessive) continue;
2803           for(;;)
2804             {
2805             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2806             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2807             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2808             BACKCHAR(eptr);
2809             }
2810           }
2811         else
2812 #endif
2813         /* Not UTF-8 mode */
2814           {
2815           for (i = min; i < max; i++)
2816             {
2817             if (eptr >= md->end_subject || fc == *eptr) break;
2818             eptr++;
2819             }
2820           if (possessive) continue;
2821           while (eptr >= pp)
2822             {
2823             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2824             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2825             eptr--;
2826             }
2827           }
2828 
2829         RRETURN(MATCH_NOMATCH);
2830         }
2831       }
2832     /* Control never gets here */
2833 
2834     /* Match a single character type repeatedly; several different opcodes
2835     share code. This is very similar to the code for single characters, but we
2836     repeat it in the interests of efficiency. */
2837 
2838     case OP_TYPEEXACT:
2839     min = max = GET2(ecode, 1);
2840     minimize = TRUE;
2841     ecode += 3;
2842     goto REPEATTYPE;
2843 
2844     case OP_TYPEUPTO:
2845     case OP_TYPEMINUPTO:
2846     min = 0;
2847     max = GET2(ecode, 1);
2848     minimize = *ecode == OP_TYPEMINUPTO;
2849     ecode += 3;
2850     goto REPEATTYPE;
2851 
2852     case OP_TYPEPOSSTAR:
2853     possessive = TRUE;
2854     min = 0;
2855     max = INT_MAX;
2856     ecode++;
2857     goto REPEATTYPE;
2858 
2859     case OP_TYPEPOSPLUS:
2860     possessive = TRUE;
2861     min = 1;
2862     max = INT_MAX;
2863     ecode++;
2864     goto REPEATTYPE;
2865 
2866     case OP_TYPEPOSQUERY:
2867     possessive = TRUE;
2868     min = 0;
2869     max = 1;
2870     ecode++;
2871     goto REPEATTYPE;
2872 
2873     case OP_TYPEPOSUPTO:
2874     possessive = TRUE;
2875     min = 0;
2876     max = GET2(ecode, 1);
2877     ecode += 3;
2878     goto REPEATTYPE;
2879 
2880     case OP_TYPESTAR:
2881     case OP_TYPEMINSTAR:
2882     case OP_TYPEPLUS:
2883     case OP_TYPEMINPLUS:
2884     case OP_TYPEQUERY:
2885     case OP_TYPEMINQUERY:
2886     c = *ecode++ - OP_TYPESTAR;
2887     minimize = (c & 1) != 0;
2888     min = rep_min[c];                 /* Pick up values from tables; */
2889     max = rep_max[c];                 /* zero for max => infinity */
2890     if (max == 0) max = INT_MAX;
2891 
2892     /* Common code for all repeated single character type matches. Note that
2893     in UTF-8 mode, '.' matches a character of any length, but for the other
2894     character types, the valid characters are all one-byte long. */
2895 
2896     REPEATTYPE:
2897     ctype = *ecode++;      /* Code for the character type */
2898 
2899 #ifdef SUPPORT_UCP
2900     if (ctype == OP_PROP || ctype == OP_NOTPROP)
2901       {
2902       prop_fail_result = ctype == OP_NOTPROP;
2903       prop_type = *ecode++;
2904       prop_value = *ecode++;
2905       }
2906     else prop_type = -1;
2907 #endif
2908 
2909     /* First, ensure the minimum number of matches are present. Use inline
2910     code for maximizing the speed, and do the type test once at the start
2911     (i.e. keep it out of the loop). Also we can test that there are at least
2912     the minimum number of bytes before we start. This isn't as effective in
2913     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
2914     is tidier. Also separate the UCP code, which can be the same for both UTF-8
2915     and single-bytes. */
2916 
2917     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2918     if (min > 0)
2919       {
2920 #ifdef SUPPORT_UCP
2921       if (prop_type >= 0)
2922         {
2923         switch(prop_type)
2924           {
2925           case PT_ANY:
2926           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2927           for (i = 1; i <= min; i++)
2928             {
2929             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2930             GETCHARINCTEST(c, eptr);
2931             }
2932           break;
2933 
2934           case PT_LAMP:
2935           for (i = 1; i <= min; i++)
2936             {
2937             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2938             GETCHARINCTEST(c, eptr);
2939             prop_chartype = UCD_CHARTYPE(c);
2940             if ((prop_chartype == ucp_Lu ||
2941                  prop_chartype == ucp_Ll ||
2942                  prop_chartype == ucp_Lt) == prop_fail_result)
2943               RRETURN(MATCH_NOMATCH);
2944             }
2945           break;
2946 
2947           case PT_GC:
2948           for (i = 1; i <= min; i++)
2949             {
2950             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2951             GETCHARINCTEST(c, eptr);
2952             prop_category = UCD_CATEGORY(c);
2953             if ((prop_category == prop_value) == prop_fail_result)
2954               RRETURN(MATCH_NOMATCH);
2955             }
2956           break;
2957 
2958           case PT_PC:
2959           for (i = 1; i <= min; i++)
2960             {
2961             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2962             GETCHARINCTEST(c, eptr);
2963             prop_chartype = UCD_CHARTYPE(c);
2964             if ((prop_chartype == prop_value) == prop_fail_result)
2965               RRETURN(MATCH_NOMATCH);
2966             }
2967           break;
2968 
2969           case PT_SC:
2970           for (i = 1; i <= min; i++)
2971             {
2972             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2973             GETCHARINCTEST(c, eptr);
2974             prop_script = UCD_SCRIPT(c);
2975             if ((prop_script == prop_value) == prop_fail_result)
2976               RRETURN(MATCH_NOMATCH);
2977             }
2978           break;
2979 
2980           default:
2981           RRETURN(PCRE_ERROR_INTERNAL);
2982           }
2983         }
2984 
2985       /* Match extended Unicode sequences. We will get here only if the
2986       support is in the binary; otherwise a compile-time error occurs. */
2987 
2988       else if (ctype == OP_EXTUNI)
2989         {
2990         for (i = 1; i <= min; i++)
2991           {
2992           GETCHARINCTEST(c, eptr);
2993           prop_category = UCD_CATEGORY(c);
2994           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2995           while (eptr < md->end_subject)
2996             {
2997             int len = 1;
2998             if (!utf8) c = *eptr; else
2999               {
3000               GETCHARLEN(c, eptr, len);
3001               }
3002             prop_category = UCD_CATEGORY(c);
3003             if (prop_category != ucp_M) break;
3004             eptr += len;
3005             }
3006           }
3007         }
3008 
3009       else
3010 #endif     /* SUPPORT_UCP */
3011 
3012 /* Handle all other cases when the coding is UTF-8 */
3013 
3014 #ifdef SUPPORT_UTF8
3015       if (utf8) switch(ctype)
3016         {
3017         case OP_ANY:
3018         for (i = 1; i <= min; i++)
3019           {
3020           if (eptr >= md->end_subject || IS_NEWLINE(eptr))
3021             RRETURN(MATCH_NOMATCH);
3022           eptr++;
3023           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3024           }
3025         break;
3026 
3027         case OP_ALLANY:
3028         for (i = 1; i <= min; i++)
3029           {
3030           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3031           eptr++;
3032           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3033           }
3034         break;
3035 
3036         case OP_ANYBYTE:
3037         eptr += min;
3038         break;
3039 
3040         case OP_ANYNL:
3041         for (i = 1; i <= min; i++)
3042           {
3043           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3044           GETCHARINC(c, eptr);
3045           switch(c)
3046             {
3047             default: RRETURN(MATCH_NOMATCH);
3048             case 0x000d:
3049             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3050             break;
3051 
3052             case 0x000a:
3053             break;
3054 
3055             case 0x000b:
3056             case 0x000c:
3057             case 0x0085:
3058             case 0x2028:
3059             case 0x2029:
3060             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3061             break;
3062             }
3063           }
3064         break;
3065 
3066         case OP_NOT_HSPACE:
3067         for (i = 1; i <= min; i++)
3068           {
3069           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3070           GETCHARINC(c, eptr);
3071           switch(c)
3072             {
3073             default: break;
3074             case 0x09:      /* HT */
3075             case 0x20:      /* SPACE */
3076             case 0xa0:      /* NBSP */
3077             case 0x1680:    /* OGHAM SPACE MARK */
3078             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3079             case 0x2000:    /* EN QUAD */
3080             case 0x2001:    /* EM QUAD */
3081             case 0x2002:    /* EN SPACE */
3082             case 0x2003:    /* EM SPACE */
3083             case 0x2004:    /* THREE-PER-EM SPACE */
3084             case 0x2005:    /* FOUR-PER-EM SPACE */
3085             case 0x2006:    /* SIX-PER-EM SPACE */
3086             case 0x2007:    /* FIGURE SPACE */
3087             case 0x2008:    /* PUNCTUATION SPACE */
3088             case 0x2009:    /* THIN SPACE */
3089             case 0x200A:    /* HAIR SPACE */
3090             case 0x202f:    /* NARROW NO-BREAK SPACE */
3091             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3092             case 0x3000:    /* IDEOGRAPHIC SPACE */
3093             RRETURN(MATCH_NOMATCH);
3094             }
3095           }
3096         break;
3097 
3098         case OP_HSPACE:
3099         for (i = 1; i <= min; i++)
3100           {
3101           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3102           GETCHARINC(c, eptr);
3103           switch(c)
3104             {
3105             default: RRETURN(MATCH_NOMATCH);
3106             case 0x09:      /* HT */
3107             case 0x20:      /* SPACE */
3108             case 0xa0:      /* NBSP */
3109             case 0x1680:    /* OGHAM SPACE MARK */
3110             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3111             case 0x2000:    /* EN QUAD */
3112             case 0x2001:    /* EM QUAD */
3113             case 0x2002:    /* EN SPACE */
3114             case 0x2003:    /* EM SPACE */
3115             case 0x2004:    /* THREE-PER-EM SPACE */
3116             case 0x2005:    /* FOUR-PER-EM SPACE */
3117             case 0x2006:    /* SIX-PER-EM SPACE */
3118             case 0x2007:    /* FIGURE SPACE */
3119             case 0x2008:    /* PUNCTUATION SPACE */
3120             case 0x2009:    /* THIN SPACE */
3121             case 0x200A:    /* HAIR SPACE */
3122             case 0x202f:    /* NARROW NO-BREAK SPACE */
3123             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3124             case 0x3000:    /* IDEOGRAPHIC SPACE */
3125             break;
3126             }
3127           }
3128         break;
3129 
3130         case OP_NOT_VSPACE:
3131         for (i = 1; i <= min; i++)
3132           {
3133           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3134           GETCHARINC(c, eptr);
3135           switch(c)
3136             {
3137             default: break;
3138             case 0x0a:      /* LF */
3139             case 0x0b:      /* VT */
3140             case 0x0c:      /* FF */
3141             case 0x0d:      /* CR */
3142             case 0x85:      /* NEL */
3143             case 0x2028:    /* LINE SEPARATOR */
3144             case 0x2029:    /* PARAGRAPH SEPARATOR */
3145             RRETURN(MATCH_NOMATCH);
3146             }
3147           }
3148         break;
3149 
3150         case OP_VSPACE:
3151         for (i = 1; i <= min; i++)
3152           {
3153           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3154           GETCHARINC(c, eptr);
3155           switch(c)
3156             {
3157             default: RRETURN(MATCH_NOMATCH);
3158             case 0x0a:      /* LF */
3159             case 0x0b:      /* VT */
3160             case 0x0c:      /* FF */
3161             case 0x0d:      /* CR */
3162             case 0x85:      /* NEL */
3163             case 0x2028:    /* LINE SEPARATOR */
3164             case 0x2029:    /* PARAGRAPH SEPARATOR */
3165             break;
3166             }
3167           }
3168         break;
3169 
3170         case OP_NOT_DIGIT:
3171         for (i = 1; i <= min; i++)
3172           {
3173           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3174           GETCHARINC(c, eptr);
3175           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3176             RRETURN(MATCH_NOMATCH);
3177           }
3178         break;
3179 
3180         case OP_DIGIT:
3181         for (i = 1; i <= min; i++)
3182           {
3183           if (eptr >= md->end_subject ||
3184              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3185             RRETURN(MATCH_NOMATCH);
3186           /* No need to skip more bytes - we know it's a 1-byte character */
3187           }
3188         break;
3189 
3190         case OP_NOT_WHITESPACE:
3191         for (i = 1; i <= min; i++)
3192           {
3193           if (eptr >= md->end_subject ||
3194              (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
3195             RRETURN(MATCH_NOMATCH);
3196           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3197           }
3198         break;
3199 
3200         case OP_WHITESPACE:
3201         for (i = 1; i <= min; i++)
3202           {
3203           if (eptr >= md->end_subject ||
3204              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3205             RRETURN(MATCH_NOMATCH);
3206           /* No need to skip more bytes - we know it's a 1-byte character */
3207           }
3208         break;
3209 
3210         case OP_NOT_WORDCHAR:
3211         for (i = 1; i <= min; i++)
3212           {
3213           if (eptr >= md->end_subject ||
3214              (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3215             RRETURN(MATCH_NOMATCH);
3216           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3217           }
3218         break;
3219 
3220         case OP_WORDCHAR:
3221         for (i = 1; i <= min; i++)
3222           {
3223           if (eptr >= md->end_subject ||
3224              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3225             RRETURN(MATCH_NOMATCH);
3226           /* No need to skip more bytes - we know it's a 1-byte character */
3227           }
3228         break;
3229 
3230         default:
3231         RRETURN(PCRE_ERROR_INTERNAL);
3232         }  /* End switch(ctype) */
3233 
3234       else
3235 #endif     /* SUPPORT_UTF8 */
3236 
3237       /* Code for the non-UTF-8 case for minimum matching of operators other
3238       than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
3239       number of bytes present, as this was tested above. */
3240 
3241       switch(ctype)
3242         {
3243         case OP_ANY:
3244         for (i = 1; i <= min; i++)
3245           {
3246           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3247           eptr++;
3248           }
3249         break;
3250 
3251         case OP_ALLANY:
3252         eptr += min;
3253         break;
3254 
3255         case OP_ANYBYTE:
3256         eptr += min;
3257         break;
3258 
3259         /* Because of the CRLF case, we can't assume the minimum number of
3260         bytes are present in this case. */
3261 
3262         case OP_ANYNL:
3263         for (i = 1; i <= min; i++)
3264           {
3265           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3266           switch(*eptr++)
3267             {
3268             default: RRETURN(MATCH_NOMATCH);
3269             case 0x000d:
3270             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3271             break;
3272             case 0x000a:
3273             break;
3274 
3275             case 0x000b:
3276             case 0x000c:
3277             case 0x0085:
3278             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3279             break;
3280             }
3281           }
3282         break;
3283 
3284         case OP_NOT_HSPACE:
3285         for (i = 1; i <= min; i++)
3286           {
3287           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3288           switch(*eptr++)
3289             {
3290             default: break;
3291             case 0x09:      /* HT */
3292             case 0x20:      /* SPACE */
3293             case 0xa0:      /* NBSP */
3294             RRETURN(MATCH_NOMATCH);
3295             }
3296           }
3297         break;
3298 
3299         case OP_HSPACE:
3300         for (i = 1; i <= min; i++)
3301           {
3302           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3303           switch(*eptr++)
3304             {
3305             default: RRETURN(MATCH_NOMATCH);
3306             case 0x09:      /* HT */
3307             case 0x20:      /* SPACE */
3308             case 0xa0:      /* NBSP */
3309             break;
3310             }
3311           }
3312         break;
3313 
3314         case OP_NOT_VSPACE:
3315         for (i = 1; i <= min; i++)
3316           {
3317           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3318           switch(*eptr++)
3319             {
3320             default: break;
3321             case 0x0a:      /* LF */
3322             case 0x0b:      /* VT */
3323             case 0x0c:      /* FF */
3324             case 0x0d:      /* CR */
3325             case 0x85:      /* NEL */
3326             RRETURN(MATCH_NOMATCH);
3327             }
3328           }
3329         break;
3330 
3331         case OP_VSPACE:
3332         for (i = 1; i <= min; i++)
3333           {
3334           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3335           switch(*eptr++)
3336             {
3337             default: RRETURN(MATCH_NOMATCH);
3338             case 0x0a:      /* LF */
3339             case 0x0b:      /* VT */
3340             case 0x0c:      /* FF */
3341             case 0x0d:      /* CR */
3342             case 0x85:      /* NEL */
3343             break;
3344             }
3345           }
3346         break;
3347 
3348         case OP_NOT_DIGIT:
3349         for (i = 1; i <= min; i++)
3350           if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3351         break;
3352 
3353         case OP_DIGIT:
3354         for (i = 1; i <= min; i++)
3355           if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3356         break;
3357 
3358         case OP_NOT_WHITESPACE:
3359         for (i = 1; i <= min; i++)
3360           if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3361         break;
3362 
3363         case OP_WHITESPACE:
3364         for (i = 1; i <= min; i++)
3365           if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3366         break;
3367 
3368         case OP_NOT_WORDCHAR:
3369         for (i = 1; i <= min; i++)
3370           if ((md->ctypes[*eptr++] & ctype_word) != 0)
3371             RRETURN(MATCH_NOMATCH);
3372         break;
3373 
3374         case OP_WORDCHAR:
3375         for (i = 1; i <= min; i++)
3376           if ((md->ctypes[*eptr++] & ctype_word) == 0)
3377             RRETURN(MATCH_NOMATCH);
3378         break;
3379 
3380         default:
3381         RRETURN(PCRE_ERROR_INTERNAL);
3382         }
3383       }
3384 
3385     /* If min = max, continue at the same level without recursing */
3386 
3387     if (min == max) continue;
3388 
3389     /* If minimizing, we have to test the rest of the pattern before each
3390     subsequent match. Again, separate the UTF-8 case for speed, and also
3391     separate the UCP cases. */
3392 
3393     if (minimize)
3394       {
3395 #ifdef SUPPORT_UCP
3396       if (prop_type >= 0)
3397         {
3398         switch(prop_type)
3399           {
3400           case PT_ANY:
3401           for (fi = min;; fi++)
3402             {
3403             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3404             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3405             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3406             GETCHARINC(c, eptr);
3407             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3408             }
3409           /* Control never gets here */
3410 
3411           case PT_LAMP:
3412           for (fi = min;; fi++)
3413             {
3414             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3415             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3416             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3417             GETCHARINC(c, eptr);
3418             prop_chartype = UCD_CHARTYPE(c);
3419             if ((prop_chartype == ucp_Lu ||
3420                  prop_chartype == ucp_Ll ||
3421                  prop_chartype == ucp_Lt) == prop_fail_result)
3422               RRETURN(MATCH_NOMATCH);
3423             }
3424           /* Control never gets here */
3425 
3426           case PT_GC:
3427           for (fi = min;; fi++)
3428             {
3429             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3430             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3431             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3432             GETCHARINC(c, eptr);
3433             prop_category = UCD_CATEGORY(c);
3434             if ((prop_category == prop_value) == prop_fail_result)
3435               RRETURN(MATCH_NOMATCH);
3436             }
3437           /* Control never gets here */
3438 
3439           case PT_PC:
3440           for (fi = min;; fi++)
3441             {
3442             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3443             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3444             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3445             GETCHARINC(c, eptr);
3446             prop_chartype = UCD_CHARTYPE(c);
3447             if ((prop_chartype == prop_value) == prop_fail_result)
3448               RRETURN(MATCH_NOMATCH);
3449             }
3450           /* Control never gets here */
3451 
3452           case PT_SC:
3453           for (fi = min;; fi++)
3454             {
3455             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3456             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3457             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3458             GETCHARINC(c, eptr);
3459             prop_script = UCD_SCRIPT(c);
3460             if ((prop_script == prop_value) == prop_fail_result)
3461               RRETURN(MATCH_NOMATCH);
3462             }
3463           /* Control never gets here */
3464 
3465           default:
3466           RRETURN(PCRE_ERROR_INTERNAL);
3467           }
3468         }
3469 
3470       /* Match extended Unicode sequences. We will get here only if the
3471       support is in the binary; otherwise a compile-time error occurs. */
3472 
3473       else if (ctype == OP_EXTUNI)
3474         {
3475         for (fi = min;; fi++)
3476           {
3477           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3478           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3479           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3480           GETCHARINCTEST(c, eptr);
3481           prop_category = UCD_CATEGORY(c);
3482           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3483           while (eptr < md->end_subject)
3484             {
3485             int len = 1;
3486             if (!utf8) c = *eptr; else
3487               {
3488               GETCHARLEN(c, eptr, len);
3489               }
3490             prop_category = UCD_CATEGORY(c);
3491             if (prop_category != ucp_M) break;
3492             eptr += len;
3493             }
3494           }
3495         }
3496 
3497       else
3498 #endif     /* SUPPORT_UCP */
3499 
3500 #ifdef SUPPORT_UTF8
3501       /* UTF-8 mode */
3502       if (utf8)
3503         {
3504         for (fi = min;; fi++)
3505           {
3506           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3507           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3508           if (fi >= max || eptr >= md->end_subject ||
3509                (ctype == OP_ANY && IS_NEWLINE(eptr)))
3510             RRETURN(MATCH_NOMATCH);
3511 
3512           GETCHARINC(c, eptr);
3513           switch(ctype)
3514             {
3515             case OP_ANY:        /* This is the non-NL case */
3516             case OP_ALLANY:
3517             case OP_ANYBYTE:
3518             break;
3519 
3520             case OP_ANYNL:
3521             switch(c)
3522               {
3523               default: RRETURN(MATCH_NOMATCH);
3524               case 0x000d:
3525               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3526               break;
3527               case 0x000a:
3528               break;
3529 
3530               case 0x000b:
3531               case 0x000c:
3532               case 0x0085:
3533               case 0x2028:
3534               case 0x2029:
3535               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3536               break;
3537               }
3538             break;
3539 
3540             case OP_NOT_HSPACE:
3541             switch(c)
3542               {
3543               default: break;
3544               case 0x09:      /* HT */
3545               case 0x20:      /* SPACE */
3546               case 0xa0:      /* NBSP */
3547               case 0x1680:    /* OGHAM SPACE MARK */
3548               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3549               case 0x2000:    /* EN QUAD */
3550               case 0x2001:    /* EM QUAD */
3551               case 0x2002:    /* EN SPACE */
3552               case 0x2003:    /* EM SPACE */
3553               case 0x2004:    /* THREE-PER-EM SPACE */
3554               case 0x2005:    /* FOUR-PER-EM SPACE */
3555               case 0x2006:    /* SIX-PER-EM SPACE */
3556               case 0x2007:    /* FIGURE SPACE */
3557               case 0x2008:    /* PUNCTUATION SPACE */
3558               case 0x2009:    /* THIN SPACE */
3559               case 0x200A:    /* HAIR SPACE */
3560               case 0x202f:    /* NARROW NO-BREAK SPACE */
3561               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3562               case 0x3000:    /* IDEOGRAPHIC SPACE */
3563               RRETURN(MATCH_NOMATCH);
3564               }
3565             break;
3566 
3567             case OP_HSPACE:
3568             switch(c)
3569               {
3570               default: RRETURN(MATCH_NOMATCH);
3571               case 0x09:      /* HT */
3572               case 0x20:      /* SPACE */
3573               case 0xa0:      /* NBSP */
3574               case 0x1680:    /* OGHAM SPACE MARK */
3575               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3576               case 0x2000:    /* EN QUAD */
3577               case 0x2001:    /* EM QUAD */
3578               case 0x2002:    /* EN SPACE */
3579               case 0x2003:    /* EM SPACE */
3580               case 0x2004:    /* THREE-PER-EM SPACE */
3581               case 0x2005:    /* FOUR-PER-EM SPACE */
3582               case 0x2006:    /* SIX-PER-EM SPACE */
3583               case 0x2007:    /* FIGURE SPACE */
3584               case 0x2008:    /* PUNCTUATION SPACE */
3585               case 0x2009:    /* THIN SPACE */
3586               case 0x200A:    /* HAIR SPACE */
3587               case 0x202f:    /* NARROW NO-BREAK SPACE */
3588               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3589               case 0x3000:    /* IDEOGRAPHIC SPACE */
3590               break;
3591               }
3592             break;
3593 
3594             case OP_NOT_VSPACE:
3595             switch(c)
3596               {
3597               default: break;
3598               case 0x0a:      /* LF */
3599               case 0x0b:      /* VT */
3600               case 0x0c:      /* FF */
3601               case 0x0d:      /* CR */
3602               case 0x85:      /* NEL */
3603               case 0x2028:    /* LINE SEPARATOR */
3604               case 0x2029:    /* PARAGRAPH SEPARATOR */
3605               RRETURN(MATCH_NOMATCH);
3606               }
3607             break;
3608 
3609             case OP_VSPACE:
3610             switch(c)
3611               {
3612               default: RRETURN(MATCH_NOMATCH);
3613               case 0x0a:      /* LF */
3614               case 0x0b:      /* VT */
3615               case 0x0c:      /* FF */
3616               case 0x0d:      /* CR */
3617               case 0x85:      /* NEL */
3618               case 0x2028:    /* LINE SEPARATOR */
3619               case 0x2029:    /* PARAGRAPH SEPARATOR */
3620               break;
3621               }
3622             break;
3623 
3624             case OP_NOT_DIGIT:
3625             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3626               RRETURN(MATCH_NOMATCH);
3627             break;
3628 
3629             case OP_DIGIT:
3630             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
3631               RRETURN(MATCH_NOMATCH);
3632             break;
3633 
3634             case OP_NOT_WHITESPACE:
3635             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
3636               RRETURN(MATCH_NOMATCH);
3637             break;
3638 
3639             case OP_WHITESPACE:
3640             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
3641               RRETURN(MATCH_NOMATCH);
3642             break;
3643 
3644             case OP_NOT_WORDCHAR:
3645             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
3646               RRETURN(MATCH_NOMATCH);
3647             break;
3648 
3649             case OP_WORDCHAR:
3650             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
3651               RRETURN(MATCH_NOMATCH);
3652             break;
3653 
3654             default:
3655             RRETURN(PCRE_ERROR_INTERNAL);
3656             }
3657           }
3658         }
3659       else
3660 #endif
3661       /* Not UTF-8 mode */
3662         {
3663         for (fi = min;; fi++)
3664           {
3665           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3666           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3667           if (fi >= max || eptr >= md->end_subject ||
3668                (ctype == OP_ANY && IS_NEWLINE(eptr)))
3669             RRETURN(MATCH_NOMATCH);
3670 
3671           c = *eptr++;
3672           switch(ctype)
3673             {
3674             case OP_ANY:     /* This is the non-NL case */
3675             case OP_ALLANY:
3676             case OP_ANYBYTE:
3677             break;
3678 
3679             case OP_ANYNL:
3680             switch(c)
3681               {
3682               default: RRETURN(MATCH_NOMATCH);
3683               case 0x000d:
3684               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3685               break;
3686 
3687               case 0x000a:
3688               break;
3689 
3690               case 0x000b:
3691               case 0x000c:
3692               case 0x0085:
3693               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3694               break;
3695               }
3696             break;
3697 
3698             case OP_NOT_HSPACE:
3699             switch(c)
3700               {
3701               default: break;
3702               case 0x09:      /* HT */
3703               case 0x20:      /* SPACE */
3704               case 0xa0:      /* NBSP */
3705               RRETURN(MATCH_NOMATCH);
3706               }
3707             break;
3708 
3709             case OP_HSPACE:
3710             switch(c)
3711               {
3712               default: RRETURN(MATCH_NOMATCH);
3713               case 0x09:      /* HT */
3714               case 0x20:      /* SPACE */
3715               case 0xa0:      /* NBSP */
3716               break;
3717               }
3718             break;
3719 
3720             case OP_NOT_VSPACE:
3721             switch(c)
3722               {
3723               default: break;
3724               case 0x0a:      /* LF */
3725               case 0x0b:      /* VT */
3726               case 0x0c:      /* FF */
3727               case 0x0d:      /* CR */
3728               case 0x85:      /* NEL */
3729               RRETURN(MATCH_NOMATCH);
3730               }
3731             break;
3732 
3733             case OP_VSPACE:
3734             switch(c)
3735               {
3736               default: RRETURN(MATCH_NOMATCH);
3737               case 0x0a:      /* LF */
3738               case 0x0b:      /* VT */
3739               case 0x0c:      /* FF */
3740               case 0x0d:      /* CR */
3741               case 0x85:      /* NEL */
3742               break;
3743               }
3744             break;
3745 
3746             case OP_NOT_DIGIT:
3747             if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3748             break;
3749 
3750             case OP_DIGIT:
3751             if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3752             break;
3753 
3754             case OP_NOT_WHITESPACE:
3755             if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3756             break;
3757 
3758             case OP_WHITESPACE:
3759             if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3760             break;
3761 
3762             case OP_NOT_WORDCHAR:
3763             if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
3764             break;
3765 
3766             case OP_WORDCHAR:
3767             if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
3768             break;
3769 
3770             default:
3771             RRETURN(PCRE_ERROR_INTERNAL);
3772             }
3773           }
3774         }
3775       /* Control never gets here */
3776       }
3777 
3778     /* If maximizing, it is worth using inline code for speed, doing the type
3779     test once at the start (i.e. keep it out of the loop). Again, keep the
3780     UTF-8 and UCP stuff separate. */
3781 
3782     else
3783       {
3784       pp = eptr;  /* Remember where we started */
3785 
3786 #ifdef SUPPORT_UCP
3787       if (prop_type >= 0)
3788         {
3789         switch(prop_type)
3790           {
3791           case PT_ANY:
3792           for (i = min; i < max; i++)
3793             {
3794             int len = 1;
3795             if (eptr >= md->end_subject) break;
3796             GETCHARLEN(c, eptr, len);
3797             if (prop_fail_result) break;
3798             eptr+= len;
3799             }
3800           break;
3801 
3802           case PT_LAMP:
3803           for (i = min; i < max; i++)
3804             {
3805             int len = 1;
3806             if (eptr >= md->end_subject) break;
3807             GETCHARLEN(c, eptr, len);
3808             prop_chartype = UCD_CHARTYPE(c);
3809             if ((prop_chartype == ucp_Lu ||
3810                  prop_chartype == ucp_Ll ||
3811                  prop_chartype == ucp_Lt) == prop_fail_result)
3812               break;
3813             eptr+= len;
3814             }
3815           break;
3816 
3817           case PT_GC:
3818           for (i = min; i < max; i++)
3819             {
3820             int len = 1;
3821             if (eptr >= md->end_subject) break;
3822             GETCHARLEN(c, eptr, len);
3823             prop_category = UCD_CATEGORY(c);
3824             if ((prop_category == prop_value) == prop_fail_result)
3825               break;
3826             eptr+= len;
3827             }
3828           break;
3829 
3830           case PT_PC:
3831           for (i = min; i < max; i++)
3832             {
3833             int len = 1;
3834             if (eptr >= md->end_subject) break;
3835             GETCHARLEN(c, eptr, len);
3836             prop_chartype = UCD_CHARTYPE(c);
3837             if ((prop_chartype == prop_value) == prop_fail_result)
3838               break;
3839             eptr+= len;
3840             }
3841           break;
3842 
3843           case PT_SC:
3844           for (i = min; i < max; i++)
3845             {
3846             int len = 1;
3847             if (eptr >= md->end_subject) break;
3848             GETCHARLEN(c, eptr, len);
3849             prop_script = UCD_SCRIPT(c);
3850             if ((prop_script == prop_value) == prop_fail_result)
3851               break;
3852             eptr+= len;
3853             }
3854           break;
3855           }
3856 
3857         /* eptr is now past the end of the maximum run */
3858 
3859         if (possessive) continue;
3860         for(;;)
3861           {
3862           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3863           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3864           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3865           if (utf8) BACKCHAR(eptr);
3866           }
3867         }
3868 
3869       /* Match extended Unicode sequences. We will get here only if the
3870       support is in the binary; otherwise a compile-time error occurs. */
3871 
3872       else if (ctype == OP_EXTUNI)
3873         {
3874         for (i = min; i < max; i++)
3875           {
3876           if (eptr >= md->end_subject) break;
3877           GETCHARINCTEST(c, eptr);
3878           prop_category = UCD_CATEGORY(c);
3879           if (prop_category == ucp_M) break;
3880           while (eptr < md->end_subject)
3881             {
3882             int len = 1;
3883             if (!utf8) c = *eptr; else
3884               {
3885               GETCHARLEN(c, eptr, len);
3886               }
3887             prop_category = UCD_CATEGORY(c);
3888             if (prop_category != ucp_M) break;
3889             eptr += len;
3890             }
3891           }
3892 
3893         /* eptr is now past the end of the maximum run */
3894 
3895         if (possessive) continue;
3896         for(;;)
3897           {
3898           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3899           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3900           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3901           for (;;)                        /* Move back over one extended */
3902             {
3903             int len = 1;
3904             if (!utf8) c = *eptr; else
3905               {
3906               BACKCHAR(eptr);
3907               GETCHARLEN(c, eptr, len);
3908               }
3909             prop_category = UCD_CATEGORY(c);
3910             if (prop_category != ucp_M) break;
3911             eptr--;
3912             }
3913           }
3914         }
3915 
3916       else
3917 #endif   /* SUPPORT_UCP */
3918 
3919 #ifdef SUPPORT_UTF8
3920       /* UTF-8 mode */
3921 
3922       if (utf8)
3923         {
3924         switch(ctype)
3925           {
3926           case OP_ANY:
3927           if (max < INT_MAX)
3928             {
3929             for (i = min; i < max; i++)
3930               {
3931               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3932               eptr++;
3933               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3934               }
3935             }
3936 
3937           /* Handle unlimited UTF-8 repeat */
3938 
3939           else
3940             {
3941             for (i = min; i < max; i++)
3942               {
3943               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3944               eptr++;
3945               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3946               }
3947             }
3948           break;
3949 
3950           case OP_ALLANY:
3951           if (max < INT_MAX)
3952             {
3953             for (i = min; i < max; i++)
3954               {
3955               if (eptr >= md->end_subject) break;
3956               eptr++;
3957               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3958               }
3959             }
3960           else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
3961           break;
3962 
3963           /* The byte case is the same as non-UTF8 */
3964 
3965           case OP_ANYBYTE:
3966           c = max - min;
3967           if (c > (unsigned int)(md->end_subject - eptr))
3968             c = md->end_subject - eptr;
3969           eptr += c;
3970           break;
3971 
3972           case OP_ANYNL:
3973           for (i = min; i < max; i++)
3974             {
3975             int len = 1;
3976             if (eptr >= md->end_subject) break;
3977             GETCHARLEN(c, eptr, len);
3978             if (c == 0x000d)
3979               {
3980               if (++eptr >= md->end_subject) break;
3981               if (*eptr == 0x000a) eptr++;
3982               }
3983             else
3984               {
3985               if (c != 0x000a &&
3986                   (md->bsr_anycrlf ||
3987                    (c != 0x000b && c != 0x000c &&
3988                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
3989                 break;
3990               eptr += len;
3991               }
3992             }
3993           break;
3994 
3995           case OP_NOT_HSPACE:
3996           case OP_HSPACE:
3997           for (i = min; i < max; i++)
3998             {
3999             BOOL gotspace;
4000             int len = 1;
4001             if (eptr >= md->end_subject) break;
4002             GETCHARLEN(c, eptr, len);
4003             switch(c)
4004               {
4005               default: gotspace = FALSE; break;
4006               case 0x09:      /* HT */
4007               case 0x20:      /* SPACE */
4008               case 0xa0:      /* NBSP */
4009               case 0x1680:    /* OGHAM SPACE MARK */
4010               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4011               case 0x2000:    /* EN QUAD */
4012               case 0x2001:    /* EM QUAD */
4013               case 0x2002:    /* EN SPACE */
4014               case 0x2003:    /* EM SPACE */
4015               case 0x2004:    /* THREE-PER-EM SPACE */
4016               case 0x2005:    /* FOUR-PER-EM SPACE */
4017               case 0x2006:    /* SIX-PER-EM SPACE */
4018               case 0x2007:    /* FIGURE SPACE */
4019               case 0x2008:    /* PUNCTUATION SPACE */
4020               case 0x2009:    /* THIN SPACE */
4021               case 0x200A:    /* HAIR SPACE */
4022               case 0x202f:    /* NARROW NO-BREAK SPACE */
4023               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4024               case 0x3000:    /* IDEOGRAPHIC SPACE */
4025               gotspace = TRUE;
4026               break;
4027               }
4028             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
4029             eptr += len;
4030             }
4031           break;
4032 
4033           case OP_NOT_VSPACE:
4034           case OP_VSPACE:
4035           for (i = min; i < max; i++)
4036             {
4037             BOOL gotspace;
4038             int len = 1;
4039             if (eptr >= md->end_subject) break;
4040             GETCHARLEN(c, eptr, len);
4041             switch(c)
4042               {
4043               default: gotspace = FALSE; break;
4044               case 0x0a:      /* LF */
4045               case 0x0b:      /* VT */
4046               case 0x0c:      /* FF */
4047               case 0x0d:      /* CR */
4048               case 0x85:      /* NEL */
4049               case 0x2028:    /* LINE SEPARATOR */
4050               case 0x2029:    /* PARAGRAPH SEPARATOR */
4051               gotspace = TRUE;
4052               break;
4053               }
4054             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
4055             eptr += len;
4056             }
4057           break;
4058 
4059           case OP_NOT_DIGIT:
4060           for (i = min; i < max; i++)
4061             {
4062             int len = 1;
4063             if (eptr >= md->end_subject) break;
4064             GETCHARLEN(c, eptr, len);
4065             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
4066             eptr+= len;
4067             }
4068           break;
4069 
4070           case OP_DIGIT:
4071           for (i = min; i < max; i++)
4072             {
4073             int len = 1;
4074             if (eptr >= md->end_subject) break;
4075             GETCHARLEN(c, eptr, len);
4076             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
4077             eptr+= len;
4078             }
4079           break;
4080 
4081           case OP_NOT_WHITESPACE:
4082           for (i = min; i < max; i++)
4083             {
4084             int len = 1;
4085             if (eptr >= md->end_subject) break;
4086             GETCHARLEN(c, eptr, len);
4087             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
4088             eptr+= len;
4089             }
4090           break;
4091 
4092           case OP_WHITESPACE:
4093           for (i = min; i < max; i++)
4094             {
4095             int len = 1;
4096             if (eptr >= md->end_subject) break;
4097             GETCHARLEN(c, eptr, len);
4098             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
4099             eptr+= len;
4100             }
4101           break;
4102 
4103           case OP_NOT_WORDCHAR:
4104           for (i = min; i < max; i++)
4105             {
4106             int len = 1;
4107             if (eptr >= md->end_subject) break;
4108             GETCHARLEN(c, eptr, len);
4109             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
4110             eptr+= len;
4111             }
4112           break;
4113 
4114           case OP_WORDCHAR:
4115           for (i = min; i < max; i++)
4116             {
4117             int len = 1;
4118             if (eptr >= md->end_subject) break;
4119             GETCHARLEN(c, eptr, len);
4120             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
4121             eptr+= len;
4122             }
4123           break;
4124 
4125           default:
4126           RRETURN(PCRE_ERROR_INTERNAL);
4127           }
4128 
4129         /* eptr is now past the end of the maximum run */
4130 
4131         if (possessive) continue;
4132         for(;;)
4133           {
4134           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
4135           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4136           if (eptr-- == pp) break;        /* Stop if tried at original pos */
4137           BACKCHAR(eptr);
4138           }
4139         }
4140       else
4141 #endif  /* SUPPORT_UTF8 */
4142 
4143       /* Not UTF-8 mode */
4144         {
4145         switch(ctype)
4146           {
4147           case OP_ANY:
4148           for (i = min; i < max; i++)
4149             {
4150             if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4151             eptr++;
4152             }
4153           break;
4154 
4155           case OP_ALLANY:
4156           case OP_ANYBYTE:
4157           c = max - min;
4158           if (c > (unsigned int)(md->end_subject - eptr))
4159             c = md->end_subject - eptr;
4160           eptr += c;
4161           break;
4162 
4163           case OP_ANYNL:
4164           for (i = min; i < max; i++)
4165             {
4166             if (eptr >= md->end_subject) break;
4167             c = *eptr;
4168             if (c == 0x000d)
4169               {
4170               if (++eptr >= md->end_subject) break;
4171               if (*eptr == 0x000a) eptr++;
4172               }
4173             else
4174               {
4175               if (c != 0x000a &&
4176                   (md->bsr_anycrlf ||
4177                     (c != 0x000b && c != 0x000c && c != 0x0085)))
4178                 break;
4179               eptr++;
4180               }
4181             }
4182           break;
4183 
4184           case OP_NOT_HSPACE:
4185           for (i = min; i < max; i++)
4186             {
4187             if (eptr >= md->end_subject) break;
4188             c = *eptr;
4189             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4190             eptr++;
4191             }
4192           break;
4193 
4194           case OP_HSPACE:
4195           for (i = min; i < max; i++)
4196             {
4197             if (eptr >= md->end_subject) break;
4198             c = *eptr;
4199             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4200             eptr++;
4201             }
4202           break;
4203 
4204           case OP_NOT_VSPACE:
4205           for (i = min; i < max; i++)
4206             {
4207             if (eptr >= md->end_subject) break;
4208             c = *eptr;
4209             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4210               break;
4211             eptr++;
4212             }
4213           break;
4214 
4215           case OP_VSPACE:
4216           for (i = min; i < max; i++)
4217             {
4218             if (eptr >= md->end_subject) break;
4219             c = *eptr;
4220             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4221               break;
4222             eptr++;
4223             }
4224           break;
4225 
4226           case OP_NOT_DIGIT:
4227           for (i = min; i < max; i++)
4228             {
4229             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4230               break;
4231             eptr++;
4232             }
4233           break;
4234 
4235           case OP_DIGIT:
4236           for (i = min; i < max; i++)
4237             {
4238             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4239               break;
4240             eptr++;
4241             }
4242           break;
4243 
4244           case OP_NOT_WHITESPACE:
4245           for (i = min; i < max; i++)
4246             {
4247             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4248               break;
4249             eptr++;
4250             }
4251           break;
4252 
4253           case OP_WHITESPACE:
4254           for (i = min; i < max; i++)
4255             {
4256             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4257               break;
4258             eptr++;
4259             }
4260           break;
4261 
4262           case OP_NOT_WORDCHAR:
4263           for (i = min; i < max; i++)
4264             {
4265             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4266               break;
4267             eptr++;
4268             }
4269           break;
4270 
4271           case OP_WORDCHAR:
4272           for (i = min; i < max; i++)
4273             {
4274             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4275               break;
4276             eptr++;
4277             }
4278           break;
4279 
4280           default:
4281           RRETURN(PCRE_ERROR_INTERNAL);
4282           }
4283 
4284         /* eptr is now past the end of the maximum run */
4285 
4286         if (possessive) continue;
4287         while (eptr >= pp)
4288           {
4289           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4290           eptr--;
4291           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4292           }
4293         }
4294 
4295       /* Get here if we can't make it match with any permitted repetitions */
4296 
4297       RRETURN(MATCH_NOMATCH);
4298       }
4299     /* Control never gets here */
4300 
4301     /* There's been some horrible disaster. Arrival here can only mean there is
4302     something seriously wrong in the code above or the OP_xxx definitions. */
4303 
4304     default:
4305     DPRINTF(("Unknown opcode %d\n", *ecode));
4306     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
4307     }
4308 
4309   /* Do not stick any code in here without much thought; it is assumed
4310   that "continue" in the code above comes out to here to repeat the main
4311   loop. */
4312 
4313   }             /* End of main loop */
4314 /* Control never reaches here */
4315 
4316 
4317 /* When compiling to use the heap rather than the stack for recursive calls to
4318 match(), the RRETURN() macro jumps here. The number that is saved in
4319 frame->Xwhere indicates which label we actually want to return to. */
4320 
4321 #ifdef NO_RECURSE
4322 #define LBL(val) case val: goto L_RM##val;
4323 HEAP_RETURN:
4324 switch (frame->Xwhere)
4325   {
4326   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4327   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
4328   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
4329   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
4330   LBL(53) LBL(54)
4331 #ifdef SUPPORT_UTF8
4332   LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
4333   LBL(32) LBL(34) LBL(42) LBL(46)
4334 #ifdef SUPPORT_UCP
4335   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
4336 #endif  /* SUPPORT_UCP */
4337 #endif  /* SUPPORT_UTF8 */
4338   default:
4339   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4340   return PCRE_ERROR_INTERNAL;
4341   }
4342 #undef LBL
4343 #endif  /* NO_RECURSE */
4344 }
4345 
4346 
4347 /***************************************************************************
4348 ****************************************************************************
4349                    RECURSION IN THE match() FUNCTION
4350 
4351 Undefine all the macros that were defined above to handle this. */
4352 
4353 #ifdef NO_RECURSE  /* undo the match() macros that exist only in this build */
4354 #undef eptr
4355 #undef ecode
4356 #undef mstart
4357 #undef offset_top
4358 #undef ims          /* pcre_exec() below declares a local named ims */
4359 #undef eptrb
4360 #undef flags        /* pcre_exec() below declares a local named flags */
4361 
4362 #undef callpat
4363 #undef charptr
4364 #undef data
4365 #undef next
4366 #undef pp
4367 #undef prev
4368 #undef saved_eptr
4369 
4370 #undef new_recursive
4371 
4372 #undef cur_is_word
4373 #undef condition
4374 #undef prev_is_word
4375 
4376 #undef original_ims
4377 
4378 #undef ctype
4379 #undef length       /* pcre_exec() below has a parameter named length */
4380 #undef max
4381 #undef min
4382 #undef number
4383 #undef offset
4384 #undef op
4385 #undef save_capture_last
4386 #undef save_offset1
4387 #undef save_offset2
4388 #undef save_offset3
4389 #undef stacksave
4390 
4391 #undef newptrb
4392 
4393 #endif  /* NO_RECURSE */
4394 
4395 /* fc and fi are macros with or without NO_RECURSE, so always undefine them */
4396 
4397 #undef fc
4398 #undef fi
4399 
4400 /***************************************************************************
4401 ***************************************************************************/
4402 
4403 
4404 
4405 /*************************************************
4406 *         Execute a Regular Expression           *
4407 *************************************************/
4408 
4409 /* This function applies a compiled re to a subject string and picks out
4410 portions of the string if it matches. Two elements in the vector are set for
4411 each substring: the offsets to the start and end of the substring.
4412 
4413 Arguments:
4414   argument_re     points to the compiled expression
4415   extra_data      points to extra data or is NULL
4416   subject         points to the subject string
4417   length          length of subject string (may contain binary zeros)
4418   start_offset    where to start in the subject string
4419   options         option bits
4420   offsets         points to a vector of ints to be filled in with offsets
4421   offsetcount     the number of elements in the vector
4422 
4423 Returns:          > 0 => success; value is 1 + the highest-numbered offset pair set
4424                   = 0 => success, but offsets is not big enough
4425                    -1 => failed to match
4426                  < -1 => some kind of unexpected problem
4427 */
4428 
4429 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
4430 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4431   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4432   int offsetcount)
4433 {
4434 int rc, resetcount, ocount;
4435 int first_byte = -1;
4436 int req_byte = -1;
4437 int req_byte2 = -1;
4438 int newline;
4439 unsigned long int ims;
4440 BOOL using_temporary_offsets = FALSE;
4441 BOOL anchored;
4442 BOOL startline;
4443 BOOL firstline;
4444 BOOL first_byte_caseless = FALSE;
4445 BOOL req_byte_caseless = FALSE;
4446 BOOL utf8;
4447 match_data match_block;
4448 match_data *md = &match_block;
4449 const uschar *tables;
4450 const uschar *start_bits = NULL;
4451 USPTR start_match = (USPTR)subject + start_offset;
4452 USPTR end_subject;
4453 USPTR req_byte_ptr = start_match - 1;
4454 
4455 pcre_study_data internal_study;
4456 const pcre_study_data *study;
4457 
4458 real_pcre internal_re;
4459 const real_pcre *external_re = (const real_pcre *)argument_re;
4460 const real_pcre *re = external_re;
4461 
4462 /* Plausibility checks */
4463 
4464 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
4465 if (re == NULL || subject == NULL ||
4466    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
4467 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
4468 
4469 /* Fish out the optional data from the extra_data structure, first setting
4470 the default values. */
4471 
4472 study = NULL;
4473 md->match_limit = MATCH_LIMIT;
4474 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
4475 md->callout_data = NULL;
4476 
4477 /* The table pointer is always in native byte order. */
4478 
4479 tables = external_re->tables;
4480 
4481 if (extra_data != NULL)
4482   {
4483   register unsigned int flags = extra_data->flags;
4484   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
4485     study = (const pcre_study_data *)extra_data->study_data;
4486   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
4487     md->match_limit = extra_data->match_limit;
4488   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
4489     md->match_limit_recursion = extra_data->match_limit_recursion;
4490   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
4491     md->callout_data = extra_data->callout_data;
4492   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
4493   }
4494 
4495 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
4496 is a feature that makes it possible to save compiled regex and re-use them
4497 in other programs later. */
4498 
4499 if (tables == NULL) tables = _pcre_default_tables;
4500 
4501 /* Check that the first field in the block is the magic number. If it is not,
4502 test for a regex that was compiled on a host of opposite endianness. If this is
4503 the case, flipped values are put in internal_re and internal_study if there was
4504 study data too. */
4505 
4506 if (re->magic_number != MAGIC_NUMBER)
4507   {
4508   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
4509   if (re == NULL) return PCRE_ERROR_BADMAGIC;
4510   if (study != NULL) study = &internal_study;
4511   }
4512 
4513 /* Set up other data */
4514 
4515 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4516 startline = (re->flags & PCRE_STARTLINE) != 0;
4517 firstline = (re->options & PCRE_FIRSTLINE) != 0;
4518 
4519 /* The code starts after the real_pcre block and the capture name table. */
4520 
4521 md->start_code = (const uschar *)external_re + re->name_table_offset +
4522   re->name_count * re->name_entry_size;
4523 
4524 md->start_subject = (USPTR)subject;
4525 md->start_offset = start_offset;
4526 md->end_subject = md->start_subject + length;
4527 end_subject = md->end_subject;
4528 
4529 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4530 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4531 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
4532 
4533 md->notbol = (options & PCRE_NOTBOL) != 0;
4534 md->noteol = (options & PCRE_NOTEOL) != 0;
4535 md->notempty = (options & PCRE_NOTEMPTY) != 0;
4536 md->partial = (options & PCRE_PARTIAL) != 0;
4537 md->hitend = FALSE;
4538 
4539 md->recursive = NULL;                   /* No recursion at top level */
4540 
4541 md->lcc = tables + lcc_offset;
4542 md->ctypes = tables + ctypes_offset;
4543 
4544 /* Handle different \R options. */
4545 
4546 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
4547   {
4548   case 0:
4549   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
4550     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
4551   else
4552 #ifdef BSR_ANYCRLF
4553   md->bsr_anycrlf = TRUE;
4554 #else
4555   md->bsr_anycrlf = FALSE;
4556 #endif
4557   break;
4558 
4559   case PCRE_BSR_ANYCRLF:
4560   md->bsr_anycrlf = TRUE;
4561   break;
4562 
4563   case PCRE_BSR_UNICODE:
4564   md->bsr_anycrlf = FALSE;
4565   break;
4566 
4567   default: return PCRE_ERROR_BADNEWLINE;
4568   }
4569 
4570 /* Handle different types of newline. The three bits give eight cases. If
4571 nothing is set at run time, whatever was used at compile time applies. */
4572 
4573 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
4574         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
4575   {
4576   case 0: newline = NEWLINE; break;   /* Compile-time default */
4577   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
4578   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
4579   case PCRE_NEWLINE_CR+
4580        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
4581   case PCRE_NEWLINE_ANY: newline = -1; break;
4582   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4583   default: return PCRE_ERROR_BADNEWLINE;
4584   }
4585 
4586 if (newline == -2)
4587   {
4588   md->nltype = NLTYPE_ANYCRLF;
4589   }
4590 else if (newline < 0)
4591   {
4592   md->nltype = NLTYPE_ANY;
4593   }
4594 else
4595   {
4596   md->nltype = NLTYPE_FIXED;
4597   if (newline > 255)
4598     {
4599     md->nllen = 2;
4600     md->nl[0] = (newline >> 8) & 255;
4601     md->nl[1] = newline & 255;
4602     }
4603   else
4604     {
4605     md->nllen = 1;
4606     md->nl[0] = newline;
4607     }
4608   }
4609 
4610 /* Partial matching is supported only for a restricted set of regexes at the
4611 moment. */
4612 
4613 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
4614   return PCRE_ERROR_BADPARTIAL;
4615 
4616 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
4617 back the character offset. */
4618 
4619 #ifdef SUPPORT_UTF8
4620 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
4621   {
4622   if (_pcre_valid_utf8((USPTR)subject, length) >= 0)
4623     return PCRE_ERROR_BADUTF8;
4624   if (start_offset > 0 && start_offset < length)
4625     {
4626     int tb = ((USPTR)subject)[start_offset];
4627     if (tb > 127)
4628       {
4629       tb &= 0xc0;
4630       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
4631       }
4632     }
4633   }
4634 #endif
4635 
4636 /* The ims options can vary during the matching as a result of the presence
4637 of (?ims) items in the pattern. They are kept in a local variable so that
4638 restoring at the exit of a group is easy. */
4639 
4640 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
4641 
4642 /* If the expression has got more back references than the offsets supplied can
4643 hold, we get a temporary chunk of working store to use during the matching.
4644 Otherwise, we can use the vector supplied, rounding down its size to a multiple
4645 of 3. */
4646 
4647 ocount = offsetcount - (offsetcount % 3);
4648 
4649 if (re->top_backref > 0 && re->top_backref >= ocount/3)
4650   {
4651   ocount = re->top_backref * 3 + 3;
4652   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
4653   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4654   using_temporary_offsets = TRUE;
4655   DPRINTF(("Got memory to hold back references\n"));
4656   }
4657 else md->offset_vector = offsets;
4658 
4659 md->offset_end = ocount;
4660 md->offset_max = (2*ocount)/3;
4661 md->offset_overflow = FALSE;
4662 md->capture_last = -1;
4663 
4664 /* Compute the minimum number of offsets that we need to reset each time. Doing
4665 this makes a huge difference to execution time when there aren't many brackets
4666 in the pattern. */
4667 
4668 resetcount = 2 + re->top_bracket * 2;
4669 if (resetcount > offsetcount) resetcount = ocount;
4670 
4671 /* Reset the working variable associated with each extraction. These should
4672 never be used unless previously set, but they get saved and restored, and so we
4673 initialize them to avoid reading uninitialized locations. */
4674 
4675 if (md->offset_vector != NULL)
4676   {
4677   register int *iptr = md->offset_vector + ocount;
4678   register int *iend = iptr - resetcount/2 + 1;
4679   while (--iptr >= iend) *iptr = -1;
4680   }
4681 
4682 /* Set up the first character to match, if available. The first_byte value is
4683 never set for an anchored regular expression, but the anchoring may be forced
4684 at run time, so we have to test for anchoring. The first char may be unset for
4685 an unanchored pattern, of course. If there's no first char and the pattern was
4686 studied, there may be a bitmap of possible first characters. */
4687 
4688 if (!anchored)
4689   {
4690   if ((re->flags & PCRE_FIRSTSET) != 0)
4691     {
4692     first_byte = re->first_byte & 255;
4693     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
4694       first_byte = md->lcc[first_byte];
4695     }
4696   else
4697     if (!startline && study != NULL &&
4698       (study->options & PCRE_STUDY_MAPPED) != 0)
4699         start_bits = study->start_bits;
4700   }
4701 
4702 /* For anchored or unanchored matches, there may be a "last known required
4703 character" set. */
4704 
4705 if ((re->flags & PCRE_REQCHSET) != 0)
4706   {
4707   req_byte = re->req_byte & 255;
4708   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
4709   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
4710   }
4711 
4712 
4713 /* ==========================================================================*/
4714 
4715 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
4716 the loop runs just once. */
4717 
4718 for(;;)
4719   {
4720   USPTR save_end_subject = end_subject;
4721   USPTR new_start_match;
4722 
4723   /* Reset the maximum number of extractions we might see. */
4724 
4725   if (md->offset_vector != NULL)
4726     {
4727     register int *iptr = md->offset_vector;
4728     register int *iend = iptr + resetcount;
4729     while (iptr < iend) *iptr++ = -1;
4730     }
4731 
4732   /* If firstline is TRUE, the start of the match is constrained to the first
4733   line of a multiline string. That is, the match must be before or at the first
4734   newline. Implement this by temporarily adjusting end_subject so that we stop
4735   scanning at a newline. If the match fails at the newline, later code breaks
4736   this loop. */
4737 
4738   if (firstline)
4739     {
4740     USPTR t = start_match;
4741 #ifdef SUPPORT_UTF8
4742     if (utf8)
4743       {
4744       while (t < md->end_subject && !IS_NEWLINE(t))
4745         {
4746         t++;
4747         while (t < end_subject && (*t & 0xc0) == 0x80) t++;
4748         }
4749       }
4750     else
4751 #endif
4752     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4753     end_subject = t;
4754     }
4755 
4756   /* There are some optimizations that avoid running the match if a known
4757   starting point is not found, or if a known later character is not present.
4758   However, there is an option that disables these, for testing and for ensuring
4759   that all callouts do actually occur. */
4760 
4761   if ((options & PCRE_NO_START_OPTIMIZE) == 0)
4762     {
4763     /* Advance to a unique first byte if there is one. */
4764 
4765     if (first_byte >= 0)
4766       {
4767       if (first_byte_caseless)
4768         while (start_match < end_subject && md->lcc[*start_match] != first_byte)
4769           start_match++;
4770       else
4771         while (start_match < end_subject && *start_match != first_byte)
4772           start_match++;
4773       }
4774 
4775     /* Or to just after a linebreak for a multiline match */
4776 
4777     else if (startline)
4778       {
4779       if (start_match > md->start_subject + start_offset)
4780         {
4781 #ifdef SUPPORT_UTF8
4782         if (utf8)
4783           {
4784           while (start_match < end_subject && !WAS_NEWLINE(start_match))
4785             {
4786             start_match++;
4787             while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4788               start_match++;
4789             }
4790           }
4791         else
4792 #endif
4793         while (start_match < end_subject && !WAS_NEWLINE(start_match))
4794           start_match++;
4795 
4796         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4797         and we are now at a LF, advance the match position by one more character.
4798         */
4799 
4800         if (start_match[-1] == CHAR_CR &&
4801              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4802              start_match < end_subject &&
4803              *start_match == CHAR_NL)
4804           start_match++;
4805         }
4806       }
4807 
4808     /* Or to a non-unique first byte after study */
4809 
4810     else if (start_bits != NULL)
4811       {
4812       while (start_match < end_subject)
4813         {
4814         register unsigned int c = *start_match;
4815         if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
4816           else break;
4817         }
4818       }
4819     }   /* Starting optimizations */
4820 
4821   /* Restore fudged end_subject */
4822 
4823   end_subject = save_end_subject;
4824 
4825 #ifdef DEBUG  /* Sigh. Some compilers never learn. */
4826   printf(">>>> Match against: ");
4827   pchars(start_match, end_subject - start_match, TRUE, md);
4828   printf("\n");
4829 #endif
4830 
4831   /* If req_byte is set, we know that that character must appear in the
4832   subject for the match to succeed. If the first character is set, req_byte
4833   must be later in the subject; otherwise the test starts at the match point.
4834   This optimization can save a huge amount of backtracking in patterns with
4835   nested unlimited repeats that aren't going to match. Writing separate code
4836   for cased/caseless versions makes it go faster, as does using an
4837   autoincrement and backing off on a match.
4838 
4839   HOWEVER: when the subject string is very, very long, searching to its end
4840   can take a long time, and give bad performance on quite ordinary patterns.
4841   This showed up when somebody was matching something like /^\d+C/ on a
4842   32-megabyte string... so we don't do this when the string is sufficiently
4843   long.
4844 
4845   ALSO: this processing is disabled when partial matching is requested, or if
4846   disabling is explicitly requested. */
4847 
4848   if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
4849       req_byte >= 0 &&
4850       end_subject - start_match < REQ_BYTE_MAX &&
4851       !md->partial)
4852     {
4853     register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4854 
4855     /* We don't need to repeat the search if we haven't yet reached the
4856     place we found it at last time. */
4857 
4858     if (p > req_byte_ptr)
4859       {
4860       if (req_byte_caseless)
4861         {
4862         while (p < end_subject)
4863           {
4864           register int pp = *p++;
4865           if (pp == req_byte || pp == req_byte2) { p--; break; }
4866           }
4867         }
4868       else
4869         {
4870         while (p < end_subject)
4871           {
4872           if (*p++ == req_byte) { p--; break; }
4873           }
4874         }
4875 
4876       /* If we can't find the required character, break the matching loop,
4877       forcing a match failure. */
4878 
4879       if (p >= end_subject)
4880         {
4881         rc = MATCH_NOMATCH;
4882         break;
4883         }
4884 
4885       /* If we have found the required character, save the point where we
4886       found it, so that we don't search again next time round the loop if
4887       the start hasn't passed this character yet. */
4888 
4889       req_byte_ptr = p;
4890       }
4891     }
4892 
4893   /* OK, we can now run the match. */
4894 
4895   md->start_match_ptr = start_match;
4896   md->match_call_count = 0;
4897   rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
4898 
4899   switch(rc)
4900     {
4901     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
4902     exactly like PRUNE. */
4903 
4904     case MATCH_NOMATCH:
4905     case MATCH_PRUNE:
4906     case MATCH_THEN:
4907     new_start_match = start_match + 1;
4908 #ifdef SUPPORT_UTF8
4909     if (utf8)
4910       while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
4911         new_start_match++;
4912 #endif
4913     break;
4914 
4915     /* SKIP passes back the next starting point explicitly. */
4916 
4917     case MATCH_SKIP:
4918     new_start_match = md->start_match_ptr;
4919     break;
4920 
4921     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
4922 
4923     case MATCH_COMMIT:
4924     rc = MATCH_NOMATCH;
4925     goto ENDLOOP;
4926 
4927     /* Any other return is some kind of error. */
4928 
4929     default:
4930     goto ENDLOOP;
4931     }
4932 
4933   /* Control reaches here for the various types of "no match at this point"
4934   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
4935 
4936   rc = MATCH_NOMATCH;
4937 
4938   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4939   newline in the subject (though it may continue over the newline). Therefore,
4940   if we have just failed to match, starting at a newline, do not continue. */
4941 
4942   if (firstline && IS_NEWLINE(start_match)) break;
4943 
4944   /* Advance to new matching position */
4945 
4946   start_match = new_start_match;
4947 
4948   /* Break the loop if the pattern is anchored or if we have passed the end of
4949   the subject. */
4950 
4951   if (anchored || start_match > end_subject) break;
4952 
4953   /* If we have just passed a CR and we are now at a LF, and the pattern does
4954   not contain any explicit matches for \r or \n, and the newline option is CRLF
4955   or ANY or ANYCRLF, advance the match position by one more character. */
4956 
4957   if (start_match[-1] == CHAR_CR &&
4958       start_match < end_subject &&
4959       *start_match == CHAR_NL &&
4960       (re->flags & PCRE_HASCRORLF) == 0 &&
4961         (md->nltype == NLTYPE_ANY ||
4962          md->nltype == NLTYPE_ANYCRLF ||
4963          md->nllen == 2))
4964     start_match++;
4965 
4966   }   /* End of for(;;) "bumpalong" loop */
4967 
4968 /* ==========================================================================*/
4969 
4970 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4971 conditions is true:
4972 
4973 (1) The pattern is anchored or the match was failed by (*COMMIT);
4974 
4975 (2) We are past the end of the subject;
4976 
4977 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4978     this option requests that a match occur at or before the first newline in
4979     the subject.
4980 
4981 When we have a match and the offset vector is big enough to deal with any
4982 backreferences, captured substring offsets will already be set up. In the case
4983 where we had to get some local store to hold offsets for backreference
4984 processing, copy those that we can. In this case there need not be overflow if
4985 certain parts of the pattern were not used, even though there are more
4986 capturing parentheses than vector slots. */
4987 
4988 ENDLOOP:
4989 
4990 if (rc == MATCH_MATCH)
4991   {
4992   if (using_temporary_offsets)
4993     {
4994     if (offsetcount >= 4)
4995       {
4996       memcpy(offsets + 2, md->offset_vector + 2,
4997         (offsetcount - 2) * sizeof(int));
4998       DPRINTF(("Copied offsets from temporary memory\n"));
4999       }
5000     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
5001     DPRINTF(("Freeing temporary memory\n"));
5002     (pcre_free)(md->offset_vector);
5003     }
5004 
5005   /* Set the return code to the number of captured strings, or 0 if there are
5006   too many to fit into the vector. */
5007 
5008   rc = md->offset_overflow? 0 : md->end_offset_top/2;
5009 
5010   /* If there is space, set up the whole thing as substring 0. The value of
5011   md->start_match_ptr might be modified if \K was encountered on the successful
5012   matching path. */
5013 
5014   if (offsetcount < 2) rc = 0; else
5015     {
5016     offsets[0] = md->start_match_ptr - md->start_subject;
5017     offsets[1] = md->end_match_ptr - md->start_subject;
5018     }
5019 
5020   DPRINTF((">>>> returning %d\n", rc));
5021   return rc;
5022   }
5023 
5024 /* Control gets here if there has been an error, or if the overall match
5025 attempt has failed at all permitted starting positions. */
5026 
5027 if (using_temporary_offsets)
5028   {
5029   DPRINTF(("Freeing temporary memory\n"));
5030   (pcre_free)(md->offset_vector);
5031   }
5032 
5033 if (rc != MATCH_NOMATCH)
5034   {
5035   DPRINTF((">>>> error: returning %d\n", rc));
5036   return rc;
5037   }
5038 else if (md->partial && md->hitend)
5039   {
5040   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
5041   return PCRE_ERROR_PARTIAL;
5042   }
5043 else
5044   {
5045   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
5046   return PCRE_ERROR_NOMATCH;
5047   }
5048 }
5049 
5050 /* End of pcre_exec.c */
5051 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.