Blob Blame History Raw
diff -up unzip60/match.c.recmatch unzip60/match.c
--- unzip60/match.c.recmatch	2005-08-14 13:00:36.000000000 -0400
+++ unzip60/match.c	2013-05-28 10:29:57.949077543 -0400
@@ -27,16 +27,14 @@
 
   ---------------------------------------------------------------------------
 
-  Copyright on recmatch() from Zip's util.c (although recmatch() was almost
-  certainly written by Mark Adler...ask me how I can tell :-) ):
+  Copyright on recmatch() from Zip's util.c
+	 Copyright (c) 1990-2005 Info-ZIP.  All rights reserved.
 
-     Copyright (C) 1990-1992 Mark Adler, Richard B. Wales, Jean-loup Gailly,
-     Kai Uwe Rommel and Igor Mandrichenko.
+	 See the accompanying file LICENSE, version 2004-May-22 or later
+	 for terms of use.
+	 If, for some reason, both of these files are missing, the Info-ZIP license
+	 also may be found at:  ftp://ftp.info-zip.org/pub/infozip/license.html  
 
-     Permission is granted to any individual or institution to use, copy,
-     or redistribute this software so long as all of the original files are
-     included unmodified, that it is not sold for profit, and that this copy-
-     right notice is retained.
 
   ---------------------------------------------------------------------------
 
@@ -53,7 +51,7 @@
 
   A set is composed of characters or ranges; a range looks like ``character
   hyphen character'' (as in 0-9 or A-Z).  [0-9a-zA-Z_] is the minimal set of
-  characters allowed in the [..] pattern construct.  Other characters are
+  characters allowed in the [..] pattern construct.  Other characters are
   allowed (i.e., 8-bit characters) if your system will support them.
 
   To suppress the special syntactic significance of any of ``[]*?!^-\'', in-
@@ -101,8 +99,32 @@
 #  define WILDCHAR   '?'
 #  define BEG_RANGE  '['
 #  define END_RANGE  ']'
+#  define WILDCHR_SINGLE '?'
+#  define DIRSEP_CHR '/'
+#  define WILDCHR_MULTI '*'
 #endif
 
+#ifdef WILD_STOP_AT_DIR
+   int wild_stop_at_dir = 1; /* '?' and a single '*' stop at DIRSEP_CHR; "**" still crosses it */
+#else
+   int wild_stop_at_dir = 0; /* wildcards match DIRSEP_CHR too; NOTE(review): nothing visible sets this at run time -- confirm */
+#endif
+
+
+
+/*
+ * case_map() folds case for comparisons (identity unless USE_CASE_MAP; NOTE(review):
+ * confirm upper[] exists in unzip); to_up() forces upper case (for dosify option).
+ */
+#ifdef USE_CASE_MAP
+#  define case_map(c) upper[(c) & 0xff]
+#  define to_up(c)    upper[(c) & 0xff]
+#else
+#  define case_map(c) (c)
+#  define to_up(c)    ((c) >= 'a' && (c) <= 'z' ? (c)-'a'+'A' : (c))
+#endif /* USE_CASE_MAP */
+
+
 #if 0                /* GRR:  add this to unzip.h someday... */
 #if !(defined(MSDOS) && defined(DOSWILD))
 #ifdef WILD_STOP_AT_DIR
@@ -114,8 +136,8 @@ int recmatch OF((ZCONST uch *pattern, ZC
                  int ignore_case __WDLPRO));
 #endif
 #endif /* 0 */
-static int recmatch OF((ZCONST uch *pattern, ZCONST uch *string,
-                        int ignore_case __WDLPRO));
+static int recmatch OF((ZCONST char *pattern, ZCONST char *string,
+                        int ignore_case));
 static char *isshexp OF((ZCONST char *p));
 static int namecmp OF((ZCONST char *s1, ZCONST char *s2));
 
@@ -154,192 +176,240 @@ int match(string, pattern, ignore_case _
             }
             dospattern[j-1] = '\0';                    /* nuke the end "." */
         }
-        j = recmatch((uch *)dospattern, (uch *)string, ignore_case __WDL);
+        j = recmatch(dospattern, string, ignore_case);
         free(dospattern);
         return j == 1;
     } else
 #endif /* MSDOS && DOSWILD */
-    return recmatch((uch *)pattern, (uch *)string, ignore_case __WDL) == 1;
+    return recmatch(pattern, string, ignore_case) == 1;
 }
 
+#ifdef _MBCS
+
+char *___tmp_ptr;   /* NOTE(review): presumably scratch storage for POSTINCSTR(); confirm it is not defined elsewhere too */
 
+#endif
 
-static int recmatch(p, s, ic __WDL)
-    ZCONST uch *p;        /* sh pattern to match */
-    ZCONST uch *s;        /* string to which to match it */
-    int ic;               /* true for case insensitivity */
-    __WDLDEF              /* directory sepchar for WildStopAtDir mode, or 0 */
+static int recmatch(p, s, ic)
+ZCONST char *p;         /* sh pattern to match */
+ZCONST char *s;         /* string to match it to */
+int ic;                 /* flag: true for case-insensitive matching */
 /* Recursively compare the sh pattern p with the string s and return 1 if
- * they match, and 0 or 2 if they don't or if there is a syntax error in the
- * pattern.  This routine recurses on itself no more deeply than the number
- * of characters in the pattern. */
+   they match, 0 if they don't (or the pattern is malformed), or 2 to make an
+   enclosing '*' stop retrying.  ic is match()'s ignore_case flag (nonzero =
+   fold case).  Recursion depth is bounded by the pattern length. */
 {
-    unsigned int c;       /* pattern char or start of range in [-] loop */
+  int c;                /* pattern char or start of range in [-] loop */
+  /* Get first character, the pattern for new recmatch calls follows */
+ /* borrowed from Zip's global.c */
+ int no_wild = 0;        /* never set in this function: wildcards always active */
+ int allow_regex = 1;    /* never cleared in this function: [..] sets always parsed */
+  /* This fix provided by akt@m5.dion.ne.jp for Japanese.
+     See 21 July 2006 mail.
+     It only applies when p is pointing to a doublebyte character and
+     things like / and wildcards are not doublebyte.  This probably
+     should not be needed. */
 
-    /* Get first character, the pattern for new recmatch calls follows */
-    c = *p; INCSTR(p);
+#ifdef _MBCS
+  if (CLEN(p) == 2) {
+    if (CLEN(s) == 2) {
+      return (*p == *s && *(p+1) == *(s+1)) ?
+        recmatch(p + 2, s + 2, ic) : 0;
+    } else {
+      return 0;
+    }
+  }
+#endif /* ?_MBCS */
 
-    /* If that was the end of the pattern, match if string empty too */
-    if (c == 0)
-        return *s == 0;
+  c = *POSTINCSTR(p);
 
-    /* '?' (or '%') matches any character (but not an empty string). */
-    if (c == WILDCHAR)
-#ifdef WILD_STOP_AT_DIR
-        /* If uO.W_flag is non-zero, it won't match '/' */
-        return (*s && (!sepc || *s != (uch)sepc))
-               ? recmatch(p, s + CLEN(s), ic, sepc) : 0;
-#else
-        return *s ? recmatch(p, s + CLEN(s), ic) : 0;
-#endif
+  /* If that was the end of the pattern, match if string empty too */
+  if (c == 0)
+    return *s == 0;
+
+  /* '?' (or '%' or '#') matches any character (but not an empty string) */
+  if (c == WILDCHR_SINGLE) {
+    if (wild_stop_at_dir)
+      return (*s && *s != DIRSEP_CHR) ? recmatch(p, s + CLEN(s), ic) : 0;
+    else
+      return *s ? recmatch(p, s + CLEN(s), ic) : 0;
+  }
 
-    /* '*' matches any number of characters, including zero */
+  /* WILDCHR_MULTI ('*') matches any number of characters, including zero */
 #ifdef AMIGA
-    if (c == '#' && *p == '?')     /* "#?" is Amiga-ese for "*" */
-        c = '*', p++;
+  if (!no_wild && c == '#' && *p == '?')            /* "#?" is Amiga-ese for "*" */
+    c = WILDCHR_MULTI, p++;
 #endif /* AMIGA */
-    if (c == '*') {
-#ifdef WILD_STOP_AT_DIR
-        if (sepc) {
-          /* check for single "*" or double "**" */
-#  ifdef AMIGA
-          if ((c = p[0]) == '#' && p[1] == '?') /* "#?" is Amiga-ese for "*" */
-            c = '*', p++;
-          if (c != '*') {
-#  else /* !AMIGA */
-          if (*p != '*') {
-#  endif /* ?AMIGA */
-            /* single "*": this doesn't match the dirsep character */
-            for (; *s && *s != (uch)sepc; INCSTR(s))
-                if ((c = recmatch(p, s, ic, sepc)) != 0)
-                    return (int)c;
-            /* end of pattern: matched if at end of string, else continue */
-            if (*p == '\0')
-                return (*s == 0);
-            /* continue to match if at sepc in pattern, else give up */
-            return (*p == (uch)sepc || (*p == '\\' && p[1] == (uch)sepc))
-                   ? recmatch(p, s, ic, sepc) : 2;
-          }
-          /* "**": this matches slashes */
-          ++p;        /* move p behind the second '*' */
-          /* and continue with the non-W_flag code variant */
-        }
-#endif /* WILD_STOP_AT_DIR */
+  if (!no_wild && c == WILDCHR_MULTI)
+  {
+    if (wild_stop_at_dir) {
+      /* Check for an immediately following WILDCHR_MULTI */
+# ifdef AMIGA
+      if ((c = p[0]) == '#' && p[1] == '?') /* "#?" is Amiga-ese for "*" */
+        c = WILDCHR_MULTI, p++;
+      if (c != WILDCHR_MULTI) {
+# else /* !AMIGA */
+      if (*p != WILDCHR_MULTI) {
+# endif /* ?AMIGA */
+        /* Single WILDCHR_MULTI ('*'): this doesn't match slashes */
+        for (; *s && *s != DIRSEP_CHR; INCSTR(s))
+          if ((c = recmatch(p, s, ic)) != 0)
+            return c;
+        /* end of pattern: matched if at end of string, else continue */
         if (*p == 0)
-            return 1;
-        if (isshexp((ZCONST char *)p) == NULL) {
-            /* Optimization for rest of pattern being a literal string:
-             * If there are no other shell expression chars in the rest
-             * of the pattern behind the multi-char wildcard, then just
-             * compare the literal string tail.
-             */
-            ZCONST uch *srest;
-
-            srest = s + (strlen((ZCONST char *)s) - strlen((ZCONST char *)p));
-            if (srest - s < 0)
-                /* remaining literal string from pattern is longer than rest
-                 * of test string, there can't be a match
-                 */
-                return 0;
-            else
-              /* compare the remaining literal pattern string with the last
-               * bytes of the test string to check for a match
-               */
+          return (*s == 0);
+        /* continue to match if at DIRSEP_CHR in pattern, else give up */
+        return (*p == DIRSEP_CHR || (*p == '\\' && p[1] == DIRSEP_CHR))
+               ? recmatch(p, s, ic) : 2;
+      }
+      /* Two consecutive WILDCHR_MULTI ("**"): this matches DIRSEP_CHR ('/') */
+      p++;        /* move p past the second WILDCHR_MULTI */
+      /* continue with the normal non-WILD_STOP_AT_DIR code */
+    } /* wild_stop_at_dir */
+
+    /* Not wild_stop_at_dir */
+    if (*p == 0)
+      return 1;
+    if (!isshexp(p))
+    {
+      /* optimization for rest of pattern being a literal string */
+
+      /* optimization to handle patterns like *.txt */
+      /* if the first char in the pattern is '*' and there */
+      /* are no other shell expression chars, i.e. a literal string */
+      /* then just compare the literal string at the end */
+
+      ZCONST char *srest;
+
+      srest = s + (strlen(s) - strlen(p));
+      if (srest - s < 0)
+        /* remaining literal string from pattern is longer than rest of
+           test string, there can't be a match
+         */
+        return 0;
+      else
+        /* compare the remaining literal pattern string with the last bytes
+           of the test string to check for a match */
 #ifdef _MBCS
-            {
-                ZCONST uch *q = s;
+      {
+        ZCONST char *q = s;
 
-                /* MBCS-aware code must not scan backwards into a string from
-                 * the end.
-                 * So, we have to move forward by character from our well-known
-                 * character position s in the test string until we have
-                 * advanced to the srest position.
-                 */
-                while (q < srest)
-                  INCSTR(q);
-                /* In case the byte *srest is a trailing byte of a multibyte
-                 * character in the test string s, we have actually advanced
-                 * past the position (srest).
-                 * For this case, the match has failed!
-                 */
-                if (q != srest)
-                    return 0;
-                return ((ic
-                         ? namecmp((ZCONST char *)p, (ZCONST char *)q)
-                         : strcmp((ZCONST char *)p, (ZCONST char *)q)
-                        ) == 0);
-            }
+        /* MBCS-aware code must not scan backwards into a string from
+         * the end.
+         * So, we have to move forward by character from our well-known
+         * character position s in the test string until we have advanced
+         * to the srest position.
+         */
+        while (q < srest)
+          INCSTR(q);
+        /* In case the byte *srest is a trailing byte of a multibyte
+         * character, we have actually advanced past the position (srest).
+         * For this case, the match has failed!
+         */
+        if (q != srest)
+          return 0;
+        return ((ic ? namecmp(p, q) : strcmp(p, q)) == 0);
+      }
 #else /* !_MBCS */
-                return ((ic
-                         ? namecmp((ZCONST char *)p, (ZCONST char *)srest)
-                         : strcmp((ZCONST char *)p, (ZCONST char *)srest)
-                        ) == 0);
+        return ((ic ? namecmp(p, srest) : strcmp(p, srest)) == 0);
 #endif /* ?_MBCS */
-        } else {
-            /* pattern contains more wildcards, continue with recursion... */
-            for (; *s; INCSTR(s))
-                if ((c = recmatch(p, s, ic __WDL)) != 0)
-                    return (int)c;
-            return 2;  /* 2 means give up--match will return false */
-        }
     }
-
-    /* Parse and process the list of characters and ranges in brackets */
-    if (c == BEG_RANGE) {
-        int e;          /* flag true if next char to be taken literally */
-        ZCONST uch *q;  /* pointer to end of [-] group */
-        int r;          /* flag true to match anything but the range */
-
-        if (*s == 0)                            /* need a character to match */
-            return 0;
-        p += (r = (*p == '!' || *p == '^'));    /* see if reverse */
-        for (q = p, e = 0; *q; INCSTR(q))       /* find closing bracket */
-            if (e)
-                e = 0;
-            else
-                if (*q == '\\')      /* GRR:  change to ^ for MS-DOS, OS/2? */
-                    e = 1;
-                else if (*q == END_RANGE)
-                    break;
-        if (*q != END_RANGE)         /* nothing matches if bad syntax */
-            return 0;
-        for (c = 0, e = (*p == '-'); p < q; INCSTR(p)) {
-            /* go through the list */
-            if (!e && *p == '\\')               /* set escape flag if \ */
-                e = 1;
-            else if (!e && *p == '-')           /* set start of range if - */
-                c = *(p-1);
-            else {
-                unsigned int cc = Case(*s);
-
-                if (*(p+1) != '-')
-                    for (c = c ? c : *p; c <= *p; c++)  /* compare range */
-                        if ((unsigned)Case(c) == cc) /* typecast for MSC bug */
-                            return r ? 0 : recmatch(q + 1, s + 1, ic __WDL);
-                c = e = 0;   /* clear range, escape flags */
-            }
-        }
-        return r ? recmatch(q + CLEN(q), s + CLEN(s), ic __WDL) : 0;
-                                        /* bracket match failed */
+    else
+    {
+      /* pattern contains more wildcards, continue with recursion... */
+      for (; *s; INCSTR(s))
+        if ((c = recmatch(p, s, ic)) != 0)
+          return c;
+      return 2;           /* 2 means give up--match will return false */
     }
+  }
 
-    /* if escape ('\\'), just compare next character */
-    if (c == '\\' && (c = *p++) == 0)     /* if \ at end, then syntax error */
-        return 0;
+#ifndef VMS             /* No bracket matching in VMS */
+  /* Parse and process the list of characters and ranges in brackets */
+  if (!no_wild && allow_regex && c == '[')
+  {
+    int e;              /* flag true if next char to be taken literally */
+    ZCONST char *q;     /* pointer to end of [-] group */
+    int r;              /* flag true to match anything but the range */
+
+    if (*s == 0)                        /* need a character to match */
+      return 0;
+    p += (r = (*p == '!' || *p == '^')); /* see if reverse */
+    for (q = p, e = 0; *q; q++)         /* find closing bracket */
+      if (e)
+        e = 0;
+      else
+        if (*q == '\\')
+          e = 1;
+        else if (*q == ']')
+          break;
+    if (*q != ']')                      /* nothing matches if bad syntax */
+      return 0;
+    for (c = 0, e = *p == '-'; p < q; p++)      /* go through the list */
+    {
+      if (e == 0 && *p == '\\')         /* set escape flag if \ */
+        e = 1;
+      else if (e == 0 && *p == '-')     /* set start of range if - */
+        c = *(p-1);
+      else
+      {
+        uch cc = (ic ? case_map((uch)*s) : (uch)*s);
+        unsigned uc = (uch) c;          /* wider than uch so uc++ can pass 0xff */
+        if (*(p+1) != '-')
+          for (uc = uc ? uc : (uch)*p; uc <= (uch)*p; uc++)
+            /* compare range */
+            if ((ic ? case_map(uc) : uc) == cc)
+              return r ? 0 : recmatch(q + CLEN(q), s + CLEN(s), ic);
+        c = e = 0;                      /* clear range, escape flags */
+      }
+    }
+    return r ? recmatch(q + CLEN(q), s + CLEN(s), ic) : 0;
+                                        /* bracket match failed */
+  }
+#endif /* !VMS */
 
-    /* just a character--compare it */
-#ifdef QDOS
-    return QMatch(Case((uch)c), Case(*s)) ?
-           recmatch(p, s + CLEN(s), ic __WDL) : 0;
-#else
-    return Case((uch)c) == Case(*s) ?
-           recmatch(p, s + CLEN(s), ic __WDL) : 0;
-#endif
+  /* If escape ('\'), just compare next character */
+  if (!no_wild && c == '\\')
+    if ((c = *p++) == '\0')             /* if \ at end, then syntax error */
+      return 0;
+
+#ifdef VMS
+  /* 2005-11-06 SMS.
+     Handle "..." wildcard in p with "." or "]" in s.
+  */
+  if ((c == '.') && (*p == '.') && (*(p+ CLEN( p)) == '.') &&
+   ((*s == '.') || (*s == ']')))
+  {
+    /* Match "...]" with "]".  Continue after "]" in both. */
+    if ((*(p+ 2* CLEN( p)) == ']') && (*s == ']'))
+      return recmatch( (p+ 3* CLEN( p)), (s+ CLEN( s)), ic);
+
+    /* Else, look for a reduced match in s, until "]" in or end of s. */
+    for (; *s && (*s != ']'); INCSTR(s))
+      if (*s == '.')
+        /* If reduced match, then continue after "..." in p, "." in s. */
+        if ((c = recmatch( (p+ CLEN( p)), s, ic)) != 0)
+          return (int)c;
+
+    /* Match "...]" with "]".  Continue after "]" in both. */
+    if ((*(p+ 2* CLEN( p)) == ']') && (*s == ']'))
+      return recmatch( (p+ 3* CLEN( p)), (s+ CLEN( s)), ic);
+
+    /* No reduced match.  Quit. */
+    return 2;
+  }
+
+#endif /* def VMS */
+
+  /* Just a character--compare it (case-folded only when ic is set) */
+  return (ic ? case_map((uch)c) == case_map((uch)*s) : c == *s) ?
+          recmatch(p, s + CLEN(s), ic) : 0;
+}
 
-} /* end function recmatch() */
 
 
 
+/*************************************************************************************************/
 static char *isshexp(p)
 ZCONST char *p;
 /* If p is a sh expression, a pointer to the first special character is