diff -up unzip60/match.c.recmatch unzip60/match.c --- unzip60/match.c.recmatch 2005-08-14 13:00:36.000000000 -0400 +++ unzip60/match.c 2013-05-28 10:29:57.949077543 -0400 @@ -27,16 +27,14 @@ --------------------------------------------------------------------------- - Copyright on recmatch() from Zip's util.c (although recmatch() was almost - certainly written by Mark Adler...ask me how I can tell :-) ): + Copyright on recmatch() from Zip's util.c + Copyright (c) 1990-2005 Info-ZIP. All rights reserved. - Copyright (C) 1990-1992 Mark Adler, Richard B. Wales, Jean-loup Gailly, - Kai Uwe Rommel and Igor Mandrichenko. + See the accompanying file LICENSE, version 2004-May-22 or later + for terms of use. + If, for some reason, both of these files are missing, the Info-ZIP license + also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html - Permission is granted to any individual or institution to use, copy, - or redistribute this software so long as all of the original files are - included unmodified, that it is not sold for profit, and that this copy- - right notice is retained. --------------------------------------------------------------------------- @@ -53,7 +51,7 @@ A set is composed of characters or ranges; a range looks like ``character hyphen character'' (as in 0-9 or A-Z). [0-9a-zA-Z_] is the minimal set of - characters allowed in the [..] pattern construct. Other characters are + characters ALlowed in the [..] pattern construct. Other characters are allowed (i.e., 8-bit characters) if your system will support them. To suppress the special syntactic significance of any of ``[]*?!^-\'', in- @@ -101,8 +99,32 @@ # define WILDCHAR '?' # define BEG_RANGE '[' # define END_RANGE ']' +# define WILDCHR_SINGLE '?' +# define DIRSEP_CHR '/' +# define WILDCHR_MULTI '*' #endif +#ifdef WILD_STOP_AT_DIR + int wild_stop_at_dir = 1; /* default wildcards do not include / in matches */ +#else + int wild_stop_at_dir = 0; /* default wildcards do include / in matches */ +#endif + + + +/* + * case mapping functions. case_map is used to ignore case in comparisons, + * to_up is used to force upper case even on Unix (for dosify option). + */ +#ifdef USE_CASE_MAP +# define case_map(c) upper[(c) & 0xff] +# define to_up(c) upper[(c) & 0xff] +#else +# define case_map(c) (c) +# define to_up(c) ((c) >= 'a' && (c) <= 'z' ? (c)-'a'+'A' : (c)) +#endif /* USE_CASE_MAP */ + + #if 0 /* GRR: add this to unzip.h someday... */ #if !(defined(MSDOS) && defined(DOSWILD)) #ifdef WILD_STOP_AT_DIR @@ -114,8 +136,8 @@ int recmatch OF((ZCONST uch *pattern, ZC int ignore_case __WDLPRO)); #endif #endif /* 0 */ -static int recmatch OF((ZCONST uch *pattern, ZCONST uch *string, - int ignore_case __WDLPRO)); +static int recmatch OF((ZCONST char *, ZCONST char *, + int)); static char *isshexp OF((ZCONST char *p)); static int namecmp OF((ZCONST char *s1, ZCONST char *s2)); @@ -154,192 +176,240 @@ int match(string, pattern, ignore_case _ } dospattern[j-1] = '\0'; /* nuke the end "." */ } - j = recmatch((uch *)dospattern, (uch *)string, ignore_case __WDL); + j = recmatch(dospattern, string, ignore_case); free(dospattern); return j == 1; } else #endif /* MSDOS && DOSWILD */ - return recmatch((uch *)pattern, (uch *)string, ignore_case __WDL) == 1; + return recmatch(pattern, string, ignore_case) == 1; } +#ifdef _MBCS + +char *___tmp_ptr; +#endif -static int recmatch(p, s, ic __WDL) - ZCONST uch *p; /* sh pattern to match */ - ZCONST uch *s; /* string to which to match it */ - int ic; /* true for case insensitivity */ - __WDLDEF /* directory sepchar for WildStopAtDir mode, or 0 */ +static int recmatch(p, s, cs) +ZCONST char *p; /* sh pattern to match */ +ZCONST char *s; /* string to match it to */ +int cs; /* flag: force case-sensitive matching */ /* Recursively compare the sh pattern p with the string s and return 1 if - * they match, and 0 or 2 if they don't or if there is a syntax error in the - * pattern. This routine recurses on itself no more deeply than the number - * of characters in the pattern. */ + they match, and 0 or 2 if they don't or if there is a syntax error in the + pattern. This routine recurses on itself no deeper than the number of + characters in the pattern. */ { - unsigned int c; /* pattern char or start of range in [-] loop */ + int c; /* pattern char or start of range in [-] loop */ + /* Get first character, the pattern for new recmatch calls follows */ + /* borrowed from Zip's global.c */ + int no_wild = 0; + int allow_regex=1; + /* This fix provided by akt@m5.dion.ne.jp for Japanese. + See 21 July 2006 mail. + It only applies when p is pointing to a doublebyte character and + things like / and wildcards are not doublebyte. This probably + should not be needed. */ - /* Get first character, the pattern for new recmatch calls follows */ - c = *p; INCSTR(p); +#ifdef _MBCS + if (CLEN(p) == 2) { + if (CLEN(s) == 2) { + return (*p == *s && *(p+1) == *(s+1)) ? + recmatch(p + 2, s + 2, cs) : 0; + } else { + return 0; + } + } +#endif /* ?_MBCS */ - /* If that was the end of the pattern, match if string empty too */ - if (c == 0) - return *s == 0; + c = *POSTINCSTR(p); - /* '?' (or '%') matches any character (but not an empty string). */ - if (c == WILDCHAR) -#ifdef WILD_STOP_AT_DIR - /* If uO.W_flag is non-zero, it won't match '/' */ - return (*s && (!sepc || *s != (uch)sepc)) - ? recmatch(p, s + CLEN(s), ic, sepc) : 0; -#else - return *s ? recmatch(p, s + CLEN(s), ic) : 0; -#endif + /* If that was the end of the pattern, match if string empty too */ + if (c == 0) + return *s == 0; + + /* '?' (or '%' or '#') matches any character (but not an empty string) */ + if (c == WILDCHR_SINGLE) { + if (wild_stop_at_dir) + return (*s && *s != DIRSEP_CHR) ? recmatch(p, s + CLEN(s), cs) : 0; + else + return *s ? recmatch(p, s + CLEN(s), cs) : 0; + } - /* '*' matches any number of characters, including zero */ + /* WILDCHR_MULTI ('*') matches any number of characters, including zero */ #ifdef AMIGA - if (c == '#' && *p == '?') /* "#?" is Amiga-ese for "*" */ - c = '*', p++; + if (!no_wild && c == '#' && *p == '?') /* "#?" is Amiga-ese for "*" */ + c = WILDCHR_MULTI, p++; #endif /* AMIGA */ - if (c == '*') { -#ifdef WILD_STOP_AT_DIR - if (sepc) { - /* check for single "*" or double "**" */ -# ifdef AMIGA - if ((c = p[0]) == '#' && p[1] == '?') /* "#?" is Amiga-ese for "*" */ - c = '*', p++; - if (c != '*') { -# else /* !AMIGA */ - if (*p != '*') { -# endif /* ?AMIGA */ - /* single "*": this doesn't match the dirsep character */ - for (; *s && *s != (uch)sepc; INCSTR(s)) - if ((c = recmatch(p, s, ic, sepc)) != 0) - return (int)c; - /* end of pattern: matched if at end of string, else continue */ - if (*p == '\0') - return (*s == 0); - /* continue to match if at sepc in pattern, else give up */ - return (*p == (uch)sepc || (*p == '\\' && p[1] == (uch)sepc)) - ? recmatch(p, s, ic, sepc) : 2; - } - /* "**": this matches slashes */ - ++p; /* move p behind the second '*' */ - /* and continue with the non-W_flag code variant */ - } -#endif /* WILD_STOP_AT_DIR */ + if (!no_wild && c == WILDCHR_MULTI) + { + if (wild_stop_at_dir) { + /* Check for an immediately following WILDCHR_MULTI */ +# ifdef AMIGA + if ((c = p[0]) == '#' && p[1] == '?') /* "#?" is Amiga-ese for "*" */ + c = WILDCHR_MULTI, p++; + if (c != WILDCHR_MULTI) { +# else /* !AMIGA */ + if (*p != WILDCHR_MULTI) { +# endif /* ?AMIGA */ + /* Single WILDCHR_MULTI ('*'): this doesn't match slashes */ + for (; *s && *s != DIRSEP_CHR; INCSTR(s)) + if ((c = recmatch(p, s, cs)) != 0) + return c; + /* end of pattern: matched if at end of string, else continue */ if (*p == 0) - return 1; - if (isshexp((ZCONST char *)p) == NULL) { - /* Optimization for rest of pattern being a literal string: - * If there are no other shell expression chars in the rest - * of the pattern behind the multi-char wildcard, then just - * compare the literal string tail. - */ - ZCONST uch *srest; - - srest = s + (strlen((ZCONST char *)s) - strlen((ZCONST char *)p)); - if (srest - s < 0) - /* remaining literal string from pattern is longer than rest - * of test string, there can't be a match - */ - return 0; - else - /* compare the remaining literal pattern string with the last - * bytes of the test string to check for a match - */ + return (*s == 0); + /* continue to match if at DIRSEP_CHR in pattern, else give up */ + return (*p == DIRSEP_CHR || (*p == '\\' && p[1] == DIRSEP_CHR)) + ? recmatch(p, s, cs) : 2; + } + /* Two consecutive WILDCHR_MULTI ("**"): this matches DIRSEP_CHR ('/') */ + p++; /* move p past the second WILDCHR_MULTI */ + /* continue with the normal non-WILD_STOP_AT_DIR code */ + } /* wild_stop_at_dir */ + + /* Not wild_stop_at_dir */ + if (*p == 0) + return 1; + if (!isshexp((char *)p)) + { + /* optimization for rest of pattern being a literal string */ + + /* optimization to handle patterns like *.txt */ + /* if the first char in the pattern is '*' and there */ + /* are no other shell expression chars, i.e. a literal string */ + /* then just compare the literal string at the end */ + + ZCONST char *srest; + + srest = s + (strlen(s) - strlen(p)); + if (srest - s < 0) + /* remaining literal string from pattern is longer than rest of + test string, there can't be a match + */ + return 0; + else + /* compare the remaining literal pattern string with the last bytes + of the test string to check for a match */ #ifdef _MBCS - { - ZCONST uch *q = s; + { + ZCONST char *q = s; - /* MBCS-aware code must not scan backwards into a string from - * the end. - * So, we have to move forward by character from our well-known - * character position s in the test string until we have - * advanced to the srest position. - */ - while (q < srest) - INCSTR(q); - /* In case the byte *srest is a trailing byte of a multibyte - * character in the test string s, we have actually advanced - * past the position (srest). - * For this case, the match has failed! - */ - if (q != srest) - return 0; - return ((ic - ? namecmp((ZCONST char *)p, (ZCONST char *)q) - : strcmp((ZCONST char *)p, (ZCONST char *)q) - ) == 0); - } + /* MBCS-aware code must not scan backwards into a string from + * the end. + * So, we have to move forward by character from our well-known + * character position s in the test string until we have advanced + * to the srest position. + */ + while (q < srest) + INCSTR(q); + /* In case the byte *srest is a trailing byte of a multibyte + * character, we have actually advanced past the position (srest). + * For this case, the match has failed! + */ + if (q != srest) + return 0; + return ((cs ? strcmp(p, q) : namecmp(p, q)) == 0); + } #else /* !_MBCS */ - return ((ic - ? namecmp((ZCONST char *)p, (ZCONST char *)srest) - : strcmp((ZCONST char *)p, (ZCONST char *)srest) - ) == 0); + return ((cs ? strcmp(p, srest) : namecmp(p, srest)) == 0); #endif /* ?_MBCS */ - } else { - /* pattern contains more wildcards, continue with recursion... */ - for (; *s; INCSTR(s)) - if ((c = recmatch(p, s, ic __WDL)) != 0) - return (int)c; - return 2; /* 2 means give up--match will return false */ - } } - - /* Parse and process the list of characters and ranges in brackets */ - if (c == BEG_RANGE) { - int e; /* flag true if next char to be taken literally */ - ZCONST uch *q; /* pointer to end of [-] group */ - int r; /* flag true to match anything but the range */ - - if (*s == 0) /* need a character to match */ - return 0; - p += (r = (*p == '!' || *p == '^')); /* see if reverse */ - for (q = p, e = 0; *q; INCSTR(q)) /* find closing bracket */ - if (e) - e = 0; - else - if (*q == '\\') /* GRR: change to ^ for MS-DOS, OS/2? */ - e = 1; - else if (*q == END_RANGE) - break; - if (*q != END_RANGE) /* nothing matches if bad syntax */ - return 0; - for (c = 0, e = (*p == '-'); p < q; INCSTR(p)) { - /* go through the list */ - if (!e && *p == '\\') /* set escape flag if \ */ - e = 1; - else if (!e && *p == '-') /* set start of range if - */ - c = *(p-1); - else { - unsigned int cc = Case(*s); - - if (*(p+1) != '-') - for (c = c ? c : *p; c <= *p; c++) /* compare range */ - if ((unsigned)Case(c) == cc) /* typecast for MSC bug */ - return r ? 0 : recmatch(q + 1, s + 1, ic __WDL); - c = e = 0; /* clear range, escape flags */ - } - } - return r ? recmatch(q + CLEN(q), s + CLEN(s), ic __WDL) : 0; - /* bracket match failed */ + else + { + /* pattern contains more wildcards, continue with recursion... */ + for (; *s; INCSTR(s)) + if ((c = recmatch(p, s, cs)) != 0) + return c; + return 2; /* 2 means give up--shmatch will return false */ } + } - /* if escape ('\\'), just compare next character */ - if (c == '\\' && (c = *p++) == 0) /* if \ at end, then syntax error */ - return 0; +#ifndef VMS /* No bracket matching in VMS */ + /* Parse and process the list of characters and ranges in brackets */ + if (!no_wild && allow_regex && c == '[') + { + int e; /* flag true if next char to be taken literally */ + ZCONST char *q; /* pointer to end of [-] group */ + int r; /* flag true to match anything but the range */ + + if (*s == 0) /* need a character to match */ + return 0; + p += (r = (*p == '!' || *p == '^')); /* see if reverse */ + for (q = p, e = 0; *q; q++) /* find closing bracket */ + if (e) + e = 0; + else + if (*q == '\\') + e = 1; + else if (*q == ']') + break; + if (*q != ']') /* nothing matches if bad syntax */ + return 0; + for (c = 0, e = *p == '-'; p < q; p++) /* go through the list */ + { + if (e == 0 && *p == '\\') /* set escape flag if \ */ + e = 1; + else if (e == 0 && *p == '-') /* set start of range if - */ + c = *(p-1); + else + { + uch cc = (cs ? (uch)*s : case_map((uch)*s)); + uch uc = (uch) c; + if (*(p+1) != '-') + for (uc = uc ? uc : (uch)*p; uc <= (uch)*p; uc++) + /* compare range */ + if ((cs ? uc : case_map(uc)) == cc) + return r ? 0 : recmatch(q + CLEN(q), s + CLEN(s), cs); + c = e = 0; /* clear range, escape flags */ + } + } + return r ? recmatch(q + CLEN(q), s + CLEN(s), cs) : 0; + /* bracket match failed */ + } +#endif /* !VMS */ - /* just a character--compare it */ -#ifdef QDOS - return QMatch(Case((uch)c), Case(*s)) ? - recmatch(p, s + CLEN(s), ic __WDL) : 0; -#else - return Case((uch)c) == Case(*s) ? - recmatch(p, s + CLEN(s), ic __WDL) : 0; -#endif + /* If escape ('\'), just compare next character */ + if (!no_wild && c == '\\') + if ((c = *p++) == '\0') /* if \ at end, then syntax error */ + return 0; + +#ifdef VMS + /* 2005-11-06 SMS. + Handle "..." wildcard in p with "." or "]" in s. + */ + if ((c == '.') && (*p == '.') && (*(p+ CLEN( p)) == '.') && + ((*s == '.') || (*s == ']'))) + { + /* Match "...]" with "]". Continue after "]" in both. */ + if ((*(p+ 2* CLEN( p)) == ']') && (*s == ']')) + return recmatch( (p+ 3* CLEN( p)), (s+ CLEN( s)), cs); + + /* Else, look for a reduced match in s, until "]" in or end of s. */ + for (; *s && (*s != ']'); INCSTR(s)) + if (*s == '.') + /* If reduced match, then continue after "..." in p, "." in s. */ + if ((c = recmatch( (p+ CLEN( p)), s, cs)) != 0) + return (int)c; + + /* Match "...]" with "]". Continue after "]" in both. */ + if ((*(p+ 2* CLEN( p)) == ']') && (*s == ']')) + return recmatch( (p+ 3* CLEN( p)), (s+ CLEN( s)), cs); + + /* No reduced match. Quit. */ + return 2; + } + +#endif /* def VMS */ + + /* Just a character--compare it */ + return (cs ? c == *s : case_map((uch)c) == case_map((uch)*s)) ? + recmatch(p, s + CLEN(s), cs) : 0; +} -} /* end function recmatch() */ +/*************************************************************************************************/ static char *isshexp(p) ZCONST char *p; /* If p is a sh expression, a pointer to the first special character is