--- grep-2.5.1a/src/search.c.w 2006-02-20 14:27:27.000000000 +0000 +++ grep-2.5.1a/src/search.c 2006-02-20 14:32:07.000000000 +0000 @@ -507,10 +507,114 @@ if (match_words) while (start >= 0) { - if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1])) - && (len == end - beg - 1 - || !WCHAR ((unsigned char) beg[start + len]))) - goto success_in_beg_and_end; + int lword_match = 0; + if (start == 0) + lword_match = 1; + else + { + assert (start > 0); +#ifdef MBS_SUPPORT + if (mb_cur_max > 1) + { + const char *s; + size_t mr; + wchar_t pwc; + + /* Locate the start of the multibyte character + before the match position (== beg + start). */ + if (using_utf8) + { + /* UTF-8 is a special case: scan backwards + until we find a 7-bit character or a + lead byte. */ + s = beg + start - 1; + while (s > buf + && (unsigned char) *s >= 0x80 + && (unsigned char) *s <= 0xbf) + --s; + } + else + { + /* Scan forwards to find the start of the + last complete character before the + match position. */ + size_t bytes_left = start - 1; + s = beg; + while (bytes_left > 0) + { + mr = mbrlen (s, bytes_left, &mbs); + if (mr == (size_t) -1 || mr == 0) + { + memset (&mbs, '\0', sizeof (mbs)); + s++; + bytes_left--; + continue; + } + if (mr == (size_t) -2) + { + memset (&mbs, '\0', sizeof (mbs)); + break; + } + s += mr; + bytes_left -= mr; + } + } + mr = mbrtowc (&pwc, s, beg + start - s, &mbs); + if (mr == (size_t) -2 || mr == (size_t) -1 || + mr == 0) + { + memset (&mbs, '\0', sizeof (mbstate_t)); + lword_match = 1; + } + else if (!(iswalnum (pwc) || pwc == L'_') + && mr == beg + start - s) + lword_match = 1; + } + else +#endif /* MBS_SUPPORT */ + if (!WCHAR ((unsigned char) beg[start - 1])) + lword_match = 1; + } + + if (lword_match) + { + int rword_match = 0; + if (start + len == end - beg - 1) + rword_match = 1; + else + { +#ifdef MBS_SUPPORT + if (mb_cur_max > 1) + { + wchar_t nwc; + int mr; + + mr = mbtowc (&nwc, beg + start + len, + end - beg - start - len - 1); + if (mr <= 0) + { + memset (&mbs, '\0', sizeof (mbstate_t)); + rword_match = 1; + } + else if (!iswalnum (nwc) && nwc != L'_') + rword_match = 1; + } + else +#endif /* MBS_SUPPORT */ + if (!WCHAR ((unsigned char) beg[start + len])) + rword_match = 1; + } + + if (rword_match) + { + if (!exact) + /* Returns the whole line. */ + goto success_in_beg_and_end; + else + /* Returns just this word match. */ + goto success_in_start_and_len; + } + } if (len > 0) { /* Try a shorter length anchored at the same place. */