diff --git a/grep-2.20-pcre-invalid-utf8-fix.patch b/grep-2.20-pcre-invalid-utf8-fix.patch new file mode 100644 index 0000000..5f7530f --- /dev/null +++ b/grep-2.20-pcre-invalid-utf8-fix.patch @@ -0,0 +1,136 @@ +diff --git a/src/pcresearch.c b/src/pcresearch.c +index 820dd00..11df488 100644 +--- a/src/pcresearch.c ++++ b/src/pcresearch.c +@@ -136,34 +136,42 @@ Pexecute (char const *buf, size_t size, size_t *match_size, + #else + /* This array must have at least two elements; everything after that + is just for performance improvement in pcre_exec. */ +- int sub[300]; ++ enum { nsub = 300 }; ++ int sub[nsub]; + +- const char *line_buf, *line_end, *line_next; ++ char const *p = start_ptr ? start_ptr : buf; ++ int options = p == buf || p[-1] == eolbyte ? 0 : PCRE_NOTBOL; ++ char const *line_start = buf; + int e = PCRE_ERROR_NOMATCH; +- ptrdiff_t start_ofs = start_ptr ? start_ptr - buf : 0; ++ char const *line_end; + + /* PCRE can't limit the matching to single lines, therefore we have to + match each line in the buffer separately. */ +- for (line_next = buf; +- e == PCRE_ERROR_NOMATCH && line_next < buf + size; +- start_ofs -= line_next - line_buf) ++ for (; p < buf + size; p = line_start = line_end + 1) + { +- line_buf = line_next; +- line_end = memchr (line_buf, eolbyte, (buf + size) - line_buf); +- if (line_end == NULL) +- line_next = line_end = buf + size; +- else +- line_next = line_end + 1; +- +- if (start_ptr && start_ptr >= line_end) +- continue; ++ line_end = memchr (p, eolbyte, buf + size - p); + +- if (INT_MAX < line_end - line_buf) ++ if (INT_MAX < line_end - p) + error (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit")); + +- e = pcre_exec (cre, extra, line_buf, line_end - line_buf, +- start_ofs < 0 ? 0 : start_ofs, 0, +- sub, sizeof sub / sizeof *sub); ++ /* Treat encoding-error bytes as data that cannot match. */ ++ for (;;) ++ { ++ e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub); ++ if (e != PCRE_ERROR_BADUTF8) ++ break; ++ e = pcre_exec (cre, extra, p, sub[0], 0, ++ options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL, ++ sub, nsub); ++ if (e != PCRE_ERROR_NOMATCH) ++ break; ++ p += sub[0] + 1; ++ options = PCRE_NOTBOL; ++ } ++ ++ if (e != PCRE_ERROR_NOMATCH) ++ break; ++ options = 0; + } + + if (e <= 0) +@@ -180,10 +188,6 @@ Pexecute (char const *buf, size_t size, size_t *match_size, + error (EXIT_TROUBLE, 0, + _("exceeded PCRE's backtracking limit")); + +- case PCRE_ERROR_BADUTF8: +- error (EXIT_TROUBLE, 0, +- _("invalid UTF-8 byte sequence in input")); +- + default: + /* For now, we lump all remaining PCRE failures into this basket. + If anyone cares to provide sample grep usage that can trigger +@@ -197,25 +201,8 @@ Pexecute (char const *buf, size_t size, size_t *match_size, + } + else + { +- /* Narrow down to the line we've found. */ +- char const *beg = line_buf + sub[0]; +- char const *end = line_buf + sub[1]; +- char const *buflim = buf + size; +- char eol = eolbyte; +- if (!start_ptr) +- { +- /* FIXME: The case when '\n' is not found indicates a bug: +- Since grep is line oriented, the match should never contain +- a newline, so there _must_ be a newline following. +- */ +- if (!(end = memchr (end, eol, buflim - end))) +- end = buflim; +- else +- end++; +- while (buf < beg && beg[-1] != eol) +- --beg; +- } +- ++ char const *beg = start_ptr ? p + sub[0] : line_start; ++ char const *end = start_ptr ? p + sub[1] : line_end + 1; + *match_size = end - beg; + return beg - buf; + } +diff --git a/tests/pcre-infloop b/tests/pcre-infloop +index 1b33e72..b92f8e1 100755 +--- a/tests/pcre-infloop ++++ b/tests/pcre-infloop +@@ -28,6 +28,6 @@ printf 'a\201b\r' > in || framework_failure_ + fail=0 + + LC_ALL=en_US.UTF-8 timeout 3 grep -P 'a.?..b' in +-test $? = 2 || fail_ "libpcre's match function appears to infloop" ++test $? = 1 || fail_ "libpcre's match function appears to infloop" + + Exit $fail +diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input +index 913e8ee..9da4b18 100755 +--- a/tests/pcre-invalid-utf8-input ++++ b/tests/pcre-invalid-utf8-input +@@ -13,9 +13,12 @@ require_en_utf8_locale_ + + fail=0 + +-printf 'j\202\nj\n' > in || framework_failure_ ++printf 'j\202j\nj\nk\202\n' > in || framework_failure_ + + LC_ALL=en_US.UTF-8 grep -P j in +-test $? -eq 2 || fail=1 ++test $? -eq 0 || fail=1 ++ ++LC_ALL=en_US.UTF-8 grep -P 'k$' in ++test $? -eq 1 || fail=1 + + Exit $fail diff --git a/grep.spec b/grep.spec index 7458394..1784194 100644 --- a/grep.spec +++ b/grep.spec @@ -3,7 +3,7 @@ Summary: Pattern matching utilities Name: grep Version: 2.20 -Release: 5%{?dist} +Release: 6%{?dist} License: GPLv3+ Group: Applications/Text Source: ftp://ftp.gnu.org/pub/gnu/grep/grep-%{version}.tar.xz @@ -15,6 +15,8 @@ Source4: grepconf.sh Patch0: grep-2.20-man-fix-gs.patch # upstream ticket 39445 Patch1: grep-2.20-help-align.patch +# backported from upstream +Patch2: grep-2.20-pcre-invalid-utf8-fix.patch URL: http://www.gnu.org/software/grep/ Requires(post): /sbin/install-info Requires(preun): /sbin/install-info @@ -35,6 +37,7 @@ GNU grep is needed by many scripts, so it shall be installed on every system. %setup -q %patch0 -p1 -b .man-fix-gs %patch1 -p1 -b .help-align +%patch2 -p1 -b .pcre-invalid-utf8-fix %build %global BUILD_FLAGS $RPM_OPT_FLAGS @@ -90,6 +93,11 @@ fi %{_libexecdir}/grepconf.sh %changelog +* Tue Nov 11 2014 Jaroslav Škarvada - 2.20-6 +- Fixed invalid UTF-8 byte sequence error in PCRE mode + (by pcre-invalid-utf8-fix patch) + Resolves: rhbz#1161832 + * Wed Aug 20 2014 Jaroslav Škarvada - 2.20-5 - Added script to check whether grep is coloured Resolves: rhbz#1034631