commit e444525ef1634b675cd1cf52d39f4320ef0aecfd Author: Mike Dalessio Date: Sun Apr 10 14:42:04 2022 -0400 fix(perf): HTML4::EncodingReader detection diff --git a/lib/nokogiri/html4/document.rb b/lib/nokogiri/html4/document.rb index 177efc04..fbc22d20 100644 --- a/lib/nokogiri/html4/document.rb +++ b/lib/nokogiri/html4/document.rb @@ -268,7 +268,7 @@ module Nokogiri end def self.detect_encoding(chunk) - (m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/)) && + (m = chunk.match(/\A(<\?xml[ \t\r\n][^>]*>)/)) && (return Nokogiri.XML(m[1]).encoding) if Nokogiri.jruby? diff --git a/test/html4/test_document_encoding.rb b/test/html4/test_document_encoding.rb index 61153017..ecb4aa9a 100644 --- a/test/html4/test_document_encoding.rb +++ b/test/html4/test_document_encoding.rb @@ -155,6 +155,18 @@ class TestNokogiriHtmlDocument < Nokogiri::TestCase end end end + + it "does not start backtracking during detection of XHTML encoding" do + # this test is a quick and dirty version + # of the more complete perf test that is on main. + n = 40_000 + redos_string = "