896b40f
From 1385ac98c5f75358978bb05c2d6c4134413cf689 Mon Sep 17 00:00:00 2001
896b40f
From: David Mitchell <davem@iabyn.com>
896b40f
Date: Fri, 22 Mar 2019 17:38:48 +0000
896b40f
Subject: [PATCH] avoid leak assigning regexp to non-COW string
896b40f
MIME-Version: 1.0
896b40f
Content-Type: text/plain; charset=UTF-8
896b40f
Content-Transfer-Encoding: 8bit
896b40f
896b40f
In something like
896b40f
896b40f
    $s = substr(.....); # $s now a non-COW SvPOK() SV
896b40f
    $r = qr/..../;
896b40f
    $s = $$r;
896b40f
896b40f
$s's previous string buffer would leak when an SVt_REGEXP type SV is
896b40f
assigned to it.
896b40f
896b40f
Worse, if $s was an SVt_PVPV, it would fail an assert on debugging
896b40f
builds.
896b40f
896b40f
The fix is to make sure any remaining stringy stuff is cleaned up
896b40f
before copying the REGEXP.
896b40f
896b40f
Signed-off-by: Petr Písař <ppisar@redhat.com>
896b40f
---
896b40f
 regcomp.c | 16 ++++++++++++++++
896b40f
 t/op/qr.t | 34 +++++++++++++++++++++++++++++++++-
896b40f
 2 files changed, 49 insertions(+), 1 deletion(-)
896b40f
896b40f
diff --git a/regcomp.c b/regcomp.c
896b40f
index 15783541a4..e13da83673 100644
896b40f
--- a/regcomp.c
896b40f
+++ b/regcomp.c
896b40f
@@ -20665,7 +20665,23 @@ Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv)
896b40f
     if (!dsv)
896b40f
 	dsv = (REGEXP*) newSV_type(SVt_REGEXP);
896b40f
     else {
896b40f
+        assert(SvTYPE(dsv) == SVt_REGEXP || (SvTYPE(dsv) == SVt_PVLV));
896b40f
+
896b40f
+        /* our only valid caller, sv_setsv_flags(), should have done
896b40f
+         * a SV_CHECK_THINKFIRST_COW_DROP() by now */
896b40f
+        assert(!SvOOK(dsv));
896b40f
+        assert(!SvIsCOW(dsv));
896b40f
+        assert(!SvROK(dsv));
896b40f
+
896b40f
+        if (SvPVX_const(dsv)) {
896b40f
+            if (SvLEN(dsv))
896b40f
+                Safefree(SvPVX(dsv));
896b40f
+            SvPVX(dsv) = NULL;
896b40f
+        }
896b40f
+        SvLEN_set(dsv, 0);
896b40f
+        SvCUR_set(dsv, 0);
896b40f
 	SvOK_off((SV *)dsv);
896b40f
+
896b40f
 	if (islv) {
896b40f
 	    /* For PVLVs, the head (sv_any) points to an XPVLV, while
896b40f
              * the LV's xpvlenu_rx will point to a regexp body, which
896b40f
diff --git a/t/op/qr.t b/t/op/qr.t
896b40f
index 32b9e3b23b..e03a465430 100644
896b40f
--- a/t/op/qr.t
896b40f
+++ b/t/op/qr.t
896b40f
@@ -7,7 +7,7 @@ BEGIN {
896b40f
     require './test.pl';
896b40f
 }
896b40f
 
896b40f
-plan(tests => 34);
896b40f
+plan(tests => 37);
896b40f
 
896b40f
 sub r {
896b40f
     return qr/Good/;
896b40f
@@ -135,3 +135,35 @@ sub {
896b40f
     };
896b40f
 }
896b40f
 pass("PVLV-as-REGEXP double-free of PVX");
896b40f
+
896b40f
+# a non-cow SVPV leaked it's string buffer when a REGEXP was assigned to
896b40f
+# it. Give valgrind/ASan something to work on
896b40f
+{
896b40f
+    my $s = substr("ab",0,1); # generate a non-COW string
896b40f
+    my $r1 = qr/x/;
896b40f
+    $s = $$r1; # make sure "a" isn't leaked
896b40f
+    pass("REGEXP leak");
896b40f
+
896b40f
+    my $dest = 0;
896b40f
+    sub Foo99::DESTROY { $dest++ }
896b40f
+
896b40f
+    # ditto but make sure we don't leak a reference
896b40f
+    {
896b40f
+        my $ref = bless [], "Foo99";
896b40f
+        my $r2 = qr/x/;
896b40f
+        $ref = $$r2;
896b40f
+    }
896b40f
+    is($dest, 1, "REGEXP RV leak");
896b40f
+
896b40f
+    # and worse, assigning a REGEXP to an PVLV that had a string value
896b40f
+    # caused an assert failure. Same code, but using $_[0] which is an
896b40f
+    # lvalue, rather than $s.
896b40f
+
896b40f
+    my %h;
896b40f
+    sub {
896b40f
+        $_[0] = substr("ab",0,1); # generate a non-COW string
896b40f
+        my $r = qr/x/;
896b40f
+        $_[0] = $$r; # make sure "a" isn't leaked
896b40f
+    }->($h{foo}); # passes PVLV to sub
896b40f
+    is($h{foo}, "(?^:x)", "REGEXP PVLV leak");
896b40f
+}
896b40f
-- 
896b40f
2.20.1
896b40f