Blob Blame Raw
From 1385ac98c5f75358978bb05c2d6c4134413cf689 Mon Sep 17 00:00:00 2001
From: David Mitchell <davem@iabyn.com>
Date: Fri, 22 Mar 2019 17:38:48 +0000
Subject: [PATCH] avoid leak assigning regexp to non-COW string
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In something like

    $s = substr(.....); # $s now a non-COW SvPOK() SV
    $r = qr/..../;
    $s = $$r;

$s's previous string buffer would leak when an SVt_REGEXP type SV is
assigned to it.

Worse, if $s was an SVt_PVPV, it would fail an assert on debugging
builds.

The fix is to make sure any remaining stringy stuff is cleaned up
before copying the REGEXP.

Signed-off-by: Petr Písař <ppisar@redhat.com>
---
 regcomp.c | 16 ++++++++++++++++
 t/op/qr.t | 34 +++++++++++++++++++++++++++++++++-
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/regcomp.c b/regcomp.c
index 15783541a4..e13da83673 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -20665,7 +20665,23 @@ Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv)
     if (!dsv)
 	dsv = (REGEXP*) newSV_type(SVt_REGEXP);
     else {
+        assert(SvTYPE(dsv) == SVt_REGEXP || (SvTYPE(dsv) == SVt_PVLV));
+
+        /* our only valid caller, sv_setsv_flags(), should have done
+         * a SV_CHECK_THINKFIRST_COW_DROP() by now */
+        assert(!SvOOK(dsv));
+        assert(!SvIsCOW(dsv));
+        assert(!SvROK(dsv));
+
+        if (SvPVX_const(dsv)) {
+            if (SvLEN(dsv))
+                Safefree(SvPVX(dsv));
+            SvPVX(dsv) = NULL;
+        }
+        SvLEN_set(dsv, 0);
+        SvCUR_set(dsv, 0);
 	SvOK_off((SV *)dsv);
+
 	if (islv) {
 	    /* For PVLVs, the head (sv_any) points to an XPVLV, while
              * the LV's xpvlenu_rx will point to a regexp body, which
diff --git a/t/op/qr.t b/t/op/qr.t
index 32b9e3b23b..e03a465430 100644
--- a/t/op/qr.t
+++ b/t/op/qr.t
@@ -7,7 +7,7 @@ BEGIN {
     require './test.pl';
 }
 
-plan(tests => 34);
+plan(tests => 37);
 
 sub r {
     return qr/Good/;
@@ -135,3 +135,35 @@ sub {
     };
 }
 pass("PVLV-as-REGEXP double-free of PVX");
+
+# a non-cow SVPV leaked it's string buffer when a REGEXP was assigned to
+# it. Give valgrind/ASan something to work on
+{
+    my $s = substr("ab",0,1); # generate a non-COW string
+    my $r1 = qr/x/;
+    $s = $$r1; # make sure "a" isn't leaked
+    pass("REGEXP leak");
+
+    my $dest = 0;
+    sub Foo99::DESTROY { $dest++ }
+
+    # ditto but make sure we don't leak a reference
+    {
+        my $ref = bless [], "Foo99";
+        my $r2 = qr/x/;
+        $ref = $$r2;
+    }
+    is($dest, 1, "REGEXP RV leak");
+
+    # and worse, assigning a REGEXP to an PVLV that had a string value
+    # caused an assert failure. Same code, but using $_[0] which is an
+    # lvalue, rather than $s.
+
+    my %h;
+    sub {
+        $_[0] = substr("ab",0,1); # generate a non-COW string
+        my $r = qr/x/;
+        $_[0] = $$r; # make sure "a" isn't leaked
+    }->($h{foo}); # passes PVLV to sub
+    is($h{foo}, "(?^:x)", "REGEXP PVLV leak");
+}
-- 
2.20.1