diff --git a/23112.patch b/23112.patch new file mode 100644 index 0000000..de28c46 --- /dev/null +++ b/23112.patch @@ -0,0 +1,72 @@ +From 82616eec41f6a6989a3b507822c17fc81a10e296 Mon Sep 17 00:00:00 2001 +From: zihaomu +Date: Mon, 9 Jan 2023 13:40:04 +0800 +Subject: [PATCH] fix possible segmentation fault error in winograd on x86 + +--- + .../src/layers/fast_convolution/fast_convolution.avx2.cpp | 2 +- + .../dnn/src/layers/fast_convolution/fast_convolution.cpp | 8 +++++++- + .../src/layers/fast_convolution/winograd_3x3s1_f63.cpp | 4 ++-- + 3 files changed, 10 insertions(+), 4 deletions(-) + +diff --git a/modules/dnn/src/layers/fast_convolution/fast_convolution.avx2.cpp b/modules/dnn/src/layers/fast_convolution/fast_convolution.avx2.cpp +index 0d3c1447626a..c98fbe72bda8 100644 +--- a/modules/dnn/src/layers/fast_convolution/fast_convolution.avx2.cpp ++++ b/modules/dnn/src/layers/fast_convolution/fast_convolution.avx2.cpp +@@ -119,7 +119,7 @@ void convBlock_AVX2(int np, const float* a, const float* b, float* c, int ldc, b + void _fx_winograd_accum_f32(const float* inwptr, const float* wptr, + float* outbuf, int Cg, int iblock) + { +- CV_Assert(_FX_WINO_IBLOCK == 6 && _FX_WINO_KBLOCK == 4);// && _FX_WINO_ATOM_F32 == 8); ++ CV_Assert(_FX_WINO_IBLOCK == 6 && _FX_WINO_KBLOCK == 4 && _FX_WINO_ATOM_F32 == 8); + if (iblock > 3) + { + for (int atom_id = 0; atom_id < _FX_WINO_NATOMS_F32; atom_id++, +diff --git a/modules/dnn/src/layers/fast_convolution/fast_convolution.cpp b/modules/dnn/src/layers/fast_convolution/fast_convolution.cpp +index 1cde7b324f6f..946980bebe49 100644 +--- a/modules/dnn/src/layers/fast_convolution/fast_convolution.cpp ++++ b/modules/dnn/src/layers/fast_convolution/fast_convolution.cpp +@@ -105,6 +105,12 @@ Ptr initFastConv( + conv->conv_type = _FX_CONV_TYPE_GENERIC; + #endif + ++#if CV_TRY_AVX2 ++ // Disable Winograd when CV_TRY_AVX2 is true, but conv->useAVX2 is false. 
++ if (conv->conv_type == _FX_CONV_TYPE_WINOGRAD3X3 && !conv->useAVX2) ++ conv->conv_type = _FX_CONV_TYPE_GENERIC; ++#endif ++ + Mat weightsMat = _weightsMat.getMat(); + auto wShape = shape(weightsMat); + const size_t wstep = weightsMat.step1(); +@@ -257,7 +263,7 @@ Ptr initFastConv( + // we can always read MR elements starting from any valid index + { + int k = 0, nbias = K + VEC_ALIGN; +- conv->biasBuf.reserve(nbias); ++ conv->biasBuf.resize(nbias); + float* biasBufPtr = conv->biasBuf.data(); + for(; k < K; k++) + biasBufPtr[k] = srcBias ? srcBias[k] : 0.f; +diff --git a/modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp b/modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp +index e3b80884102a..b0ccfd0cd24a 100644 +--- a/modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp ++++ b/modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp +@@ -22,7 +22,7 @@ _fx_winograd_accum_f32(const float* inwptr, const float* wptr, + float* outbuf, int Cg, int iblock) + { + #if CV_NEON && CV_NEON_AARCH64 +- CV_Assert(_FX_WINO_IBLOCK == 6 && _FX_WINO_KBLOCK == 4); ++ CV_Assert(_FX_WINO_IBLOCK == 6 && _FX_WINO_KBLOCK == 4 && _FX_WINO_ATOM_F32 == 4); + if (iblock > 3) + { + for (int atom_id = 0; atom_id < _FX_WINO_NATOMS_F32; atom_id++, +@@ -144,7 +144,7 @@ _fx_winograd_accum_f32(const float* inwptr, const float* wptr, + } + } + #elif CV_SIMD128 +- CV_Assert(_FX_WINO_IBLOCK == 3 && _FX_WINO_KBLOCK == 4); ++ CV_Assert(_FX_WINO_IBLOCK == 3 && _FX_WINO_KBLOCK == 4 && _FX_WINO_ATOM_F32 == 4); + for (int atom_id = 0; atom_id < _FX_WINO_NATOMS_F32; atom_id++, + outbuf += _FX_WINO_ATOM_F32) + { diff --git a/opencv.spec b/opencv.spec index 68f3259..b7ae026 100644 --- a/opencv.spec +++ b/opencv.spec @@ -74,7 +74,7 @@ Version: 4.7.0 %global minorver %(foo=%{version}; a=(${foo//./ }); echo ${a[1]} ) %global padding %(digits=00; num=%{minorver}; echo ${digits:${#num}:${#digits}} ) %global abiver %(echo %{majorver}%{padding}%{minorver} ) -Release: 
8%{?dist} +Release: 9%{?dist} Summary: Collection of algorithms for computer vision # This is normal three clause BSD. License: BSD @@ -98,6 +98,8 @@ Source5: xorg.conf Patch0: opencv-4.1.0-install_3rdparty_licenses.patch Patch3: opencv.python.patch +# Upstream commit to fix rhbz#2190013 +Patch4: https://github.com/opencv/opencv/pull/23112.patch BuildRequires: gcc-c++ BuildRequires: cmake >= 2.6.3 @@ -292,6 +294,7 @@ popd &>/dev/null %patch -P 0 -p1 -b .install_3rdparty_licenses %patch -P 3 -p1 -b .python_install_binary +%patch -P 4 -p1 -b .backport_avx2 pushd %{name}_contrib-%{version} #patch1 -p1 -b .install_cvv @@ -518,6 +521,9 @@ ln -s -r %{buildroot}%{_jnidir}/opencv-%{javaver}.jar %{buildroot}%{_jnidir}/ope %{_libdir}/libopencv_xphoto.so.{%{abiver},%{version}} %changelog +* Mon Jun 12 2023 Nicolas Chauvet - 4.7.0-9 +- Upstream commit to fix rhbz#2190013 + * Sat May 13 2023 Sérgio Basto - 4.7.0-8 - The %%ldconfig_scriptlets macro can be removed on all Fedoras. Possibly also on EPEL 8. But it is required on EPEL 7.