diff --git a/.gitignore b/.gitignore index 35a0395..9cd87bb 100644 --- a/.gitignore +++ b/.gitignore @@ -72,3 +72,4 @@ /dask-2023.4.1.tar.gz /dask-2023.7.0.tar.gz /dask-2023.7.1.tar.gz +/dask-2023.8.1.tar.gz diff --git a/0001-Skip-test_encoding_gh601-on-big-endian-machines.patch b/0001-Skip-test_encoding_gh601-on-big-endian-machines.patch index 142a707..d6bb022 100644 --- a/0001-Skip-test_encoding_gh601-on-big-endian-machines.patch +++ b/0001-Skip-test_encoding_gh601-on-big-endian-machines.patch @@ -1,7 +1,7 @@ -From d32c91deead99fd94cb6e95702f4b3f9ffaf37d1 Mon Sep 17 00:00:00 2001 +From a5ea6c2f7fcaee8830aca6dca8102663976bae45 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Sun, 7 Mar 2021 04:07:32 -0500 -Subject: [PATCH 1/3] Skip test_encoding_gh601 on big-endian machines. +Subject: [PATCH 1/4] Skip test_encoding_gh601 on big-endian machines. Signed-off-by: Elliott Sales de Andrade --- @@ -9,7 +9,7 @@ Signed-off-by: Elliott Sales de Andrade 1 file changed, 2 insertions(+) diff --git a/dask/dataframe/io/tests/test_csv.py b/dask/dataframe/io/tests/test_csv.py -index f40960c3..c4d516d0 100644 +index 1e0dd41a..e922227b 100644 --- a/dask/dataframe/io/tests/test_csv.py +++ b/dask/dataframe/io/tests/test_csv.py @@ -4,6 +4,7 @@ import gzip diff --git a/0002-Skip-coverage-testing.patch b/0002-Skip-coverage-testing.patch index c761ef3..d999622 100644 --- a/0002-Skip-coverage-testing.patch +++ b/0002-Skip-coverage-testing.patch @@ -1,7 +1,7 @@ -From 5327f35750ea3eb9daa4e4340916961d70175600 Mon Sep 17 00:00:00 2001 +From 6c04cdb761204853b2fde68c029aecc4c22db453 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Sun, 7 May 2023 23:13:59 -0400 -Subject: [PATCH 2/3] Skip coverage testing +Subject: [PATCH 2/4] Skip coverage testing Signed-off-by: Elliott Sales de Andrade --- @@ -9,7 +9,7 @@ Signed-off-by: Elliott Sales de Andrade 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml -index 746e308f..20e1dbeb 100644 +index 506df79a..68835e4d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -122,7 +122,7 @@ markers = [ diff --git a/0003-TST-Increase-maximum-for-sizeof-test-to-pass-32-bit.patch b/0003-TST-Increase-maximum-for-sizeof-test-to-pass-32-bit.patch index ed32597..90fcdea 100644 --- a/0003-TST-Increase-maximum-for-sizeof-test-to-pass-32-bit.patch +++ b/0003-TST-Increase-maximum-for-sizeof-test-to-pass-32-bit.patch @@ -1,7 +1,7 @@ -From 088f8ba277b42e083f0fa74d7f162d42281b9f62 Mon Sep 17 00:00:00 2001 +From b013f58c1bf2a06a4dc4aaf9b92d8921f75a2345 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Thu, 20 Jul 2023 00:05:48 -0400 -Subject: [PATCH 3/3] TST: Increase maximum for sizeof test to pass 32-bit +Subject: [PATCH 3/4] TST: Increase maximum for sizeof test to pass 32-bit In that case, the result is 1244, which is ~2.6 * `sys.sizeof` (476), slightly over the 2 used in the existing test. diff --git a/0004-Fix-test_pandas_timestamp_overflow_pyarrow-condition.patch b/0004-Fix-test_pandas_timestamp_overflow_pyarrow-condition.patch new file mode 100644 index 0000000..6f3bb89 --- /dev/null +++ b/0004-Fix-test_pandas_timestamp_overflow_pyarrow-condition.patch @@ -0,0 +1,36 @@ +From 68ac2712572889ca0770d1ff5626bfa12c72e856 Mon Sep 17 00:00:00 2001 +From: Elliott Sales de Andrade +Date: Sat, 19 Aug 2023 16:49:33 -0400 +Subject: [PATCH 4/4] Fix test_pandas_timestamp_overflow_pyarrow condition + +The new behavior in pyarrow only occurs with Pandas 2.0 as well. + +Signed-off-by: Elliott Sales de Andrade +--- + dask/dataframe/io/tests/test_parquet.py | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/dask/dataframe/io/tests/test_parquet.py b/dask/dataframe/io/tests/test_parquet.py +index 6c53cd6c..21e39657 100644 +--- a/dask/dataframe/io/tests/test_parquet.py ++++ b/dask/dataframe/io/tests/test_parquet.py +@@ -3373,13 +3373,13 @@ def test_pandas_timestamp_overflow_pyarrow(tmpdir): + table, f"{tmpdir}/file.parquet", use_deprecated_int96_timestamps=False + ) + +- if pyarrow_version < parse_version("13.0.0.dev"): ++ if pyarrow_version.major >= 13 and PANDAS_GE_200: ++ dd.read_parquet(str(tmpdir), engine="pyarrow").compute() ++ else: + # This will raise by default due to overflow + with pytest.raises(pa.lib.ArrowInvalid) as e: + dd.read_parquet(str(tmpdir), engine="pyarrow").compute() + assert "out of bounds" in str(e.value) +- else: +- dd.read_parquet(str(tmpdir), engine="pyarrow").compute() + + from dask.dataframe.io.parquet.arrow import ArrowDatasetEngine as ArrowEngine + +-- +2.41.0 + diff --git a/python-dask.spec b/python-dask.spec index c1ea70e..e646454 100644 --- a/python-dask.spec +++ b/python-dask.spec @@ -10,8 +10,8 @@ %global debug_package %{nil} Name: python-%{srcname} -Version: 2023.7.1 -%global tag 2023.7.1 +Version: 2023.8.1 +%global tag 2023.8.1 Release: %autorelease Summary: Parallel PyData with Task Scheduling @@ -24,6 +24,8 @@ Patch: 0001-Skip-test_encoding_gh601-on-big-endian-machines.patch Patch: 0002-Skip-coverage-testing.patch # Drop after dropping 32-bit support. Patch: 0003-TST-Increase-maximum-for-sizeof-test-to-pass-32-bit.patch +# https://github.com/dask/dask/pull/10451 +Patch: 0004-Fix-test_pandas_timestamp_overflow_pyarrow-condition.patch %description Dask is a flexible parallel computing library for analytics. diff --git a/sources b/sources index 3f9f634..a7a3dad 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (dask-2023.7.1.tar.gz) = 6abed41ae7c3631338c9afb6db8fe69d9ec0c30c9e6ae44c0f3bded6d5a6a7000c6ae7c06a92f4a64304f8a3cfca78f9f58ddaa59b588669e36e9ee446e8c849 +SHA512 (dask-2023.8.1.tar.gz) = ec8be64bdeef51bcc3168aa83073c2884950290ceb41b7f4d56553e91c507a227862b3bc827c67ce8a0b9b761955f76897cb073f5d9f5fb91cb2a966c01c6084