diff --git a/0001-Skip-test_encoding_gh601-on-big-endian-machines.patch b/0001-Skip-test_encoding_gh601-on-big-endian-machines.patch index 3e6dcbc..341af10 100644 --- a/0001-Skip-test_encoding_gh601-on-big-endian-machines.patch +++ b/0001-Skip-test_encoding_gh601-on-big-endian-machines.patch @@ -1,7 +1,7 @@ From 0d43bed7efaf54dc2c40d853ce19cf62de128370 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Sun, 7 Mar 2021 04:07:32 -0500 -Subject: [PATCH] Skip test_encoding_gh601 on big-endian machines. +Subject: [PATCH 1/3] Skip test_encoding_gh601 on big-endian machines. Signed-off-by: Elliott Sales de Andrade --- diff --git a/0002-Use-packaging-for-version-comparisons.patch b/0002-Use-packaging-for-version-comparisons.patch new file mode 100644 index 0000000..1f72bbc --- /dev/null +++ b/0002-Use-packaging-for-version-comparisons.patch @@ -0,0 +1,878 @@ +From ab18b267a2eeeaa60bda03cd67b21880c91d4766 Mon Sep 17 00:00:00 2001 +From: Elliott Sales de Andrade +Date: Sun, 13 Jun 2021 05:37:41 -0400 +Subject: [PATCH 2/3] Use packaging for version comparisons. + +Signed-off-by: Elliott Sales de Andrade +--- + dask/array/numpy_compat.py | 11 ++--- + dask/array/tests/test_cupy.py | 22 +++++----- + dask/array/tests/test_sparse.py | 6 +-- + dask/base.py | 4 +- + dask/bytes/tests/test_http.py | 4 +- + dask/bytes/tests/test_local.py | 4 +- + dask/bytes/tests/test_s3.py | 12 ++--- + dask/dataframe/_compat.py | 18 ++++---- + dask/dataframe/io/orc.py | 7 ++- + dask/dataframe/io/parquet/arrow.py | 12 ++--- + dask/dataframe/io/parquet/core.py | 7 +-- + dask/dataframe/io/parquet/fastparquet.py | 4 +- + dask/dataframe/io/tests/test_orc.py | 4 +- + dask/dataframe/io/tests/test_parquet.py | 56 ++++++++++++------------ + dask/dataframe/tests/test_rolling.py | 6 +-- + dask/diagnostics/profile_visualize.py | 6 +-- + dask/diagnostics/tests/test_profiler.py | 10 ++--- + dask/sizeof.py | 5 ++- + dask/tests/test_multiprocessing.py | 7 +-- + setup.py | 1 + + 20 files changed, 103 insertions(+), 103 deletions(-) + +diff --git a/dask/array/numpy_compat.py b/dask/array/numpy_compat.py +index 60d043d8..134c5839 100644 +--- a/dask/array/numpy_compat.py ++++ b/dask/array/numpy_compat.py +@@ -1,14 +1,15 @@ + import warnings +-from distutils.version import LooseVersion + + import numpy as np ++from packaging.version import parse as parse_version + + from ..utils import derived_from + +-_numpy_117 = LooseVersion(np.__version__) >= "1.17.0" +-_numpy_118 = LooseVersion(np.__version__) >= "1.18.0" +-_numpy_120 = LooseVersion(np.__version__) >= "1.20.0" +-_numpy_121 = LooseVersion(np.__version__) >= "1.21.0" ++_np_version = parse_version(np.__version__) ++_numpy_117 = _np_version >= parse_version("1.17.0") ++_numpy_118 = _np_version >= parse_version("1.18.0") ++_numpy_120 = _np_version >= parse_version("1.20.0") ++_numpy_121 = _np_version >= parse_version("1.21.0") + + + # Taken from scikit-learn: +diff --git a/dask/array/tests/test_cupy.py b/dask/array/tests/test_cupy.py +index 26d5d3a4..be5c77df 100644 +--- a/dask/array/tests/test_cupy.py ++++ b/dask/array/tests/test_cupy.py +@@ -1,7 +1,6 @@ +-from distutils.version import LooseVersion +- + import numpy as np + import pytest ++from packaging.version import parse as parse_version + + import dask + import dask.array as da +@@ -12,6 +11,7 @@ from dask.sizeof import sizeof + + cupy = pytest.importorskip("cupy") + cupyx = pytest.importorskip("cupyx") ++cupy_version = parse_version(cupy.__version__) + + + functions = [ +@@ -35,7 +35,7 @@ functions = [ + pytest.param( + lambda 
x: x.mean(), + marks=pytest.mark.skipif( +- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"), ++ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"), + reason="NEP-18 support is not available in NumPy or CuPy older than " + "6.4.0 (requires https://github.com/cupy/cupy/pull/2418)", + ), +@@ -47,7 +47,7 @@ functions = [ + pytest.param( + lambda x: x.std(), + marks=pytest.mark.skipif( +- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"), ++ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"), + reason="NEP-18 support is not available in NumPy or CuPy older than " + "6.4.0 (requires https://github.com/cupy/cupy/pull/2418)", + ), +@@ -55,7 +55,7 @@ functions = [ + pytest.param( + lambda x: x.var(), + marks=pytest.mark.skipif( +- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"), ++ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"), + reason="NEP-18 support is not available in NumPy or CuPy older than " + "6.4.0 (requires https://github.com/cupy/cupy/pull/2418)", + ), +@@ -318,7 +318,7 @@ def test_diagonal(): + + + @pytest.mark.skipif( +- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"), ++ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"), + reason="NEP-18 support is not available in NumPy or CuPy older than " + "6.4.0 (requires https://github.com/cupy/cupy/pull/2418)", + ) +@@ -336,7 +336,7 @@ def test_tril_triu(): + + + @pytest.mark.skipif( +- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"), ++ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"), + reason="NEP-18 support is not available in NumPy or CuPy older than " + "6.4.0 (requires https://github.com/cupy/cupy/pull/2418)", + ) +@@ -448,7 +448,7 @@ def test_nearest(): + + + @pytest.mark.skipif( +- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"), ++ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"), + reason="NEP-18 support is not available in NumPy or CuPy older than " + "6.4.0 (requires https://github.com/cupy/cupy/pull/2418)", + ) +@@ -465,7 +465,7 @@ def test_constant(): + + + @pytest.mark.skipif( +- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"), ++ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"), + reason="NEP-18 support is not available in NumPy or CuPy older than " + "6.4.0 (requires https://github.com/cupy/cupy/pull/2418)", + ) +@@ -556,7 +556,7 @@ def test_random_shapes(shape): + + + @pytest.mark.skipif( +- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.1.0"), ++ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.1.0"), + reason="NEP-18 support is not available in NumPy or CuPy older than " + "6.1.0 (requires https://github.com/cupy/cupy/pull/2209)", + ) +@@ -936,7 +936,7 @@ def test_cupy_sparse_concatenate(axis): + + @pytest.mark.skipif(not _numpy_120, reason="NEP-35 is not available") + @pytest.mark.skipif( +- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"), ++ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"), + reason="NEP-18 support is not available in NumPy or CuPy older than " + "6.4.0 (requires https://github.com/cupy/cupy/pull/2418)", + ) +diff --git a/dask/array/tests/test_sparse.py b/dask/array/tests/test_sparse.py +index df92f3e6..8ff041c9 100644 +--- a/dask/array/tests/test_sparse.py ++++ b/dask/array/tests/test_sparse.py +@@ -1,8 +1,8 @@ + import random +-from distutils.version import LooseVersion + + import numpy as np + import pytest ++from packaging.version import parse as parse_version + + 
import dask + import dask.array as da +@@ -34,7 +34,7 @@ functions = [ + pytest.param( + lambda x: x.mean(), + marks=pytest.mark.skipif( +- sparse.__version__ >= LooseVersion("0.12.0"), ++ parse_version(sparse.__version__) >= parse_version("0.12.0"), + reason="https://github.com/dask/dask/issues/7169", + ), + ), +@@ -91,7 +91,7 @@ def test_basic(func): + + + @pytest.mark.skipif( +- sparse.__version__ < LooseVersion("0.7.0+10"), ++ parse_version(sparse.__version__) < parse_version("0.7.0+10"), + reason="fixed in https://github.com/pydata/sparse/pull/256", + ) + def test_tensordot(): +diff --git a/dask/base.py b/dask/base.py +index 59c4640d..1cd48cac 100644 +--- a/dask/base.py ++++ b/dask/base.py +@@ -6,13 +6,13 @@ import uuid + from collections import OrderedDict + from contextlib import contextmanager + from dataclasses import fields, is_dataclass +-from distutils.version import LooseVersion + from functools import partial + from hashlib import md5 + from numbers import Number + from operator import getitem + from typing import Iterator, Mapping, Set + ++from packaging.version import parse as parse_version + from tlz import curry, groupby, identity, merge + from tlz.functoolz import Compose + +@@ -898,7 +898,7 @@ def _normalize_function(func): + def register_pandas(): + import pandas as pd + +- PANDAS_GT_130 = LooseVersion(pd.__version__) >= LooseVersion("1.3.0") ++ PANDAS_GT_130 = parse_version(pd.__version__) >= parse_version("1.3.0") + + @normalize_token.register(pd.Index) + def normalize_index(ind): +diff --git a/dask/bytes/tests/test_http.py b/dask/bytes/tests/test_http.py +index bee444f5..3a05ef81 100644 +--- a/dask/bytes/tests/test_http.py ++++ b/dask/bytes/tests/test_http.py +@@ -2,11 +2,11 @@ import os + import subprocess + import sys + import time +-from distutils.version import LooseVersion + + import fsspec + import pytest + from fsspec.core import open_files ++from packaging.version import parse as parse_version + + import dask.bag as db + from dask.utils import tmpdir +@@ -14,7 +14,7 @@ from dask.utils import tmpdir + files = ["a", "b"] + requests = pytest.importorskip("requests") + errs = (requests.exceptions.RequestException,) +-if LooseVersion(fsspec.__version__) > "0.7.4": ++if parse_version(fsspec.__version__) > parse_version("0.7.4"): + aiohttp = pytest.importorskip("aiohttp") + errs = errs + (aiohttp.client_exceptions.ClientResponseError,) + +diff --git a/dask/bytes/tests/test_local.py b/dask/bytes/tests/test_local.py +index 40b161c7..5564f92c 100644 +--- a/dask/bytes/tests/test_local.py ++++ b/dask/bytes/tests/test_local.py +@@ -2,7 +2,6 @@ import gzip + import os + import pathlib + import sys +-from distutils.version import LooseVersion + from functools import partial + from time import sleep + +@@ -11,6 +10,7 @@ import pytest + from fsspec.compression import compr + from fsspec.core import open_files + from fsspec.implementations.local import LocalFileSystem ++from packaging.version import parse as parse_version + from tlz import concat, valmap + + from dask import compute +@@ -356,7 +356,7 @@ def test_get_pyarrow_filesystem(): + from fsspec.implementations.local import LocalFileSystem + + pa = pytest.importorskip("pyarrow") +- if pa.__version__ >= LooseVersion("2.0.0"): ++ if parse_version(pa.__version__).major >= 2: + pytest.skip("fsspec no loger inherits from pyarrow>=2.0.") + + fs = LocalFileSystem() +diff --git a/dask/bytes/tests/test_s3.py b/dask/bytes/tests/test_s3.py +index 1412de3e..b24b30b5 100644 +--- a/dask/bytes/tests/test_s3.py ++++ 
b/dask/bytes/tests/test_s3.py +@@ -5,10 +5,10 @@ import subprocess + import sys + import time + from contextlib import contextmanager +-from distutils.version import LooseVersion + from functools import partial + + import pytest ++from packaging.version import parse as parse_version + + s3fs = pytest.importorskip("s3fs") + boto3 = pytest.importorskip("boto3") +@@ -438,13 +438,13 @@ def test_parquet(s3, engine, s3so, metadata_file): + from dask.dataframe._compat import tm + + lib = pytest.importorskip(engine) +- if engine == "pyarrow" and LooseVersion(lib.__version__) < "0.13.1": ++ lib_version = parse_version(lib.__version__) ++ if engine == "pyarrow" and lib_version < parse_version("0.13.1"): + pytest.skip("pyarrow < 0.13.1 not supported for parquet") + if ( + engine == "pyarrow" +- and LooseVersion(lib.__version__) >= "2.0" +- and LooseVersion(lib.__version__) < "3.0" +- and LooseVersion(s3fs.__version__) > "0.5.0" ++ and lib_version.major == 2 ++ and parse_version(s3fs.__version__) > parse_version("0.5.0") + ): + pytest.skip("#7056 - new s3fs not supported before pyarrow 3.0") + +@@ -507,7 +507,7 @@ def test_parquet_wstoragepars(s3, s3so): + + def test_get_pyarrow_fs_s3(s3): + pa = pytest.importorskip("pyarrow") +- if pa.__version__ >= LooseVersion("2.0.0"): ++ if parse_version(pa.__version__).major >= 2: + pytest.skip("fsspec no loger inherits from pyarrow>=2.0.") + fs = DaskS3FileSystem(anon=True) + assert isinstance(fs, pa.filesystem.FileSystem) +diff --git a/dask/dataframe/_compat.py b/dask/dataframe/_compat.py +index 2a0b17fc..5e76d106 100644 +--- a/dask/dataframe/_compat.py ++++ b/dask/dataframe/_compat.py +@@ -1,16 +1,16 @@ + import string +-from distutils.version import LooseVersion + + import numpy as np + import pandas as pd +- +-PANDAS_VERSION = LooseVersion(pd.__version__) +-PANDAS_GT_100 = PANDAS_VERSION >= LooseVersion("1.0.0") +-PANDAS_GT_104 = PANDAS_VERSION >= LooseVersion("1.0.4") +-PANDAS_GT_110 = PANDAS_VERSION >= LooseVersion("1.1.0") +-PANDAS_GT_120 = PANDAS_VERSION >= LooseVersion("1.2.0") +-PANDAS_GT_121 = PANDAS_VERSION >= LooseVersion("1.2.1") +-PANDAS_GT_130 = PANDAS_VERSION >= LooseVersion("1.3.0") ++from packaging.version import parse as parse_version ++ ++PANDAS_VERSION = parse_version(pd.__version__) ++PANDAS_GT_100 = PANDAS_VERSION >= parse_version("1.0.0") ++PANDAS_GT_104 = PANDAS_VERSION >= parse_version("1.0.4") ++PANDAS_GT_110 = PANDAS_VERSION >= parse_version("1.1.0") ++PANDAS_GT_120 = PANDAS_VERSION >= parse_version("1.2.0") ++PANDAS_GT_121 = PANDAS_VERSION >= parse_version("1.2.1") ++PANDAS_GT_130 = PANDAS_VERSION >= parse_version("1.3.0") + + + if PANDAS_GT_100: +diff --git a/dask/dataframe/io/orc.py b/dask/dataframe/io/orc.py +index 40740310..69eca3c5 100644 +--- a/dask/dataframe/io/orc.py ++++ b/dask/dataframe/io/orc.py +@@ -1,6 +1,5 @@ +-from distutils.version import LooseVersion +- + from fsspec.core import get_fs_token_paths ++from packaging.version import parse as parse_version + + from ...base import tokenize + from ...highlevelgraph import HighLevelGraph +@@ -49,7 +48,7 @@ def _read_orc_stripe(fs, path, stripe, columns=None): + with fs.open(path, "rb") as f: + o = orc.ORCFile(f) + table = o.read_stripe(stripe, columns) +- if pa.__version__ < LooseVersion("0.11.0"): ++ if parse_version(pa.__version__) < parse_version("0.11.0"): + return table.to_pandas() + else: + return table.to_pandas(date_as_object=False) +@@ -80,7 +79,7 @@ def read_orc(path, columns=None, storage_options=None): + orc = import_required("pyarrow.orc", "Please install 
pyarrow >= 0.9.0") + import pyarrow as pa + +- if LooseVersion(pa.__version__) == "0.10.0": ++ if parse_version(pa.__version__) == parse_version("0.10.0"): + raise RuntimeError( + "Due to a bug in pyarrow 0.10.0, the ORC reader is " + "unavailable. Please either downgrade pyarrow to " +diff --git a/dask/dataframe/io/parquet/arrow.py b/dask/dataframe/io/parquet/arrow.py +index 99227157..83bb4bf8 100644 +--- a/dask/dataframe/io/parquet/arrow.py ++++ b/dask/dataframe/io/parquet/arrow.py +@@ -2,13 +2,13 @@ import json + import warnings + from collections import defaultdict + from datetime import datetime +-from distutils.version import LooseVersion + from functools import partial + + import numpy as np + import pandas as pd + import pyarrow as pa + import pyarrow.parquet as pq ++from packaging.version import parse as parse_version + + from dask import delayed + +@@ -27,14 +27,16 @@ from .utils import ( + ) + + # Check PyArrow version for feature support +-preserve_ind_supported = pa.__version__ >= LooseVersion("0.15.0") ++_pa_version = parse_version(pa.__version__) ++preserve_ind_supported = _pa_version >= parse_version("0.15.0") + read_row_groups_supported = preserve_ind_supported +-if pa.__version__ >= LooseVersion("1.0.0"): ++if _pa_version.major >= 1: + from pyarrow import dataset as pa_ds + else: + pa_ds = None +-subset_stats_supported = pa.__version__ > LooseVersion("2.0.0") +-schema_field_supported = pa.__version__ >= LooseVersion("0.15.0") ++subset_stats_supported = _pa_version > parse_version("2.0.0") ++schema_field_supported = _pa_version >= parse_version("0.15.0") ++del _pa_version + + # + # Helper Utilities +diff --git a/dask/dataframe/io/parquet/core.py b/dask/dataframe/io/parquet/core.py +index e11de5e3..00ab6ee0 100644 +--- a/dask/dataframe/io/parquet/core.py ++++ b/dask/dataframe/io/parquet/core.py +@@ -1,11 +1,11 @@ + import math + import warnings +-from distutils.version import LooseVersion + + import tlz as toolz + from fsspec.core import get_fs_token_paths + from fsspec.implementations.local import LocalFileSystem + from fsspec.utils import stringify_path ++from packaging.version import parse as parse_version + + from ....base import tokenize + from ....delayed import Delayed +@@ -844,11 +844,12 @@ def get_engine(engine): + + elif engine in ("pyarrow", "arrow", "pyarrow-legacy", "pyarrow-dataset"): + pa = import_required("pyarrow", "`pyarrow` not installed") ++ pa_version = parse_version(pa.__version__) + +- if LooseVersion(pa.__version__) < "0.13.1": ++ if pa_version < parse_version("0.13.1"): + raise RuntimeError("PyArrow version >= 0.13.1 required") + +- if engine == "pyarrow-dataset" and LooseVersion(pa.__version__) >= "1.0.0": ++ if engine == "pyarrow-dataset" and pa_version.major >= 1: + from .arrow import ArrowDatasetEngine + + _ENGINES[engine] = eng = ArrowDatasetEngine +diff --git a/dask/dataframe/io/parquet/fastparquet.py b/dask/dataframe/io/parquet/fastparquet.py +index 5c817294..ae28c792 100644 +--- a/dask/dataframe/io/parquet/fastparquet.py ++++ b/dask/dataframe/io/parquet/fastparquet.py +@@ -3,11 +3,11 @@ import json + import pickle + import warnings + from collections import OrderedDict, defaultdict +-from distutils.version import LooseVersion + + import numpy as np + import pandas as pd + import tlz as toolz ++from packaging.version import parse as parse_version + + try: + import fastparquet +@@ -924,7 +924,7 @@ class FastParquetEngine(Engine): + rgs = [] + elif partition_on: + mkdirs = lambda x: fs.mkdirs(x, exist_ok=True) +- if 
LooseVersion(fastparquet.__version__) >= "0.1.4":
++            if parse_version(fastparquet.__version__) >= parse_version("0.1.4"):
+                 rgs = partition_on_columns(
+                     df, partition_on, path, filename, fmd, compression, fs.open, mkdirs
+                 )
+diff --git a/dask/dataframe/io/tests/test_orc.py b/dask/dataframe/io/tests/test_orc.py
+index 1c1ca00e..580a7ded 100644
+--- a/dask/dataframe/io/tests/test_orc.py
++++ b/dask/dataframe/io/tests/test_orc.py
+@@ -1,9 +1,9 @@
+ import os
+ import shutil
+ import tempfile
+-from distutils.version import LooseVersion
+ 
+ import pytest
++from packaging.version import parse as parse_version
+ 
+ import dask.dataframe as dd
+ from dask.dataframe import read_orc
+@@ -16,7 +16,7 @@ pytest.importorskip("pyarrow.orc")
+ import pyarrow as pa
+ 
+ pytestmark = pytest.mark.skipif(
+-    LooseVersion(pa.__version__) == "0.10.0",
++    parse_version(pa.__version__).base_version == "0.10.0",
+     reason=(
+         "PyArrow 0.10.0 release broke the ORC reader, see "
+         "https://issues.apache.org/jira/browse/ARROW-3009"
+diff --git a/dask/dataframe/io/tests/test_parquet.py b/dask/dataframe/io/tests/test_parquet.py
+index a5a1244a..28df8771 100644
+--- a/dask/dataframe/io/tests/test_parquet.py
++++ b/dask/dataframe/io/tests/test_parquet.py
+@@ -4,11 +4,11 @@ import os
+ import sys
+ import warnings
+ from decimal import Decimal
+-from distutils.version import LooseVersion
+ 
+ import numpy as np
+ import pandas as pd
+ import pytest
++from packaging.version import parse as parse_version
+ 
+ import dask
+ import dask.dataframe as dd
+@@ -25,12 +25,18 @@ try:
+     import fastparquet
+ except ImportError:
+     fastparquet = False
++    fastparquet_version = parse_version("0")
++else:
++    fastparquet_version = parse_version(fastparquet.__version__)
+ 
+ 
+ try:
+     import pyarrow as pa
+ except ImportError:
+     pa = False
++    pa_version = parse_version("0")
++else:
++    pa_version = parse_version(pa.__version__)
+ 
+ try:
+     import pyarrow.parquet as pq
+@@ -41,7 +47,7 @@ except ImportError:
+ SKIP_FASTPARQUET = not fastparquet
+ FASTPARQUET_MARK = pytest.mark.skipif(SKIP_FASTPARQUET, reason="fastparquet not found")
+ 
+-if pq and pa.__version__ < LooseVersion("0.13.1"):
++if pq and pa_version < parse_version("0.13.1"):
+     SKIP_PYARROW = True
+     SKIP_PYARROW_REASON = "pyarrow >= 0.13.1 required for parquet"
+ else:
+@@ -49,8 +55,8 @@ else:
+         sys.platform == "win32"
+         and pa
+         and (
+-            (pa.__version__ == LooseVersion("0.16.0"))
+-            or (pa.__version__ == LooseVersion("2.0.0"))
++            pa_version == parse_version("0.16.0")
++            or pa_version == parse_version("2.0.0")
+         )
+     ):
+         SKIP_PYARROW = True
+@@ -64,7 +70,7 @@ else:
+     SKIP_PYARROW_REASON = "pyarrow not found"
+ PYARROW_MARK = pytest.mark.skipif(SKIP_PYARROW, reason=SKIP_PYARROW_REASON)
+ 
+-if pa and pa.__version__ < LooseVersion("1.0.0"):
++if pa and pa_version.major < 1:
+     SKIP_PYARROW_DS = True
+     SKIP_PYARROW_DS_REASON = "pyarrow >= 1.0.0 required for pyarrow dataset API"
+ else:
+@@ -147,7 +153,7 @@ write_read_engines_xfail = write_read_engines(
+ 
+ if (
+     fastparquet
+-    and fastparquet.__version__ < LooseVersion("0.5")
++    and fastparquet_version < parse_version("0.5")
+     and PANDAS_GT_110
+     and not PANDAS_GT_121
+ ):
+@@ -822,7 +828,7 @@ def test_append_dict_column(tmpdir, engine):
+ 
+     if engine == "fastparquet":
+         pytest.xfail("Fastparquet engine is missing dict-column support")
+-    elif pa.__version__ < LooseVersion("1.0.1"):
++    elif pa_version < parse_version("1.0.1"):
+         pytest.skip("Newer PyArrow version required for dict-column support.")
+ 
+     tmp = str(tmpdir)
+@@ 
-981,7 +987,7 @@ def test_categories_unnamed_index(tmpdir, engine): + # Check that we can handle an unnamed categorical index + # https://github.com/dask/dask/issues/6885 + +- if engine.startswith("pyarrow") and pa.__version__ < LooseVersion("0.15.0"): ++ if engine.startswith("pyarrow") and pa_version < parse_version("0.15.0"): + pytest.skip("PyArrow>=0.15 Required.") + + tmpdir = str(tmpdir) +@@ -1166,7 +1172,7 @@ def test_to_parquet_pyarrow_w_inconsistent_schema_by_partition_succeeds_w_manual + @pytest.mark.parametrize("index", [False, True]) + @pytest.mark.parametrize("schema", ["infer", "complex"]) + def test_pyarrow_schema_inference(tmpdir, index, engine, schema): +- if pa.__version__ < LooseVersion("0.15.0"): ++ if pa_version < parse_version("0.15.0"): + pytest.skip("PyArrow>=0.15 Required.") + if schema == "complex": + schema = {"index": pa.string(), "amount": pa.int64()} +@@ -1359,9 +1365,7 @@ def test_filters_v0(tmpdir, write_engine, read_engine): + + # Recent versions of pyarrow support full row-wise filtering + # (fastparquet and older pyarrow versions do not) +- pyarrow_row_filtering = ( +- read_engine == "pyarrow-dataset" and pa.__version__ >= LooseVersion("1.0.0") +- ) ++ pyarrow_row_filtering = read_engine == "pyarrow-dataset" and pa_version.major >= 1 + + fn = str(tmpdir) + df = pd.DataFrame({"at": ["ab", "aa", "ba", "da", "bb"]}) +@@ -1462,7 +1466,7 @@ def test_pyarrow_filter_divisions(tmpdir): + str(tmpdir.join("file.1.parquet")), engine="pyarrow", row_group_size=2 + ) + +- if pa.__version__ >= LooseVersion("1.0.0"): ++ if pa_version.major >= 1: + # Only works for ArrowDatasetEngine. + # Legacy code will not apply filters on individual row-groups + # when `split_row_groups=False`. +@@ -1637,7 +1641,7 @@ def test_parquet_select_cats(tmpdir, engine): + + + def test_columns_name(tmpdir, engine): +- if engine == "fastparquet" and fastparquet.__version__ <= LooseVersion("0.3.1"): ++ if engine == "fastparquet" and fastparquet_version <= parse_version("0.3.1"): + pytest.skip("Fastparquet does not write column_indexes up to 0.3.1") + tmp_path = str(tmpdir) + df = pd.DataFrame({"A": [1, 2]}, index=pd.Index(["a", "b"], name="idx")) +@@ -2041,9 +2045,7 @@ def test_to_parquet_with_get(tmpdir): + def test_select_partitioned_column(tmpdir, engine): + pytest.importorskip("snappy") + if engine.startswith("pyarrow"): +- import pyarrow as pa +- +- if pa.__version__ < LooseVersion("0.9.0"): ++ if pa_version < parse_version("0.9.0"): + pytest.skip("pyarrow<0.9.0 did not support this") + + fn = str(tmpdir) +@@ -2067,9 +2069,9 @@ def test_select_partitioned_column(tmpdir, engine): + + + def test_with_tz(tmpdir, engine): +- if engine.startswith("pyarrow") and pa.__version__ < LooseVersion("0.11.0"): ++ if engine.startswith("pyarrow") and pa_version < parse_version("0.11.0"): + pytest.skip("pyarrow<0.11.0 did not support this") +- if engine == "fastparquet" and fastparquet.__version__ < LooseVersion("0.3.0"): ++ if engine == "fastparquet" and fastparquet_version < parse_version("0.3.0"): + pytest.skip("fastparquet<0.3.0 did not support this") + + with warnings.catch_warnings(): +@@ -2278,7 +2280,7 @@ def test_timeseries_nulls_in_schema(tmpdir, engine, schema): + if ( + schema == "infer" + and engine.startswith("pyarrow") +- and pa.__version__ < LooseVersion("0.15.0") ++ and pa_version < parse_version("0.15.0") + ): + pytest.skip("PyArrow>=0.15 Required.") + +@@ -2814,9 +2816,7 @@ def test_filter_nonpartition_columns( + + @PYARROW_MARK + def test_pandas_metadata_nullable_pyarrow(tmpdir): +- if 
pa.__version__ < LooseVersion("0.16.0") or pd.__version__ < LooseVersion(
+-        "1.0.0"
+-    ):
++    if pa_version < parse_version("0.16.0") or parse_version(pd.__version__).major < 1:
+         pytest.skip("PyArrow>=0.16 and Pandas>=1.0.0 Required.")
+     tmpdir = str(tmpdir)
+ 
+@@ -2837,7 +2837,7 @@ def test_pandas_metadata_nullable_pyarrow(tmpdir):
+ 
+ @PYARROW_MARK
+ def test_pandas_timestamp_overflow_pyarrow(tmpdir):
+-    if pa.__version__ < LooseVersion("0.17.0"):
++    if pa_version < parse_version("0.17.0"):
+         pytest.skip("PyArrow>=0.17 Required.")
+ 
+     info = np.iinfo(np.dtype("int64"))
+@@ -2947,7 +2947,7 @@ def test_partitioned_column_overlap(tmpdir, engine, write_cols):
+ @fp_pandas_xfail
+ def test_partitioned_preserve_index(tmpdir, write_engine, read_engine):
+ 
+-    if write_engine.startswith("pyarrow") and pa.__version__ < LooseVersion("0.15.0"):
++    if write_engine.startswith("pyarrow") and pa_version < parse_version("0.15.0"):
+         pytest.skip("PyArrow>=0.15 Required.")
+ 
+     tmp = str(tmpdir)
+@@ -3065,7 +3065,7 @@ def test_pyarrow_dataset_simple(tmpdir, engine):
+ @PYARROW_MARK
+ @pytest.mark.parametrize("test_filter", [True, False])
+ def test_pyarrow_dataset_partitioned(tmpdir, engine, test_filter):
+-    if pa.__version__ <= LooseVersion("0.17.1"):
++    if pa_version <= parse_version("0.17.1"):
+         # Using pyarrow.dataset API does not produce
+         # Categorical type for partitioned columns.
+         pytest.skip("PyArrow>0.17.1 Required.")
+@@ -3093,7 +3093,7 @@ def test_pyarrow_dataset_partitioned(tmpdir, engine, test_filter):
+ def test_pyarrow_dataset_read_from_paths(
+     tmpdir, read_from_paths, test_filter_partitioned
+ ):
+-    if pa.__version__ <= LooseVersion("0.17.1"):
++    if pa_version <= parse_version("0.17.1"):
+         # Using pyarrow.dataset API does not produce
+         # Categorical type for partitioned columns.
+         pytest.skip("PyArrow>0.17.1 Required.")
+@@ -3123,7 +3123,7 @@ def test_pyarrow_dataset_read_from_paths(
+ @PYARROW_MARK
+ @pytest.mark.parametrize("split_row_groups", [True, False])
+ def test_pyarrow_dataset_filter_partitioned(tmpdir, split_row_groups):
+-    if pa.__version__ < LooseVersion("1.0.0"):
++    if pa_version.major < 1:
+         # pyarrow.dataset API required. 
+ pytest.skip("PyArrow>=1.0.0 Required.") + +diff --git a/dask/dataframe/tests/test_rolling.py b/dask/dataframe/tests/test_rolling.py +index 19cb5b56..ea023b40 100644 +--- a/dask/dataframe/tests/test_rolling.py ++++ b/dask/dataframe/tests/test_rolling.py +@@ -1,8 +1,7 @@ +-from distutils.version import LooseVersion +- + import numpy as np + import pandas as pd + import pytest ++from packaging.version import parse as parse_version + + import dask.dataframe as dd + from dask.dataframe._compat import PANDAS_GT_130 +@@ -404,7 +403,8 @@ def test_rolling_agg_aggregate(): + @pytest.mark.skipif(not dd._compat.PANDAS_GT_100, reason="needs pandas>=1.0.0") + def test_rolling_numba_engine(): + numba = pytest.importorskip("numba") +- if not dd._compat.PANDAS_GT_104 and LooseVersion(numba.__version__) >= "0.49": ++ numba_version = parse_version(numba.__version__) ++ if not dd._compat.PANDAS_GT_104 and numba_version >= parse_version("0.49"): + # Was fixed in https://github.com/pandas-dev/pandas/pull/33687 + pytest.xfail("Known incompatibility between pandas and numba") + +diff --git a/dask/diagnostics/profile_visualize.py b/dask/diagnostics/profile_visualize.py +index 751957da..b62c6fba 100644 +--- a/dask/diagnostics/profile_visualize.py ++++ b/dask/diagnostics/profile_visualize.py +@@ -1,9 +1,9 @@ + import random + from bisect import bisect_left +-from distutils.version import LooseVersion + from itertools import cycle + from operator import add, itemgetter + ++from packaging.version import parse as parse_version + from tlz import accumulate, groupby, pluck, unique + + from ..core import istask +@@ -372,7 +372,7 @@ def plot_resources(results, palette="Viridis", **kwargs): + line_width=4, + **{ + "legend_label" +- if LooseVersion(bokeh.__version__) >= "1.4" ++ if parse_version(bokeh.__version__) >= parse_version("1.4") + else "legend": "% CPU" + } + ) +@@ -390,7 +390,7 @@ def plot_resources(results, palette="Viridis", **kwargs): + line_width=4, + **{ + "legend_label" +- if LooseVersion(bokeh.__version__) >= "1.4" ++ if parse_version(bokeh.__version__) >= parse_version("1.4") + else "legend": "Memory" + } + ) +diff --git a/dask/diagnostics/tests/test_profiler.py b/dask/diagnostics/tests/test_profiler.py +index 5d995b87..a31943f8 100644 +--- a/dask/diagnostics/tests/test_profiler.py ++++ b/dask/diagnostics/tests/test_profiler.py +@@ -1,10 +1,10 @@ + import contextlib + import os +-from distutils.version import LooseVersion + from operator import add, mul + from time import sleep + + import pytest ++from packaging.version import parse as parse_version + + from dask.diagnostics import CacheProfiler, Profiler, ResourceProfiler + from dask.threaded import get +@@ -326,10 +326,10 @@ def test_plot_multiple(): + p = visualize( + [prof, rprof], label_size=50, title="Not the default", show=False, save=False + ) +- bokeh_version = LooseVersion(bokeh.__version__) +- if bokeh_version >= "1.1.0": ++ bokeh_version = parse_version(bokeh.__version__) ++ if bokeh_version >= parse_version("1.1.0"): + figures = [r[0] for r in p.children[1].children] +- elif bokeh_version >= "0.12.0": ++ elif bokeh_version >= parse_version("0.12.0"): + figures = [r.children[0] for r in p.children[1].children] + else: + figures = [r[0] for r in p.children] +@@ -364,7 +364,7 @@ def test_get_colors(): + from dask.diagnostics.profile_visualize import get_colors + + # 256-color palettes were added in bokeh 1.4.0 +- if LooseVersion(bokeh.__version__) >= "1.4.0": ++ if parse_version(bokeh.__version__) >= parse_version("1.4.0"): + from bokeh.palettes 
import Blues256 + + funcs = list(range(11)) +diff --git a/dask/sizeof.py b/dask/sizeof.py +index 570b6251..38c06885 100644 +--- a/dask/sizeof.py ++++ b/dask/sizeof.py +@@ -2,7 +2,8 @@ import itertools + import random + import sys + from array import array +-from distutils.version import LooseVersion ++ ++from packaging.version import parse as parse_version + + from .utils import Dispatch + +@@ -195,7 +196,7 @@ def register_pyarrow(): + return int(_get_col_size(data)) + 1000 + + # Handle pa.Column for pyarrow < 0.15 +- if pa.__version__ < LooseVersion("0.15.0"): ++ if parse_version(pa.__version__) < parse_version("0.15.0"): + + @sizeof.register(pa.Column) + def sizeof_pyarrow_column(col): +diff --git a/dask/tests/test_multiprocessing.py b/dask/tests/test_multiprocessing.py +index 51c7044f..16cdca8b 100644 +--- a/dask/tests/test_multiprocessing.py ++++ b/dask/tests/test_multiprocessing.py +@@ -2,10 +2,8 @@ import multiprocessing + import pickle + import sys + from concurrent.futures import ProcessPoolExecutor +-from distutils.version import LooseVersion + from operator import add + +-import cloudpickle + import pytest + + import dask +@@ -49,13 +47,10 @@ def test_pickle_locals(): + + + @pytest.mark.skipif(pickle.HIGHEST_PROTOCOL < 5, reason="requires pickle protocol 5") +-@pytest.mark.skipif( +- cloudpickle.__version__ < LooseVersion("1.3.0"), +- reason="requires cloudpickle >= 1.3.0", +-) + def test_out_of_band_pickling(): + """Test that out-of-band pickling works""" + np = pytest.importorskip("numpy") ++ pytest.importorskip("cloudpickle", minversion="1.3.0") + + a = np.arange(5) + +diff --git a/setup.py b/setup.py +index 41751134..9365926c 100755 +--- a/setup.py ++++ b/setup.py +@@ -22,6 +22,7 @@ extras_require["complete"] = sorted({v for req in extras_require.values() for v + extras_require["test"] = ["pytest", "pytest-rerunfailures", "pytest-xdist"] + + install_requires = [ ++ "packaging", + "pyyaml", + "cloudpickle >= 1.1.1", + "fsspec >= 0.6.0", +-- +2.31.1 + diff --git a/0003-fix-index_col-duplication-if-index_col-is-type-str.patch b/0003-fix-index_col-duplication-if-index_col-is-type-str.patch new file mode 100644 index 0000000..a28378f --- /dev/null +++ b/0003-fix-index_col-duplication-if-index_col-is-type-str.patch @@ -0,0 +1,30 @@ +From 57cf597227fb78a42cabbeedab146260e6f485e2 Mon Sep 17 00:00:00 2001 +From: McToel +Date: Sun, 16 May 2021 11:11:06 +0200 +Subject: [PATCH 3/3] fix index_col duplication if index_col is type str + +Signed-off-by: Elliott Sales de Andrade +--- + dask/dataframe/io/sql.py | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/dask/dataframe/io/sql.py b/dask/dataframe/io/sql.py +index 112876e2..d698c494 100644 +--- a/dask/dataframe/io/sql.py ++++ b/dask/dataframe/io/sql.py +@@ -125,10 +125,8 @@ def read_sql_table( + if columns + else list(table.columns) + ) +- if index_col not in columns: +- columns.append( +- table.columns[index_col] if isinstance(index_col, str) else index_col +- ) ++ if index not in columns: ++ columns.append(index) + + if isinstance(index_col, str): + kwargs["index_col"] = index_col +-- +2.31.1 + diff --git a/python-dask.spec b/python-dask.spec index 45fc874..33a98b7 100644 --- a/python-dask.spec +++ b/python-dask.spec @@ -15,6 +15,10 @@ URL: https://github.com/dask/dask/ Source0: %pypi_source # https://github.com/dask/dask/issues/6725 Patch0001: 0001-Skip-test_encoding_gh601-on-big-endian-machines.patch +# https://github.com/dask/dask/pull/7280 +Patch0002: 0002-Use-packaging-for-version-comparisons.patch +# 
https://github.com/dask/dask/pull/7661 +Patch0003: 0003-fix-index_col-duplication-if-index_col-is-type-str.patch BuildArch: noarch @@ -169,10 +173,6 @@ Documentation for dask. # Remove bundled egg-info rm -rf %{srcname}.egg-info -# Disable failing on deprecation warnings. -# It seems upstream mistook the distribution system for its own CI. -sed -r -i 's/filterwarnings =/\0\n ignore::DeprecationWarning/; /error:::/d' setup.cfg - %build %py3_build @@ -198,10 +198,7 @@ pytest_args=( -m 'not network' # https://bugzilla.redhat.com/show_bug.cgi?id=1968947#c4 - --ignore=dask/dataframe/io/tests/test_sql.py - - # Those also fail, but don't seem very important. - --ignore=dask/tests/test_config.py::test_collect_yaml_permission_errors + --deselect=dask/dataframe/io/tests/test_sql.py::test_select_from_select -n %[0%{?have_arm}?"2":"auto"] )