#18 Backport proper pyarrow 10 and 11 support
Merged a year ago by music. Opened a year ago by music.
rpms/ music/python-pandas pyarrow10  into  rawhide

@@ -0,0 +1,335 @@ 

+ From 43d4450e7e7386eb3aebb286b6101889c32ba52c Mon Sep 17 00:00:00 2001

+ From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>

+ Date: Sat, 24 Dec 2022 20:49:35 +0100

+ Subject: [PATCH 1/3] CI: Unpin pyarrow<10 (#50314)

+ 

+ * CI: Unpin pyarrow<10

+ 

+ * Skip test

+ ---

+  .github/actions/setup-conda/action.yml          | 2 +-

+  .github/workflows/ubuntu.yml                    | 2 +-

+  ci/deps/actions-310.yaml                        | 2 +-

+  ci/deps/actions-38-downstream_compat.yaml       | 2 +-

+  ci/deps/actions-38.yaml                         | 2 +-

+  ci/deps/actions-39.yaml                         | 2 +-

+  ci/deps/circle-38-arm64.yaml                    | 2 +-

+  environment.yml                                 | 2 +-

+  pandas/compat/pyarrow.py                        | 2 ++

+  pandas/tests/arrays/interval/test_interval.py   | 2 +-

+  pandas/tests/arrays/period/test_arrow_compat.py | 3 +++

+  requirements-dev.txt                            | 2 +-

+  12 files changed, 15 insertions(+), 10 deletions(-)

+ 

+ diff --git a/.github/actions/setup-conda/action.yml b/.github/actions/setup-conda/action.yml

+ index 7d1e54052f..002d0020c2 100644

+ --- a/.github/actions/setup-conda/action.yml

+ +++ b/.github/actions/setup-conda/action.yml

+ @@ -18,7 +18,7 @@ runs:

+      - name: Set Arrow version in ${{ inputs.environment-file }} to ${{ inputs.pyarrow-version }}

+        run: |

+          grep -q '  - pyarrow' ${{ inputs.environment-file }}

+ -        sed -i"" -e "s/  - pyarrow<10/  - pyarrow=${{ inputs.pyarrow-version }}/" ${{ inputs.environment-file }}

+ +        sed -i"" -e "s/  - pyarrow/  - pyarrow=${{ inputs.pyarrow-version }}/" ${{ inputs.environment-file }}

+          cat ${{ inputs.environment-file }}

+        shell: bash

+        if: ${{ inputs.pyarrow-version }}

+ diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml

+ index 4602d12d85..8fd69a4851 100644

+ --- a/.github/workflows/ubuntu.yml

+ +++ b/.github/workflows/ubuntu.yml

+ @@ -31,7 +31,7 @@ jobs:

+          pattern: ["not single_cpu", "single_cpu"]

+          # Don't test pyarrow v2/3: Causes timeouts in read_csv engine

+          # even if tests are skipped/xfailed

+ -        pyarrow_version: ["5", "6", "7"]

+ +        pyarrow_version: ["5", "6", "7", "8", "9", "10"]

+          include:

+            - name: "Downstream Compat"

+              env_file: actions-38-downstream_compat.yaml

+ diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml

+ index deb23d435b..6050a28e11 100644

+ --- a/ci/deps/actions-310.yaml

+ +++ b/ci/deps/actions-310.yaml

+ @@ -39,7 +39,7 @@ dependencies:

+    - psycopg2

+    - pymysql

+    - pytables

+ -  - pyarrow<10

+ +  - pyarrow

+    - pyreadstat

+    - python-snappy

+    - pyxlsb

+ diff --git a/ci/deps/actions-38-downstream_compat.yaml b/ci/deps/actions-38-downstream_compat.yaml

+ index 06ffafeb70..988eacdd0c 100644

+ --- a/ci/deps/actions-38-downstream_compat.yaml

+ +++ b/ci/deps/actions-38-downstream_compat.yaml

+ @@ -38,7 +38,7 @@ dependencies:

+    - odfpy

+    - pandas-gbq

+    - psycopg2

+ -  - pyarrow<10

+ +  - pyarrow

+    - pymysql

+    - pyreadstat

+    - pytables

+ diff --git a/ci/deps/actions-38.yaml b/ci/deps/actions-38.yaml

+ index 222da40ea9..131e2d1882 100644

+ --- a/ci/deps/actions-38.yaml

+ +++ b/ci/deps/actions-38.yaml

+ @@ -37,7 +37,7 @@ dependencies:

+    - odfpy

+    - pandas-gbq

+    - psycopg2

+ -  - pyarrow<10

+ +  - pyarrow

+    - pymysql

+    - pyreadstat

+    - pytables

+ diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml

+ index 1c60e8ad6d..62e7397757 100644

+ --- a/ci/deps/actions-39.yaml

+ +++ b/ci/deps/actions-39.yaml

+ @@ -38,7 +38,7 @@ dependencies:

+    - pandas-gbq

+    - psycopg2

+    - pymysql

+ -  - pyarrow<10

+ +  - pyarrow

+    - pyreadstat

+    - pytables

+    - python-snappy

+ diff --git a/ci/deps/circle-38-arm64.yaml b/ci/deps/circle-38-arm64.yaml

+ index 263521fb74..512c47f0a6 100644

+ --- a/ci/deps/circle-38-arm64.yaml

+ +++ b/ci/deps/circle-38-arm64.yaml

+ @@ -37,7 +37,7 @@ dependencies:

+    - odfpy

+    - pandas-gbq

+    - psycopg2

+ -  - pyarrow<10

+ +  - pyarrow

+    - pymysql

+    # Not provided on ARM

+    #- pyreadstat

+ diff --git a/environment.yml b/environment.yml

+ index 20f839db9a..1620bad9b0 100644

+ --- a/environment.yml

+ +++ b/environment.yml

+ @@ -38,7 +38,7 @@ dependencies:

+    - odfpy

+    - pandas-gbq

+    - psycopg2

+ -  - pyarrow<10

+ +  - pyarrow

+    - pymysql

+    - pyreadstat

+    - pytables

+ diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py

+ index 6965865acb..887ae49c3d 100644

+ --- a/pandas/compat/pyarrow.py

+ +++ b/pandas/compat/pyarrow.py

+ @@ -18,6 +18,7 @@ try:

+      pa_version_under7p0 = _palv < Version("7.0.0")

+      pa_version_under8p0 = _palv < Version("8.0.0")

+      pa_version_under9p0 = _palv < Version("9.0.0")

+ +    pa_version_under10p0 = _palv < Version("10.0.0")

+  except ImportError:

+      pa_version_under1p01 = True

+      pa_version_under2p0 = True

+ @@ -28,3 +29,4 @@ except ImportError:

+      pa_version_under7p0 = True

+      pa_version_under8p0 = True

+      pa_version_under9p0 = True

+ +    pa_version_under10p0 = True

+ diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py

+ index 2a6bea3255..0667082784 100644

+ --- a/pandas/tests/arrays/interval/test_interval.py

+ +++ b/pandas/tests/arrays/interval/test_interval.py

+ @@ -287,7 +287,7 @@ def test_arrow_array():

+      with pytest.raises(TypeError, match="Not supported to convert IntervalArray"):

+          pa.array(intervals, type="float64")

+  

+ -    with pytest.raises(TypeError, match="different 'subtype'"):

+ +    with pytest.raises(TypeError, match="different 'subtype'|to convert IntervalArray"):

+          pa.array(intervals, type=ArrowIntervalType(pa.float64(), "left"))

+  

+  

+ diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py

+ index 03fd146572..01a97d07a7 100644

+ --- a/pandas/tests/arrays/period/test_arrow_compat.py

+ +++ b/pandas/tests/arrays/period/test_arrow_compat.py

+ @@ -1,5 +1,7 @@

+  import pytest

+  

+ +from pandas.compat.pyarrow import pa_version_under10p0

+ +

+  from pandas.core.dtypes.dtypes import PeriodDtype

+  

+  import pandas as pd

+ @@ -26,6 +28,7 @@ def test_arrow_extension_type():

+      assert not hash(p1) == hash(p3)

+  

+  

+ +@pytest.mark.xfail(not pa_version_under10p0, reason="Wrong behavior with pyarrow 10")

+  @pytest.mark.parametrize(

+      "data, freq",

+      [

+ diff --git a/requirements-dev.txt b/requirements-dev.txt

+ index 95291e4ab5..1c7a011e5f 100644

+ --- a/requirements-dev.txt

+ +++ b/requirements-dev.txt

+ @@ -29,7 +29,7 @@ openpyxl

+  odfpy

+  pandas-gbq

+  psycopg2

+ -pyarrow<10

+ +pyarrow

+  pymysql

+  pyreadstat

+  tables

+ -- 

+ 2.39.2

+ 

+ 

+ From 5c2ced8f67fb248d6e5166b5dfdb03909de3123b Mon Sep 17 00:00:00 2001

+ From: "Benjamin A. Beasley" <code@musicinmybrain.net>

+ Date: Wed, 19 Apr 2023 11:36:21 -0400

+ Subject: [PATCH 2/3] Add pandas.compat.pa_version_under11p0

+ 

+ Partial backport of #50998 / 52306d9

+ ---

+  pandas/compat/__init__.py | 2 ++

+  pandas/compat/pyarrow.py  | 2 ++

+  2 files changed, 4 insertions(+)

+ 

+ diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py

+ index 80f66c945b..68f2495d0d 100644

+ --- a/pandas/compat/__init__.py

+ +++ b/pandas/compat/__init__.py

+ @@ -29,6 +29,7 @@ from pandas.compat.pyarrow import (

+      pa_version_under7p0,

+      pa_version_under8p0,

+      pa_version_under9p0,

+ +    pa_version_under11p0,

+  )

+  

+  if TYPE_CHECKING:

+ @@ -163,4 +164,5 @@ __all__ = [

+      "pa_version_under7p0",

+      "pa_version_under8p0",

+      "pa_version_under9p0",

+ +    "pa_version_under11p0",

+  ]

+ diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py

+ index 887ae49c3d..ca51d74828 100644

+ --- a/pandas/compat/pyarrow.py

+ +++ b/pandas/compat/pyarrow.py

+ @@ -19,6 +19,7 @@ try:

+      pa_version_under8p0 = _palv < Version("8.0.0")

+      pa_version_under9p0 = _palv < Version("9.0.0")

+      pa_version_under10p0 = _palv < Version("10.0.0")

+ +    pa_version_under11p0 = _palv < Version("11.0.0")

+  except ImportError:

+      pa_version_under1p01 = True

+      pa_version_under2p0 = True

+ @@ -30,3 +31,4 @@ except ImportError:

+      pa_version_under8p0 = True

+      pa_version_under9p0 = True

+      pa_version_under10p0 = True

+ +    pa_version_under11p0 = True

+ -- 

+ 2.39.2

+ 

+ 

+ From 8549bbe4e1144e2429612fa17e082ab9c1cba23f Mon Sep 17 00:00:00 2001

+ From: Luke Manley <lukemanley@gmail.com>

+ Date: Fri, 10 Feb 2023 13:15:08 -0500

+ Subject: [PATCH 3/3] CI: unpin pyarrow, fix failing test (#51175)

+ 

+ * unpin pyarrow, fix failing test

+ 

+ * cleanup

+ 

+ * handle NaT/NaN

+ ---

+  pandas/core/arrays/arrow/array.py    | 12 ++++++++++++

+  pandas/core/tools/timedeltas.py      |  4 +++-

+  pandas/tests/extension/test_arrow.py | 12 +++++++++++-

+  3 files changed, 26 insertions(+), 2 deletions(-)

+ 

+ diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py

+ index 4dfd8942c2..7031f4d80d 100644

+ --- a/pandas/core/arrays/arrow/array.py

+ +++ b/pandas/core/arrays/arrow/array.py

+ @@ -259,6 +259,18 @@ class ArrowExtensionArray(OpsMixin, ExtensionArray):

+              from pandas.core.tools.timedeltas import to_timedelta

+  

+              scalars = to_timedelta(strings, errors="raise")

+ +            if pa_type.unit != "ns":

+ +                # GH51175: test_from_sequence_of_strings_pa_array

+ +                # attempt to parse as int64 reflecting pyarrow's

+ +                # duration to string casting behavior

+ +                mask = isna(scalars)

+ +                if not isinstance(strings, (pa.Array, pa.ChunkedArray)):

+ +                    strings = pa.array(strings, type=pa.string(), from_pandas=True)

+ +                strings = pc.if_else(mask, None, strings)

+ +                try:

+ +                    scalars = strings.cast(pa.int64())

+ +                except pa.ArrowInvalid:

+ +                    pass

+          elif pa.types.is_time(pa_type):

+              from pandas.core.tools.times import to_time

+  

+ diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py

+ index 5026c97c0b..67ba7818d1 100644

+ --- a/pandas/core/tools/timedeltas.py

+ +++ b/pandas/core/tools/timedeltas.py

+ @@ -236,7 +236,9 @@ def _convert_listlike(arg, unit=None, errors="raise", name=None):

+          #  returning arg (errors == "ignore"), and where the input is a

+          #  generator, we return a useful list-like instead of a

+          #  used-up generator

+ -        arg = np.array(list(arg), dtype=object)

+ +        if not hasattr(arg, "__array__"):

+ +            arg = list(arg)

+ +        arg = np.array(arg, dtype=object)

+  

+      try:

+          td64arr = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0]

+ diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py

+ index b7ddb1f248..689bd29dfd 100644

+ --- a/pandas/tests/extension/test_arrow.py

+ +++ b/pandas/tests/extension/test_arrow.py

+ @@ -30,6 +30,7 @@ from pandas.compat import (

+      pa_version_under7p0,

+      pa_version_under8p0,

+      pa_version_under9p0,

+ +    pa_version_under11p0,

+  )

+  from pandas.errors import PerformanceWarning

+  

+ @@ -272,7 +273,7 @@ class TestConstructors(base.BaseConstructorsTests):

+                      reason="Nanosecond time parsing not supported.",

+                  )

+              )

+ -        elif pa.types.is_duration(pa_dtype):

+ +        elif pa_version_under11p0 and pa.types.is_duration(pa_dtype):

+              request.node.add_marker(

+                  pytest.mark.xfail(

+                      raises=pa.ArrowNotImplementedError,

+ @@ -1707,3 +1708,12 @@ def test_mode(data_for_grouping, dropna, take_idx, exp_idx, request):

+      result = ser.mode(dropna=dropna)

+      expected = pd.Series(data_for_grouping.take(exp_idx))

+      tm.assert_series_equal(result, expected)

+ +

+ +@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"])

+ +def test_duration_from_strings_with_nat(unit):

+ +    # GH51175

+ +    strings = ["1000", "NaT"]

+ +    pa_type = pa.duration(unit)

+ +    result = ArrowExtensionArray._from_sequence_of_strings(strings, dtype=pa_type)

+ +    expected = ArrowExtensionArray(pa.array([1000, None], type=pa_type))

+ +    tm.assert_extension_array_equal(result, expected)

+ -- 

+ 2.39.2

+ 

file modified
+43 -14
@@ -13,7 +13,7 @@ 

  

  Name:           python-pandas

  Version:        1.5.3

- Release:        2%{?dist}

+ Release:        3%{?dist}

  Summary:        Python library providing high-performance data analysis tools

  

  # The entire source is BSD-3-Clause and covered by LICENSE, except:
@@ -94,6 +94,43 @@ 

  # https://bugzilla.redhat.com/show_bug.cgi?id=2171682

  Patch:          https://github.com/pandas-dev/pandas/pull/52150.patch

  

+ # CI: Unpin pyarrow<10

+ # https://github.com/pandas-dev/pandas/pull/50314

+ #

+ # Merged upstream as 4878dfe551da2fa8e2bc33e774b595f099bfa74e:

+ #   CI: Unpin pyarrow<10 (#50314)

+ #   * CI: Unpin pyarrow<10

+ #   * Skip test

+ #

+ # ----

+ #

+ # Add pandas.compat.pa_version_under11p0, required for pyarrow 11 support, from

+ # the following PR/commit, but without the other associated changes:

+ #

+ # ENH: support reductions for pyarrow temporal types (#50998)

+ # https://github.com/pandas-dev/pandas/pull/50998

+ #

+ # Merged upstream as 52306d957cb77a3823624679bb9606e244e7faa8:

+ #   ENH: support reductions for pyarrow temporal types

+ #   * unit check

+ #   * lint fixup

+ #

+ # ----

+ #

+ # CI: unpin pyarrow, fix failing test

+ # https://github.com/pandas-dev/pandas/pull/51175

+ #

+ # Merged upstream as 5f584bd29be7203db64bdf8619991927e29c74bc:

+ #   CI: unpin pyarrow, fix failing test (#51175)

+ #   * unpin pyarrow, fix failing test

+ #   * cleanup

+ #   * handle NaT/NaN

+ #

+ # ----

+ #

+ # All commits were cherry-picked onto tag v1.5.3 and combined into a single patch.

+ Patch:          pandas-1.5.3-pyarrow-10-11.patch

+ 

  %global _description %{expand:

  pandas is an open source, BSD-licensed library providing

  high-performance, easy-to-use data structures and data
@@ -362,10 +399,8 @@ 

  

  # This is just an “ecosystem” package in the upstream documentation, but there

  # is an integration test for it. This package historically had a weak

- # dependency on it, which we keep around until we package 1.4.0 to ensure

- # backward compatibility.

+ # dependency on it, but this was unnecessary.

  BuildRequires:  python3dist(pandas-datareader)

- Recommends:     python3dist(pandas-datareader)

  

  %endif

  
@@ -625,16 +660,6 @@ 

  %endif

  

  %if 0%{?fedora} > 37

- # The text of an error message has changed in libarrow/pyarrow 10, which is

- # harmless but breaks one test. Disable it until a patch is available upstream.

- #   CI: pyarrow 10 broke our ci

- #   https://github.com/pandas-dev/pandas/issues/50058

- k="${k-}${k+ and }not test_arrow_array"

- 

- # Probably also related to upstream pinning pyarrow < 10 for CI:

- # E   TypeError: Expected unicode, got pyarrow.lib.StringScalar

- k="${k-}${k+ and }not (TestConstructors and test_from_sequence_of_strings_pa_array)"

- 

  # TODO: Why does this fail? Does it need a slightly older version of dask?

  # E           AssertionError: Caused unexpected warning(s): [('RuntimeWarning', RuntimeWarning('invalid value encountered in cast'), '/builddir/build/BUILDROOT/python-pandas-1.5.3-1.fc39.x86_64/usr/lib64/python3.11/site-packages/pandas/core/dtypes/cast.py', 1836)]

  k="${k-}${k+ and }not test_construct_dask_float_array_int_dtype_match_ndarray"
@@ -717,6 +742,10 @@ 

  

  

  %changelog

+ * Wed Apr 19 2023 Benjamin A. Beasley <code@musicinmybrain.net> - 1.5.3-3

+ - Drop unnecessary weak dependency on python-pandas-datareader

+ - Backport proper pyarrow 10 and 11 support

+ 

  * Thu Apr 13 2023 Benjamin A. Beasley <code@musicinmybrain.net> - 1.5.3-2

  - Fix RHBZ#2171682 by backporting upstream PR#52150

  

This pull request also drops the unnecessary weak dependency on python-pandas-datareader.

The pyarrow part of this update should be useful for Fedora 38 and 39 and harmless for Fedora 37.

Build succeeded.
https://fedora.softwarefactory-project.io/zuul/buildset/46db8190766446c1a49d1eaff4b139c7

Pull-Request has been merged by music

a year ago