# Sphinx-generated HTML documentation is not suitable for packaging; see
# https://bugzilla.redhat.com/show_bug.cgi?id=2006555 for discussion.
#
# We can generate PDF documentation as a lesser substitute.
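# The PDF build is enabled by default; for a quick local build without it, the
# standard conditional-build flag can be used (spec file name assumed):
#   rpmbuild -ba --without doc_pdf python-hdfs.spec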
%bcond doc_pdf 1

Name:           python-hdfs
Version:        2.7.2
Release:        %autorelease
Summary:        API and command line interface for HDFS

# SPDX
License:        MIT
URL:            https://github.com/mtth/hdfs
Source0:        %{url}/archive/v%{version}/hdfs-%{version}.tar.gz
# Downstream man pages in groff_man(7) format. These were written for Fedora
# based on the tools’ --help output and should be updated if the command-line
# interface changes.
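# When updating them, the tools' current command-line surface can be reviewed
# with, e.g.:
#   hdfscli --help
#   hdfscli-avro --help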
Source1:        hdfscli.1
Source2:        hdfscli-avro.1

# The base package is arched because extras metapackages requiring fastavro are
# not available on 32-bit architectures
# (https://bugzilla.redhat.com/show_bug.cgi?id=1943932).
%ifnarch %{arm32} %{ix86}
%global fastavro_arch 1
%endif
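# On the excluded 32-bit architectures fastavro_arch stays undefined, so the
# %%{?fastavro_arch:...} conditionals below expand to nothing and the avro and
# dataframe extras are skipped entirely.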
# Of the binary RPMs, only the conditionally-enabled extras subpackages
# python3-hdfs+avro and python3-hdfs+dataframe are arched.
#
# Since there is no compiled code, there are no debugging symbols.
%global debug_package %{nil}

BuildRequires:  python3-devel

# Extra dependencies for documentation
%if %{with doc_pdf}
BuildRequires:  make
BuildRequires:  %{py3_dist sphinx}
BuildRequires:  python3-sphinx-latex
BuildRequires:  latexmk
%endif

%global _description %{expand:
%{summary}.

Features:

• Python bindings for the WebHDFS (and HttpFS) API, supporting both secure and
  insecure clusters.
• Command line interface to transfer files and start an interactive client
  shell, with aliases for convenient namenode URL caching.
• Additional functionality through optional extensions:
  ○ avro, to read and write Avro files directly from HDFS.
  ○ dataframe, to load and save Pandas dataframes.
  ○ kerberos, to support Kerberos authenticated clusters.}

%description %{_description}
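# For reference only (nothing below uses this): a sketch of typical CLI usage,
# where the "dev" alias is just an example name:
#   hdfscli --alias=dev                               # interactive client shell
#   hdfscli upload --alias=dev weights.json models/   # copy a local file to HDFS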


%package -n python3-hdfs
Summary:        %{summary}

BuildArch:      noarch

%description -n python3-hdfs %{_description}


%package doc
Summary:        Documentation and examples for %{name}

BuildArch:      noarch

%description doc %{_description}


%prep
%autosetup -n hdfs-%{version} -p1

# Remove shebangs from non-script sources. The find-then-modify pattern keeps
# us from discarding mtimes on sources that do not need modification.
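# (gawk prints the name of each file whose first line is a shebang; sed then
# deletes that line in place, and xargs -r skips sed when nothing matches.)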
find . -type f ! -perm /0111 \
    -exec gawk '/^#!/ { print FILENAME }; { nextfile }' '{}' '+' |
  xargs -r -t sed -r -i '1{/^#!/d}'


%generate_buildrequires
%pyproject_buildrequires -x kerberos%{?fastavro_arch:,avro,dataframe}


# We manually write out the python3-hdfs+kerberos metapackage so that it (like
# python3-hdfs) can be noarch even though the base package is arched. The
# definition is based on:
#
#   rpm -E '%%pyproject_extras_subpkg -n python3-hdfs kerberos'
%package -n python3-hdfs+kerberos
Summary:        Metapackage for python3-hdfs: kerberos extras

BuildArch:      noarch

Requires:       python3-hdfs = %{version}-%{release}

%description -n python3-hdfs+kerberos
This is a metapackage bringing in kerberos extras requires for python3-hdfs.
It makes sure the dependencies are installed.

%files -n python3-hdfs+kerberos
%ghost %{python3_sitelib}/*.dist-info


%if 0%{?fastavro_arch}

# Note that this subpackage is arched because it is not available on 32-bit
# architectures.
#
# We manually write out the python3-hdfs+avro subpackage so that it can contain
# the hdfscli-avro CLI entry point, and so that its summary and description can
# be tweaked to reflect this.  The definition is based on:
#
#   rpm -E '%%pyproject_extras_subpkg -n python3-hdfs avro'
%package -n python3-hdfs+avro
Summary:        Package for python3-hdfs: avro extras

Requires:       python3-hdfs = %{version}-%{release}

%description -n python3-hdfs+avro
This is a package bringing in avro extras requires for python3-hdfs.
It makes sure the dependencies are installed.

It also includes the avro-specific command-line tool, hdfscli-avro.

%files -n python3-hdfs+avro
%ghost %{python3_sitelib}/*.dist-info

%{_bindir}/hdfscli-avro
%{_mandir}/man1/hdfscli-avro.1*


# Note that this metapackage is arched because it is not available on 32-bit
# architectures.
%pyproject_extras_subpkg -n python3-hdfs dataframe

%endif


%build
%pyproject_wheel

%if %{with doc_pdf}
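# sphinx-build writes the LaTeX sources to _latex; the Sphinx-generated
# Makefile there drives latexmk to produce _latex/hdfs.pdf, packaged in -doc.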
PYTHONPATH="${PWD}" sphinx-build -b latex doc _latex -j%{?_smp_build_ncpus}
%make_build -C _latex LATEXMKOPTS='-quiet'
%endif


%install
%pyproject_install
%pyproject_save_files hdfs
install -t '%{buildroot}%{_mandir}/man1' -D -p -m 0644 \
    %{SOURCE1} %{?fastavro_arch:%{SOURCE2}}


%check
# Skip the upstream tests; they require a running Hadoop cluster:
# https://github.com/mtth/hdfs/blob/master/.travis.yml#L10
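# The -e options exclude the avro and dataframe extras' modules from the import
# check on 32-bit architectures, where those extras are not installed.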
%{pyproject_check_import \
    %{?!fastavro_arch:-e hdfs.ext.avro -e hdfs.ext.dataframe}}


%files -n python3-hdfs -f %{pyproject_files}
# pyproject-rpm-macros handles the license file; verify with rpm -qL -p …
%{_bindir}/hdfscli
%{_mandir}/man1/hdfscli.1*
# This is packaged in python3-hdfs+avro on 64-bit architectures; it is not
# packaged at all on 32-bit architectures.
%exclude %{_bindir}/hdfscli-avro


%files doc
%license LICENSE
%doc AUTHORS
%doc CHANGES
%doc README.md
%if %{with doc_pdf}
%doc _latex/hdfs.pdf
%endif
%doc examples


%changelog
%autochangelog