#5 Update to 20211012 (close RHBZ#1763506)
Merged 2 years ago by music. Opened 2 years ago by music.
rpms/ music/python-pdfminer v20211012  into  rawhide

file modified
+1
@@ -9,3 +9,4 @@ 

  /pdfminer.six-20200517.tar.gz

  /pdfminer.six-20200517.tar.xz

  /pdfminer.six-20200517-filtered.tar.xz

+ /pdfminer.six-da5b96828efdb184f6410c43fea30f7b7c893dfb-filtered.tar.xz

@@ -1,26 +0,0 @@ 

- diff -uPNr pdfminer.six-20200517.orig/pdfminer/pdfdocument.py pdfminer.six-20200517/pdfminer/pdfdocument.py

- --- pdfminer.six-20200517.orig/pdfminer/pdfdocument.py	2020-05-17 11:50:01.000000000 -0400

- +++ pdfminer.six-20200517/pdfminer/pdfdocument.py	2020-06-24 04:32:07.846199417 -0400

- @@ -4,8 +4,8 @@

-  import struct

-  

-  try:

- -    from Crypto.Cipher import ARC4, AES

- -    from Crypto.Hash import SHA256

- +    from Cryptodome.Cipher import ARC4, AES

- +    from Cryptodome.Hash import SHA256

-  except ImportError:

-      AES = SHA256 = None

-      from . import arcfour as ARC4

- diff -uPNr pdfminer.six-20200517.orig/setup.py pdfminer.six-20200517/setup.py

- --- pdfminer.six-20200517.orig/setup.py	2020-05-17 11:50:01.000000000 -0400

- +++ pdfminer.six-20200517/setup.py	2020-06-24 04:31:49.508326568 -0400

- @@ -14,7 +14,7 @@

-      package_data={'pdfminer': ['cmap/*.pickle.gz']},

-      install_requires=[

-          'chardet ; python_version > "3.0"',

- -        'pycryptodome',

- +        'pycryptodomex',

-          'sortedcontainers',

-      ],

-      extras_require={

file modified
+39 -18
@@ -1,6 +1,10 @@ 

  # The import name is pdfminer. The upstream project name (as specified in

  # setup.py) is pdfminer.six, which results in a canonical project name of

  # pdfminer-six.

+ %global forgeurl https://github.com/pdfminer/pdfminer.six

+ 

+ # When upstream forgets to tag a release, but it is available from PyPI:

+ %global commit da5b96828efdb184f6410c43fea30f7b7c893dfb

  

  # Sphinx-generated HTML documentation is not suitable for packaging; see

  # https://bugzilla.redhat.com/show_bug.cgi?id=2006555 for discussion.
@@ -9,7 +13,8 @@ 

  %bcond_without doc_pdf

  

  Name:           python-pdfminer

- Version:        20200517

+ Version:        20211012

+ %forgemeta

  Release:        %autorelease

  Summary:        Tool for extracting information from PDF documents

  
@@ -20,10 +25,6 @@ 

  #     - If this is a bundled library, its origin is unclear

  #   pdfminer/ascii85.py

  #     - If this is a bundled library, its origin is unclear

- #   pdfminer/rijndael.py

- #     - Based on https://www.efgh.com/software/rijndael.htm; however, we do not

- #       treat it as a bundled dependency since it is a total rewrite from C to

- #       Python

  #

  # APAFML:

  #   pdfminer/fontmetrics.py
@@ -35,28 +36,31 @@ 

  #     - Both the original bundled data and the data generated from the

  #       adobe-mappings-cmap package are BSD-licensed.

  #

+ # ASL 2.0 and MIT:

+ #   pdfminer/_saslprep.py

+ #     - Forked from ASL 2.0 code by MongoDB, Inc.—originally

+ #       pymongo/saslprep.py in mongo-python-driver (python-pymongo), with

+ #       additional modifications in pyHanko (not yet packaged).

+ #

  # Note that pdfminer/glyphlist.py contains data extracted and converted from

  # https://partners.adobe.com/public/developer/en/opentype/glyphlist.txt under

  # the Adobe Glyph List License; but that this license is just an MIT variant

  # (https://fedoraproject.org/wiki/Licensing:MIT?rd=Licensing/MIT#AdobeGlyph).

- License:        MIT and Public Domain and APAFML and BSD

- URL:            https://github.com/pdfminer/pdfminer.six

+ License:        MIT and Public Domain and APAFML and BSD and (ASL 2.0 and MIT)

+ URL:            %{forgeurl}

  # This has the samples/ directory stripped out. While upstream claims the

  # sample PDFs are “freely distributable”, they have unclear or unspecified

  # licenses, which makes them unsuitable for Fedora. This applies especially,

  # but not exclusively, to the contents of samples/nonfree.

  #

- # Generated with ./get_source.sh %%{version}

- Source0:        pdfminer.six-%{version}-filtered.tar.xz

+ # Generated with ./get_source.sh %%{version}, or ./get_source.sh %%{commit}

+ Source0:        pdfminer.six-%{?commit:%{commit}}%{?!commit:%{version}}-filtered.tar.xz

  # Script to generate Source0; see comments above.

  Source1:        get_source.sh

  # Downstream man pages in groff_man(7) format

  Source2:        dumppdf.1

  Source3:        pdf2txt.1

  

- # Fedora’s pycryptodomex is renamed to not conflict with pycrypto.

- Patch0:         Use-Fedora-pycryptodomex.patch

- 

  BuildArch:      noarch

  

  BuildRequires:  python3-devel
@@ -122,6 +126,20 @@ 

  

  %py_provides python3-pdfminer-six

  

+ # One file, pdfminer/_saslprep.py, is forked from ASL 2.0 code by MongoDB,

+ # Inc.—originally pymongo/saslprep.py in mongo-python-driver

+ # (python-pymongo)—with additional modifications in pyHanko (not yet packaged),

+ # where it is pyhanko/pdf_utils/_saslprep.py.

+ #

+ # Since this is a fork of the python-pymongo module, and since the fork is not

+ # part of pyHanko’s public API, there is no possibility of using an unbundled

+ # version.

+ #

+ # The version history of the fork is not clear. We add unversioned virtual

+ # Provides for both libraries of origin.

+ Provides:       bundled(python3dist(pymongo))

+ Provides:       bundled(python3dist(pyhanko))

+ 

  %description -n python3-pdfminer

  %{common_description}

  
@@ -137,7 +155,7 @@ 

  

  

  %prep

- %autosetup -n pdfminer.six-%{version} -p1

+ %autosetup -n pdfminer.six-%{?commit:%{commit}}%{?!commit:%{version}} -p1

  mkdir -p '_man'

  cp -p '%{SOURCE2}' '%{SOURCE3}' '_man/'

  
@@ -193,13 +211,15 @@ 

  #

  # Skipped tests (and ignored files) are those that require the sample PDFs,

  # which are not included in our version of the source tarball.

- k="${k-}${k+ and }not test_font_size"

- k="${k-}${k+ and }not TestExtractText"

- k="${k-}${k+ and }not TestExtractPages"

- k="${k-}${k+ and }not TestPdfDocument"

+ k="${k-}${k+ and }not TestDumpImages"

  k="${k-}${k+ and }not TestDumpPDF"

+ k="${k-}${k+ and }not TestExtractPages"

+ k="${k-}${k+ and }not TestExtractText"

+ k="${k-}${k+ and }not TestOpenFilename"

  k="${k-}${k+ and }not TestPdf2Txt"

- k="${k-}${k+ and }not TestDumpImages"

+ k="${k-}${k+ and }not TestPdfDocument"

+ k="${k-}${k+ and }not test_font_size"

+ k="${k-}${k+ and }not test_paint_path_quadrilaterals"

  

  %pytest -k "${k-}" \

      --ignore='tests/test_tools_dumppdf.py' \
@@ -207,6 +227,7 @@ 

  

  

  %files -n python3-pdfminer -f %{pyproject_files}

+ %license LICENSE docs/licenses/LICENSE.pyHanko

  %{_bindir}/pdf2txt

  %{_bindir}/pdf2txt.py

  %{_mandir}/man1/pdf2txt.1*

file modified
+1 -1
@@ -1,1 +1,1 @@ 

- SHA512 (pdfminer.six-20200517-filtered.tar.xz) = 33511988bd69c79bc9151839ee24ff1736f470445f21214883b17e3740395a9d8b0bc9fe872b5646183b1f0ac8354a4ef7da40b71f2a9ec8bc263a855adc3e73

+ SHA512 (pdfminer.six-da5b96828efdb184f6410c43fea30f7b7c893dfb-filtered.tar.xz) = 7874899502a022adb89a56d877c2c72afe4dde8ca236ccea10eca1340fa904516e04c143852570aa163dfb5e66dda468cdb6a5866fd119303ef87413c58e8eb7

Changes in this release are unlikely to break anything, but I will verify the following dependent packages rebuild in a COPR first:

  • diffoscope
  • ocrmypdf
  • python-rows
  • setzer

The License field has grown yet another term, which will be announced to the devel list.

rebased onto 8434ee5

2 years ago

I recommend bundled(python3dist(pymongo)), bundled(python3dist(pyhanko))

I recommend bundled(python3dist(pymongo)), bundled(python3dist(pyhanko))

Thanks, that’s better. Amended.

rebased onto 532cfed

2 years ago

COPR is up: https://copr.fedorainfracloud.org/coprs/music/pdfminer-20211012/builds/

Before announcing this:

PR for the pre-existing diffoscope issue: https://src.fedoraproject.org/rpms/diffoscope/pull-request/1.

Haven’t figured out what’s wrong with ocrmypdf yet.

rebased onto 4366284

2 years ago

Haven’t figured out what’s wrong with ocrmypdf yet.

That was unrelated, see https://bugzilla.redhat.com/show_bug.cgi?id=2014639#c3

Haven’t figured out what’s wrong with ocrmypdf yet.

That was unrelated, see https://bugzilla.redhat.com/show_bug.cgi?id=2014639#c3

Yes, I was originally hoping to wait a few days for the eventual pyproject-rpm-macros update, but instead I just now built an updated pyproject-rpm-macros in the COPR.

Now ocrmypdf seems OK, except for a couple of apparently-unrelated test failures on aarch64 and a dependency problem on s390x due to an ExcludeArch in python-img2pdf. (These kinds of arch-dependent problems in noarch Python packages seem to be very common.) I’ll plan to go ahead and merge this sometime today.

Pull-Request has been merged by music

2 years ago