#29 [WIP] Split bytecode cache to optional subpackages
Closed 3 years ago by churchyard. Opened 3 years ago by churchyard.
rpms/ churchyard/python3.9 bytecodes  into  master

file modified
+228 -140
@@ -17,7 +17,7 @@ 

  %global prerel rc1

  %global upstream_version %{general_version}%{?prerel}

  Version: %{general_version}%{?prerel:~%{prerel}}

- Release: 2%{?dist}

+ Release: 3%{?dist}

  License: Python

  

  
@@ -463,6 +463,10 @@ 

  Provides: bundled(python3dist(setuptools)) = %{setuptools_version}

  %endif

  

+ Recommends: %{pkgname}-libs-bytecode-opt-0%{?_isa} = %{version}-%{release}

+ Suggests:   %{pkgname}-libs-bytecode-opt-1%{?_isa} = %{version}-%{release}

+ Suggests:   %{pkgname}-libs-bytecode-opt-2%{?_isa} = %{version}-%{release}

+ 

  # There are files in the standard library that have python shebang.

  # We've filtered the automatic requirement out so libs are installable without

  # the main package. This however makes it pulled in by default.
@@ -493,6 +497,33 @@ 

    a scripting language, and by the main "%{exename}" executable

  

  

+ %package -n %{pkgname}-libs-bytecode-opt-0

+ Summary:  Python runtime libraries compiled bytecode cache (without optimizations)

+ Requires: %{pkgname}-libs%{?_isa} = %{version}-%{release}

+ 

+ %description -n %{pkgname}-libs-bytecode-opt-0

+ This package contains the non-optimized bytecode cache files for the

+ %{pkgname}-libs package.

+ 

+ 

+ %package -n %{pkgname}-libs-bytecode-opt-1

+ Summary:  Python runtime libraries compiled bytecode cache (optimization level 1)

+ Requires: %{pkgname}-libs%{?_isa} = %{version}-%{release}

+ 

+ %description -n %{pkgname}-libs-bytecode-opt-1

+ This package contains the optimized (level 1) bytecode cache files for the

+ %{pkgname}-libs package.

+ 

+ 

+ %package -n %{pkgname}-libs-bytecode-opt-2

+ Summary:  Python runtime libraries compiled bytecode cache (optimization level 2)

+ Requires: %{pkgname}-libs%{?_isa} = %{version}-%{release}

+ 

+ %description -n %{pkgname}-libs-bytecode-opt-2

+ This package contains the optimized (level 2) bytecode cache files for the

+ %{pkgname}-libs package.

+ 

+ 

  %package -n %{pkgname}-devel

  Summary: Libraries and header files needed for Python development

  Requires: %{pkgname} = %{version}-%{release}
@@ -1059,6 +1090,12 @@ 

      rm ${directory}/{__pycache__/${module}.cpython-%{pyshortver}.opt-?.pyc,${module}.py}

  done

  

+ %global noclean_pycs_marker %{_localstatedir}/lib/rpm-state/%{name}-noclean-pycs

+ %if %{without flatpackage}

+ mkdir -p $(dirname %{buildroot}%{noclean_pycs_marker})

+ echo "# intentionally empty" > %{buildroot}%{noclean_pycs_marker}

+ %endif

+ 

  # ======================================================

  # Checks for packaging issues

  # ======================================================
@@ -1146,6 +1183,108 @@ 

  %endif # with tests

  

  

+ # The list of directories containing pure Python modules in the python-libs package

+ %global module_dirs %{expand:

+ %{pylibdir}

+ %{pylibdir}/asyncio

+ %{pylibdir}/collections

+ %{pylibdir}/concurrent

+ %{pylibdir}/concurrent/futures

+ %{pylibdir}/ctypes

+ %{pylibdir}/ctypes/macholib

+ %{pylibdir}/curses

+ %{pylibdir}/dbm

+ %{pylibdir}/distutils

+ %{pylibdir}/distutils/command

+ %{pylibdir}/email

+ %{pylibdir}/email/mime

+ %{pylibdir}/encodings

+ %{pylibdir}/ensurepip

+ %{?with_rpmwheels:%exclude }%{pylibdir}/ensurepip/_bundled

+ %{pylibdir}/html

+ %{pylibdir}/http

+ %{pylibdir}/importlib

+ %{pylibdir}/json

+ %{pylibdir}/lib2to3

+ %{pylibdir}/lib2to3/fixes

+ %{pylibdir}/lib2to3/pgen2

+ %{pylibdir}/logging

+ %{pylibdir}/multiprocessing

+ %{pylibdir}/multiprocessing/dummy

+ %{pylibdir}/pydoc_data

+ %{pylibdir}/sqlite3

+ %{pylibdir}/unittest

+ %{pylibdir}/urllib

+ %{pylibdir}/venv

+ %{pylibdir}/wsgiref

+ %{pylibdir}/xml

+ %{pylibdir}/xml/dom

+ %{pylibdir}/xml/etree

+ %{pylibdir}/xml/parsers

+ %{pylibdir}/xml/sax

+ %{pylibdir}/xmlrpc

+ %{pylibdir}/zoneinfo

+ }

+ 

+ %define list_module_dirs() %{lua:

+ local module_dirs = rpm.expand("%module_dirs")

+ local bytecode_suffixes = rpm.expand("%bytecode_suffixes")

+ local ghost = ""

+ if rpm.expand("%{without flatpackage}") == "1" then

+     ghost = "%ghost "

+ end

+ for module_dir in module_dirs:gmatch('[^\\n]+') do

+     if module_dir:find('^%%exclude%s') then

+         print(module_dir .. "\\n")

+     else

+         print("%dir " .. module_dir .. "\\n")

+         print("%dir " .. module_dir .. "/__pycache__\\n")

+         print(module_dir .. "/*.py\\n")

+         print(ghost .. module_dir .. "/__pycache__/*" .. bytecode_suffixes .. "\\n")

+     end

+ end

+ }

+ 

+ %define list_bytecode_cached() %{lua:

+ local module_dirs = rpm.expand("%module_dirs")

+ local opt = rpm.expand("%{?1}")

+ local pyshortver = rpm.expand("%pyshortver")

+ for module_dir in module_dirs:gmatch('[^\\n]+') do

+     print(module_dir .. "/__pycache__/*.cpython-" .. pyshortver .. opt .. ".pyc\\n")

+ end

+ print(rpm.expand("%exclude %{pylibdir}/__pycache__/turtle.cpython-" .. pyshortver .. opt .. ".pyc\\n"))

+ }

+ 

+ %define remove_pyc_files_declaration() %{?0:

+ local module_dirs = [[%{module_dirs}]]

+ 

+ function remove_if_pyc_file(opt, pycache_dir, filename)

+     local pattern = "%%S+%%.cpython%%-%{pyshortver}" .. opt .. "%%.pyc$"

+     local antipattern = "^turtle%%.cpython%%-%{pyshortver}" .. opt .. "%%.pyc$"

+     if filename:match(pattern) and not filename:match(antipattern) then

+         os.remove(pycache_dir .. filename)

+     end

+ end

+ 

+ function remove_pyc_files_from_dir(opt, pycache_dir)

+     local pycache_stat = posix.stat(pycache_dir)

+     if pycache_stat and pycache_stat.type == "directory" then

+         for filename in posix.files(pycache_dir) do

+             remove_if_pyc_file(opt, pycache_dir, filename)

+         end

+     end

+ end

+ 

+ function remove_pyc_files(opt)

+     for module_dir in module_dirs:gmatch('[^\\n]+') do

+         if not module_dir:find('^%%exclude%%s') then

+             remove_pyc_files_from_dir(opt, module_dir .. "/__pycache__/")

+         end

+     end

+ end

+ }

+ 

+ 

  %files -n %{pkgname}

  %doc README.rst

  
@@ -1174,71 +1313,42 @@ 

  %doc README.rst

  %endif

  

- %dir %{pylibdir}

- %dir %{dynload_dir}

+ %list_module_dirs

  

  %license %{pylibdir}/LICENSE.txt

  

- %{pylibdir}/lib2to3

- %if %{without flatpackage}

- %exclude %{pylibdir}/lib2to3/tests

- %endif

- 

- %dir %{pylibdir}/unittest/

- %dir %{pylibdir}/unittest/__pycache__/

- %{pylibdir}/unittest/*.py

- %{pylibdir}/unittest/__pycache__/*%{bytecode_suffixes}

- 

- %dir %{pylibdir}/asyncio/

- %dir %{pylibdir}/asyncio/__pycache__/

- %{pylibdir}/asyncio/*.py

- %{pylibdir}/asyncio/__pycache__/*%{bytecode_suffixes}

+ %doc %{pylibdir}/distutils/README

+ %doc %{pylibdir}/email/architecture.rst

+ %doc %{pylibdir}/ctypes/macholib/README.ctypes

  

- %dir %{pylibdir}/venv/

- %dir %{pylibdir}/venv/__pycache__/

- %{pylibdir}/venv/*.py

- %{pylibdir}/venv/__pycache__/*%{bytecode_suffixes}

+ %{pylibdir}/pydoc_data/_pydoc.css

+ %{pylibdir}/lib2to3/*.txt

+ %{pylibdir}/lib2to3/*.pickle

  %{pylibdir}/venv/scripts

+ %{pylibdir}/ctypes/macholib/fetch_macholib

+ %{pylibdir}/distutils/command/command_template

+ # Some of the .py files get removed and replaced by their pycache files

+ %{pylibdir}/encodings/*.pyc

+ %{pylibdir}/pydoc_data/*.pyc

  

- %{pylibdir}/wsgiref

- %{pylibdir}/xmlrpc

- 

- %dir %{pylibdir}/ensurepip/

- %dir %{pylibdir}/ensurepip/__pycache__/

- %{pylibdir}/ensurepip/*.py

- %{pylibdir}/ensurepip/__pycache__/*%{bytecode_suffixes}

+ %if %{without flatpackage}

+ %exclude %{pylibdir}/turtle.py

+ %exclude %{pylibdir}/__pycache__/turtle*%{bytecode_suffixes}

+ %endif

  

- %if %{with rpmwheels}

- %exclude %{pylibdir}/ensurepip/_bundled

- %else

- %dir %{pylibdir}/ensurepip/_bundled

+ %if %{without rpmwheels}

  %{pylibdir}/ensurepip/_bundled/*.whl

- %{pylibdir}/ensurepip/_bundled/__init__.py

- %{pylibdir}/ensurepip/_bundled/__pycache__/*%{bytecode_suffixes}

  %endif

  

- %dir %{pylibdir}/concurrent/

- %dir %{pylibdir}/concurrent/__pycache__/

- %{pylibdir}/concurrent/*.py

- %{pylibdir}/concurrent/__pycache__/*%{bytecode_suffixes}

- 

- %dir %{pylibdir}/concurrent/futures/

- %dir %{pylibdir}/concurrent/futures/__pycache__/

- %{pylibdir}/concurrent/futures/*.py

- %{pylibdir}/concurrent/futures/__pycache__/*%{bytecode_suffixes}

- 

- %{pylibdir}/pydoc_data

- 

- %{dynload_dir}/_blake2.%{SOABI_optimized}.so

- %{dynload_dir}/_md5.%{SOABI_optimized}.so

- %{dynload_dir}/_sha1.%{SOABI_optimized}.so

- %{dynload_dir}/_sha256.%{SOABI_optimized}.so

- %{dynload_dir}/_sha3.%{SOABI_optimized}.so

- %{dynload_dir}/_sha512.%{SOABI_optimized}.so

- 

+ %dir %{dynload_dir}

+ %{dynload_dir}/array.%{SOABI_optimized}.so

  %{dynload_dir}/_asyncio.%{SOABI_optimized}.so

+ %{dynload_dir}/audioop.%{SOABI_optimized}.so

+ %{dynload_dir}/binascii.%{SOABI_optimized}.so

  %{dynload_dir}/_bisect.%{SOABI_optimized}.so

+ %{dynload_dir}/_blake2.%{SOABI_optimized}.so

  %{dynload_dir}/_bz2.%{SOABI_optimized}.so

+ %{dynload_dir}/cmath.%{SOABI_optimized}.so

  %{dynload_dir}/_codecs_cn.%{SOABI_optimized}.so

  %{dynload_dir}/_codecs_hk.%{SOABI_optimized}.so

  %{dynload_dir}/_codecs_iso2022.%{SOABI_optimized}.so
@@ -1249,49 +1359,50 @@ 

  %{dynload_dir}/_crypt.%{SOABI_optimized}.so

  %{dynload_dir}/_csv.%{SOABI_optimized}.so

  %{dynload_dir}/_ctypes.%{SOABI_optimized}.so

- %{dynload_dir}/_curses.%{SOABI_optimized}.so

  %{dynload_dir}/_curses_panel.%{SOABI_optimized}.so

+ %{dynload_dir}/_curses.%{SOABI_optimized}.so

+ %{dynload_dir}/_datetime.%{SOABI_optimized}.so

  %{dynload_dir}/_dbm.%{SOABI_optimized}.so

  %{dynload_dir}/_decimal.%{SOABI_optimized}.so

  %{dynload_dir}/_elementtree.%{SOABI_optimized}.so

+ %{dynload_dir}/fcntl.%{SOABI_optimized}.so

  %if %{with gdbm}

  %{dynload_dir}/_gdbm.%{SOABI_optimized}.so

  %endif

+ %{dynload_dir}/grp.%{SOABI_optimized}.so

  %{dynload_dir}/_hashlib.%{SOABI_optimized}.so

  %{dynload_dir}/_heapq.%{SOABI_optimized}.so

  %{dynload_dir}/_json.%{SOABI_optimized}.so

  %{dynload_dir}/_lsprof.%{SOABI_optimized}.so

  %{dynload_dir}/_lzma.%{SOABI_optimized}.so

+ %{dynload_dir}/math.%{SOABI_optimized}.so

+ %{dynload_dir}/_md5.%{SOABI_optimized}.so

+ %{dynload_dir}/mmap.%{SOABI_optimized}.so

  %{dynload_dir}/_multibytecodec.%{SOABI_optimized}.so

  %{dynload_dir}/_multiprocessing.%{SOABI_optimized}.so

+ %{dynload_dir}/nis.%{SOABI_optimized}.so

  %{dynload_dir}/_opcode.%{SOABI_optimized}.so

+ %{dynload_dir}/ossaudiodev.%{SOABI_optimized}.so

+ %{dynload_dir}/parser.%{SOABI_optimized}.so

  %{dynload_dir}/_pickle.%{SOABI_optimized}.so

+ %{dynload_dir}/_posixshmem.%{SOABI_optimized}.so

  %{dynload_dir}/_posixsubprocess.%{SOABI_optimized}.so

+ %{dynload_dir}/pyexpat.%{SOABI_optimized}.so

  %{dynload_dir}/_queue.%{SOABI_optimized}.so

  %{dynload_dir}/_random.%{SOABI_optimized}.so

+ %{dynload_dir}/readline.%{SOABI_optimized}.so

+ %{dynload_dir}/resource.%{SOABI_optimized}.so

+ %{dynload_dir}/select.%{SOABI_optimized}.so

+ %{dynload_dir}/_sha1.%{SOABI_optimized}.so

+ %{dynload_dir}/_sha256.%{SOABI_optimized}.so

+ %{dynload_dir}/_sha3.%{SOABI_optimized}.so

+ %{dynload_dir}/_sha512.%{SOABI_optimized}.so

  %{dynload_dir}/_socket.%{SOABI_optimized}.so

+ %{dynload_dir}/spwd.%{SOABI_optimized}.so

  %{dynload_dir}/_sqlite3.%{SOABI_optimized}.so

  %{dynload_dir}/_ssl.%{SOABI_optimized}.so

  %{dynload_dir}/_statistics.%{SOABI_optimized}.so

  %{dynload_dir}/_struct.%{SOABI_optimized}.so

- %{dynload_dir}/array.%{SOABI_optimized}.so

- %{dynload_dir}/audioop.%{SOABI_optimized}.so

- %{dynload_dir}/binascii.%{SOABI_optimized}.so

- %{dynload_dir}/cmath.%{SOABI_optimized}.so

- %{dynload_dir}/_datetime.%{SOABI_optimized}.so

- %{dynload_dir}/fcntl.%{SOABI_optimized}.so

- %{dynload_dir}/grp.%{SOABI_optimized}.so

- %{dynload_dir}/math.%{SOABI_optimized}.so

- %{dynload_dir}/mmap.%{SOABI_optimized}.so

- %{dynload_dir}/nis.%{SOABI_optimized}.so

- %{dynload_dir}/ossaudiodev.%{SOABI_optimized}.so

- %{dynload_dir}/parser.%{SOABI_optimized}.so

- %{dynload_dir}/_posixshmem.%{SOABI_optimized}.so

- %{dynload_dir}/pyexpat.%{SOABI_optimized}.so

- %{dynload_dir}/readline.%{SOABI_optimized}.so

- %{dynload_dir}/resource.%{SOABI_optimized}.so

- %{dynload_dir}/select.%{SOABI_optimized}.so

- %{dynload_dir}/spwd.%{SOABI_optimized}.so

  %{dynload_dir}/syslog.%{SOABI_optimized}.so

  %{dynload_dir}/termios.%{SOABI_optimized}.so

  %{dynload_dir}/unicodedata.%{SOABI_optimized}.so
@@ -1303,74 +1414,7 @@ 

  

  %dir %{pylibdir}/site-packages/

  %dir %{pylibdir}/site-packages/__pycache__/

- %{pylibdir}/site-packages/README.txt

- %{pylibdir}/*.py

- %dir %{pylibdir}/__pycache__/

- %{pylibdir}/__pycache__/*%{bytecode_suffixes}

- 

- %dir %{pylibdir}/collections/

- %dir %{pylibdir}/collections/__pycache__/

- %{pylibdir}/collections/*.py

- %{pylibdir}/collections/__pycache__/*%{bytecode_suffixes}

- 

- %dir %{pylibdir}/ctypes/

- %dir %{pylibdir}/ctypes/__pycache__/

- %{pylibdir}/ctypes/*.py

- %{pylibdir}/ctypes/__pycache__/*%{bytecode_suffixes}

- %{pylibdir}/ctypes/macholib

- 

- %{pylibdir}/curses

- 

- %dir %{pylibdir}/dbm/

- %dir %{pylibdir}/dbm/__pycache__/

- %{pylibdir}/dbm/*.py

- %{pylibdir}/dbm/__pycache__/*%{bytecode_suffixes}

- 

- %dir %{pylibdir}/distutils/

- %dir %{pylibdir}/distutils/__pycache__/

- %{pylibdir}/distutils/*.py

- %{pylibdir}/distutils/__pycache__/*%{bytecode_suffixes}

- %{pylibdir}/distutils/README

- %{pylibdir}/distutils/command

- 

- %dir %{pylibdir}/email/

- %dir %{pylibdir}/email/__pycache__/

- %{pylibdir}/email/*.py

- %{pylibdir}/email/__pycache__/*%{bytecode_suffixes}

- %{pylibdir}/email/mime

- %doc %{pylibdir}/email/architecture.rst

- 

- %{pylibdir}/encodings

- 

- %{pylibdir}/html

- %{pylibdir}/http

- 

- %dir %{pylibdir}/importlib/

- %dir %{pylibdir}/importlib/__pycache__/

- %{pylibdir}/importlib/*.py

- %{pylibdir}/importlib/__pycache__/*%{bytecode_suffixes}

- 

- %dir %{pylibdir}/json/

- %dir %{pylibdir}/json/__pycache__/

- %{pylibdir}/json/*.py

- %{pylibdir}/json/__pycache__/*%{bytecode_suffixes}

- 

- %{pylibdir}/logging

- %{pylibdir}/multiprocessing

- 

- %dir %{pylibdir}/sqlite3/

- %dir %{pylibdir}/sqlite3/__pycache__/

- %{pylibdir}/sqlite3/*.py

- %{pylibdir}/sqlite3/__pycache__/*%{bytecode_suffixes}

- 

- %if %{without flatpackage}

- %exclude %{pylibdir}/turtle.py

- %exclude %{pylibdir}/__pycache__/turtle*%{bytecode_suffixes}

- %endif

- 

- %{pylibdir}/urllib

- %{pylibdir}/xml

- %{pylibdir}/zoneinfo

+ %doc %{pylibdir}/site-packages/README.txt

  

  %if "%{_lib}" == "lib64"

  %attr(0755,root,root) %dir %{_prefix}/lib/python%{pybasever}
@@ -1393,6 +1437,47 @@ 

  

  

  %if %{without flatpackage}

+ %{noclean_pycs_marker}

+ 

+ %pretrans -n %{pkgname}-libs -p <lua>

+ %remove_pyc_files_declaration

+ if not posix.stat("%{noclean_pycs_marker}") then

+     remove_pyc_files("")

+     remove_pyc_files("%%.opt%%-1")

+     remove_pyc_files("%%.opt%%-2")

+ end

+ 

+ %files -n %{pkgname}-libs-bytecode-opt-0

+ %list_bytecode_cached

+ 

+ %postun -n %{pkgname}-libs-bytecode-opt-0 -p <lua>

+ %remove_pyc_files_declaration

+ -- Run it only if we are uninstalling

+ if arg[2] == 0 then

+     remove_pyc_files("")

+ end

+ 

+ %files -n %{pkgname}-libs-bytecode-opt-1

+ %list_bytecode_cached .opt-1

+ 

+ %postun -n %{pkgname}-libs-bytecode-opt-1 -p <lua>

+ %remove_pyc_files_declaration

+ if arg[2] == 0 then

+     remove_pyc_files("%%.opt%%-1")

+ end

+ 

+ %files -n %{pkgname}-libs-bytecode-opt-2

+ %list_bytecode_cached .opt-2

+ 

+ %postun -n %{pkgname}-libs-bytecode-opt-2 -p <lua>

+ %remove_pyc_files_declaration

+ if arg[2] == 0 then

+     remove_pyc_files("%%.opt%%-2")

+ end

+ %endif

+ 

+ 

+ %if %{without flatpackage}

  %files -n %{pkgname}-devel

  %endif

  
@@ -1640,6 +1725,9 @@ 

  # ======================================================

  

  %changelog

+ * Mon Aug 31 2020 Miro Hrončok <mhroncok@redhat.com> - 3.9.0~rc1-3

+ - Split bytecode cache to optional subpackages

+ 

  * Wed Aug 12 2020 Petr Viktorin <pviktori@redhat.com> - 3.9.0~rc1-2

  - In sys.version and initial REPL message, list the source commit as "default"

  

@@ -0,0 +1,13 @@ 

+ #!/usr/bin/bash -eux

+ XY=$(rpm --eval '%python3_version_nodots')

+ 

+ # There should be no noghost pyc files in python3-libs in __pycache__ directories

+ rpm -ql --noghost python3-libs | grep /__pycache__/ | grep \\.pyc$ && exit 1 || true

+ 

+ # But there should be some ghosted ones

+ rpm -ql python3-libs | grep /__pycache__/ | grep -q \\.pyc$

+ 

+ # The files in bytecode levels should be identical to the ghosted ones

+ diff -u <(rpm -ql python3-libs-bytecode-opt-0) <(rpm -ql python3-libs | grep \\.cpython-${XY}\\.pyc$)

+ diff -u <(rpm -ql python3-libs-bytecode-opt-1) <(rpm -ql python3-libs | grep \\.cpython-${XY}\\.opt-1\\.pyc$)

+ diff -u <(rpm -ql python3-libs-bytecode-opt-2) <(rpm -ql python3-libs | grep \\.cpython-${XY}\\.opt-2\\.pyc$)

file modified
+4
@@ -34,6 +34,9 @@ 

      - marshalparser:

          dir: python/marshalparser

          run: VERSION=3.9 SAMPLE=10 test_marshalparser_compatibility.sh

+     - bytecode:

+         dir: .

+         run: ./test_bytecode.sh

      required_packages:

      - gcc  # for extension building in venv and selftest

      - gdb  # for test_gdb
@@ -42,6 +45,7 @@ 

      - python3-devel  # for extension building in venv and selftest

      - python3-tkinter  # for selftest

      - python3-test  # for selftest

+     - python3-rpm-macros  # for the possibility to eval %python3_version

      - python3-tox  # for venv tests

      - glibc-all-langpacks # for locale tests

      - marshalparser  # for testing compatibility (magic numbers) with marshalparser

This creates three subpackages with the bytecode cache (.pyc files):

  • python3-libs-bytecode-opt-0
  • python3-libs-bytecode-opt-1
  • python3-libs-bytecode-opt-2

The non-optimized (opt-0) package is Recommended by python3-libs and hence
installed by default, but removable (opt-out).
The other two are only Suggested and hence they are installable but not
installed by default (opt-in).

The packages contain the pre-compiled cache, such as:

  • /usr/lib64/python3.9/__pycache__/random.cpython-39.pyc
  • /usr/lib64/python3.9/__pycache__/random.cpython-39.opt-1.pyc
  • /usr/lib64/python3.9/__pycache__/random.cpython-39.opt-2.pyc

The paths are also %ghosted in the "main" python3-libs package, that owns:

  • The .py files, e.g. /usr/lib64/python3.9/random.py
  • The /usr/lib64/python3.9/__pycache__/ and other __pycache__ directories (but not the .pyc files in it)

The new bytecode packages Require python3-libs.

This saves significant disk space on installations where the bytecode
cache is not important.

Two important things to note about this approach:

  1. Cross-package hardlinks are not likely to work, hence when all three
    packages are installed together, more disk space is claimed than before.

  2. Python will create the missing bytecode cache files on demand when run as root.
    When in restricted SELinux context, this will fail.
    A SELinux alert will be generated unless suppressed, which we plan to do,
    see https://github.com/fedora-selinux/selinux-policy/pull/404

Co-Authored-By: @dormouse

(To clarify, this is WIP, because we need to ensure it plays nice with SELinux, but the change in Python packaging is ready for review.)

rebased onto 407fda1ef0957b750473fb6c53b3c1384f97e37c

3 years ago

rebased onto 6b551d052ce120a961c87522fd931d6fa93800cd

3 years ago

Why are the .so files now ordered differently?

The entire %files section was reorganized from what was an essentially random order to something more consistent. This particular thing was a separate commit in the previous PR and I've squashed it, although I probably should not have. Will pull it out.

rebased onto 26427f0

3 years ago

The sorting is now a separate commit.

So, first off a general question. I believed the ideal solution would be to only have the non-optimized .pyc file, and have the opt-1/2 files and the .py file as optional installs. But of course, without the .py file the .pyc file is not found in __pycache__ and so this is more of a long-term project. Is that correct?

If that assumption stands, that would mean that while now we're proposing a mandatory .py and optional non-optimized .pyc, later on that would be switched to an optional .py and mandatory non-optimized .pyc. Is that correct?

2 new commits added

  • Split bytecode cache to optional subpackages
  • Sort files in %{dynload_dir}
3 years ago

The paths are also %ghosted in the "main" python3-libs package, that owns:
/usr/lib64/python3.9/random.py
/usr/lib64/python3.9/__pycache__/

This is a bit confusing, is there really a random.py at that location, or is it just a "random" file? I'd say skip that line, the 2nd example is the only important one.

So, first off a general question. I believed the ideal solution would be to only have the non-optimized .pyc file, and have the opt-1/2 files and the .py file as optional installs.

Ideal how? This is not what we are trying to do here. It is another option for saving space, but it breaks many user expectations. We already had this discussion in the past.

Here we attempt to keep the sources (mandatory) but have all the pre-compiled bytecode caches installable from RPM (optional, with non-optimized caches recommended = installed by default).

What you talk about has been done with several large and generated modules only in https://src.fedoraproject.org/rpms/python3.9/pull-request/16

But of course, without the .py file the .pyc file is not found in __pycache__ and so this is more of a long-term project. Is that correct?

I don't understand the question :/ This is a long-term project, but not the same one you talk about.

If that assumption stands, that would mean that while now we're proposing a mandatory .py and optional non-optimized .pyc, later on that would be switched to an optional .py and mandatory non-optimized .pyc. Is that correct?

No.

The paths are also %ghosted in the "main" python3-libs package, that owns:
/usr/lib64/python3.9/random.py
/usr/lib64/python3.9/__pycache__/

This is a bit confusing, is there really a random.py at that location, or is it just a "random" file?

Yes, there is a random.py file at that location. If the name is confusing, I can pick any other one, what about ast.py?

I'd say skip that line, the 2nd example is the only important one.

What 2nd example?

2 new commits added

  • Split bytecode cache to optional subpackages
  • Sort files in %{dynload_dir}
3 years ago

The code of the macros looks good to me.

Spitballing: Do we want to have the expanded content from the macros committed somewhere in the dist-git? I would find it useful, both for checking what is and isn't supposed to be in the package, and also to see changes when rebasing to a new version.

Do we want to have the expanded content from the macros committed somewhere in the dist-git?

Where would you put it and how?

%exclude %{pylibdir}/lib2to3/tests

This is only removed but never added anywhere else. And I don't believe it's added by the macros.

%exclude %{pylibdir}/lib2to3/tests

This is only removed but never added anywhere else. And I don't believe it's added by the macros.

Indeed removed and not added, but correctly.

The following:

%{pylibdir}/lib2to3
%exclude %{pylibdir}/lib2to3/tests

Was replaced by:

%dir %{pylibdir}/lib2to3
%dir %{pylibdir}/lib2to3/__pycache__
%{pylibdir}/lib2to3/*.py
%ghost %{pylibdir}/lib2to3/__pycache__/*%{bytecode_suffixes}

%dir %{pylibdir}/lib2to3/fixes
%dir %{pylibdir}/lib2to3/fixes/__pycache__
%{pylibdir}/lib2to3/fixes/*.py
%ghost %{pylibdir}/lib2to3/fixes/__pycache__/*%{bytecode_suffixes}

%dir %{pylibdir}/lib2to3/pgen2
%dir %{pylibdir}/lib2to3/pgen2/__pycache__
%{pylibdir}/lib2to3/pgen2/*.py
%ghost %{pylibdir}/lib2to3/pgen2/__pycache__/*%{bytecode_suffixes}

It is a bit verbose, but generated from the following three lines only:

%{pylibdir}/lib2to3
%{pylibdir}/lib2to3/fixes
%{pylibdir}/lib2to3/pgen2

We can no longer recursively list entire directories with nested pure Python modules in the python3-libs %files section, because we need to know what's in the directories. If need be, we could change the Lua macros to Python/Shell scripts and generate the file lists after %install, but I find that harder to debug.

Side note: I use rpmspec --without flatpackage -P python3.9.spec to debug this.

Also:

meld <(rpmspec --without flatpackage -P <(git show master:python3.9.spec)) <(rpmspec --without flatpackage -P python3.9.spec)

So, first off a general question. I believed the ideal solution would be to only have the non-optimized .pyc file, and have the opt-1/2 files and the .py file as optional installs.

Ideal how? This is not what we are trying to do here. It is another option for saving space, but it breaks many user expectations. We already had this discussion in the past.

We did, and I thought we identified this as the best solution long-term. Do I remember that incorrectly?
I do understand that's not what we're doing here, but I thought it was the long-term hope.

Here we attempt to keep the sources (mandatory) but have all the pre-compiled bytecode caches installable from RPM (optional, with non-optimized caches recommended = installed by default).

What you talk about has been done with several large and generated modules only in https://src.fedoraproject.org/rpms/python3.9/pull-request/16

But of course, without the .py file the .pyc file is not found in __pycache__ and so this is more of a long-term project. Is that correct?

I don't understand the question :/ This is a long-term project, but not the same one you talk about.

If that assumption stands, that would mean that while now we're proposing a mandatory .py and optional non-optimized .pyc, later on that would be switched to an optional .py and mandatory non-optimized .pyc. Is that correct?

No.

Because if the "ideal" solution I described was something we were aiming for in the future, I would be worried about going back and forth about what is mandatory and what isn't.

The paths are also %ghosted in the "main" python3-libs package, that owns:
/usr/lib64/python3.9/random.py
/usr/lib64/python3.9/__pycache__/

This is a bit confusing, is there really a random.py at that location, or is it just a "random" file?

Yes, there is a random.py file at that location. If the name is confusing, I can pick any other one, what about ast.py?

I'd say skip that line, the 2nd example is the only important one.

What 2nd example?

By 2nd example I meant the line /usr/lib64/python3.9/__pycache__/. I'm not sure how random.py or ast.py is relevant and thus it confused me.

We can certainly restart the discussion about the solution we aim for.

The idea here was that this is the solution we aim for. Time (and feedback) can show that this was a bad idea and at that point we might re-do this completely, but there is no plan to do this now and redo it differently later.

Do we want to have the expanded content from the macros committed somewhere in the dist-git?

Where would you put it and how?

At the moment two options come to me, but I'm sure there are more:

  1. in %prep create a new file with the expanded contents of the macro, which we then check into git.

So after every change of the spec file, we would be expected to run fedpkg prep, pull out a file from the source dir, and commit it to dist git? Seems rather tedious.

  2. Pre-generate the content into the spec file.

That seems backwards to me. The macros were created explicitly so that we don't need to pre-process the spec file with tools and/or keep 4 filelists in sync.


Maybe my viewpoint is limited but I don't see where this is going. Do you think the macros are too magical?

%exclude %{pylibdir}/lib2to3/tests

This is only removed but never added anywhere else. And I don't believe it's added by the macros.

Indeed removed and not added, but correctly.

The following:

%{pylibdir}/lib2to3
%exclude %{pylibdir}/lib2to3/tests

Was replaced by:

```
%dir %{pylibdir}/lib2to3
%dir %{pylibdir}/lib2to3/__pycache__
%{pylibdir}/lib2to3/*.py
%ghost %{pylibdir}/lib2to3/__pycache__/*%{bytecode_suffixes}

%dir %{pylibdir}/lib2to3/fixes
%dir %{pylibdir}/lib2to3/fixes/__pycache__
%{pylibdir}/lib2to3/fixes/*.py
%ghost %{pylibdir}/lib2to3/fixes/__pycache__/*%{bytecode_suffixes}

%dir %{pylibdir}/lib2to3/pgen2
%dir %{pylibdir}/lib2to3/pgen2/__pycache__
%{pylibdir}/lib2to3/pgen2/*.py
%ghost %{pylibdir}/lib2to3/pgen2/__pycache__/*%{bytecode_suffixes}
```

It is a bit verbose, but generated from the following three lines only:

%{pylibdir}/lib2to3
%{pylibdir}/lib2to3/fixes
%{pylibdir}/lib2to3/pgen2

We can no longer recursively list entire directories with nested pure Python modules in the python3-libs %files section, because we need to know what's in the directories. If need be, we could change the Lua macros to Python/Shell scripts and generate the file lists after %install, but I find that harder to debug.

That makes sense. +1

By 2nd example I meant the line /usr/lib64/python3.9/__pycache__/. I'm not sure how random.py or ast.py is relevant and thus it confused me.

The python3-libs package owns the .py files and the __pycache__ directory. That's what the two lines say. Feel free to suggest better wording.

Side note: I use rpmspec --without flatpackage -P python3.9.spec to debug this.

Also:

meld <(rpmspec --without flatpackage -P <(git show master:python3.9.spec)) <(rpmspec --without flatpackage -P python3.9.spec)

That's exactly what I'm "spitballing" about, this would be great to have saved in git so it could be easily compared and checked.

We can certainly restart the discussion about the solution we aim for.

The idea here was that this is the solution we aim for. Time (and feedback) can show that this was a bad idea and at that point we might re-do this completely, but there is no plan to do this now and redo it differently later.

That sounds good to me to discuss before we merge this. Because if we're aiming in the non-optimized .pyc only direction, which I thought we were, I would reconsider making the non-optimized .pyc optional now in the meantime.

That sounds good to me to discuss before we merge this.

Totally agreed. I plan to go through the change process, so there will be a discussion with a larger audience as well. Don't worry about merging this before it is discussed.

Because if we're aiming in the non-optimized .pyc only direction, which I thought we were, I would reconsider making the non-optimized .pyc optional now in the meantime.

If we aim for a solution, we will go there without detours. There is no point in this PR if this is not the solution we want. Certainly, there is no twisted plan to do this just to revert it later and do that.

By 2nd example I meant the line /usr/lib64/python3.9/__pycache__/. I'm not sure how random.py or ast.py is relevant and thus it confused me.

The python3-libs package owns the .py files and the __pycache__ directory. That's what the two lines say. Feel free to suggest better wording.

How about:

The paths are also %ghosted in the "main" python3-libs package, which contains:

  • The .py files, e.g.: /usr/lib64/python3.9/random.py
  • The /usr/lib64/python3.9/__pycache__/ directory (but not most of the .pyc files inside)

That sounds good to me to discuss before we merge this.

Totally agreed. I plan to go through the change process, so there will be a discussion with a larger audience as well. Don't worry about merging this before it is discussed.

Because if we're aiming in the non-optimized .pyc only direction, which I thought we were, I would reconsider making the non-optimized .pyc optional now in the meantime.

If we aim for a solution, we will go there without detours. There is no point in this PR if this is not the solution we want. Certainly, there is no twisted plan to do this just to revert it later and do that.

Really? I see it exactly the opposite. Making the opt-1 and opt-2 .pyc files optional is part of both plans, so I see this as a step forward regardless. Only the non-optimized .pyc files are not aligned with the "ideal" solution I mentioned.

2 new commits added

  • Split bytecode cache to optional subpackages
  • Sort files in %{dynload_dir}
3 years ago
  1. I've amended the commit message (and PR description) as suggested, but not exactly, so please re-review.
  2. I'd rather have the discussion about what solution is "ideal" out of this PR discussion, it is messy enough already. If we go with another solution, it will be in another PR.
  3. I don't think that committing in something that's easily generated and inspected outweighs the trouble of remembering to do it / automating it. As an example, we don't commit in the result of rpm -ql ... now. If we want to do such things, let's do it in another PR and discuss it there? Honestly, I'd rather have some CI based rpmdiff output visible in PRs than this.

I see it was changed to:

 - The /usr/lib64/python3.9/__pycache__/ and other __pycache__ directories
   (but not the .pyc files in it)

IIRC there are some .pyc files inside it, that's why I used the softer wording. But that's just a nitpick.

IIRC there are some .pyc files inside it

tl;dr: If they are, it's a bug, that's why I changed the wording.

Longer explanation:

  • There are no real pyc files in __pycache__ directories in python3-libs.
  • There are some real pyc files in python3-libs but not in __pycache__ directories.
  • There are ghosted pyc files in __pycache__ directories in python3-libs. I.e. the python3-libs package co-owns the files, but does not ship them.
  • There are real pyc files in __pycache__ directories but not in python3-libs (e.g. in python3-libs-bytecode-opt-0 but also python3-test or python3-idle)

Build succeeded.

IIRC there are some .pyc files inside it

tl;dr: If they are, it's a bug, that's why I changed the wording.

Longer explanation:

  • There are no real pyc files in __pycache__ directories in python3-libs.
  • There are some real pyc files in python3-libs but not in __pycache__ directories.
  • There are ghosted pyc files in __pycache__ directories in python3-libs. I.e. the python3-libs package co-owns the files, but does not ship them.
  • There are real pyc files in __pycache__ directories but not in python3-libs (e.g. in python3-libs-bytecode-opt-0 but also python3-test or python3-idle)

Ah right, I misremembered where the .pyc files were located. +1

  1. I've amended the commit message (and PR description) as suggested, but not exactly, so please re-review.

Looking good.

  1. I'd rather have the discussion about what solution is "ideal" out of this PR discussion, it is messy enough already. If we go with another solution, it will be in another PR.

Absolutely. Maybe we can start with a short call. I don't want to muddy the discussion if I'm just missing some new developments or something like that.

  1. I don't think that committing in something that's easily generated and inspected outweighs the trouble of remembering to do it / automating it. As an example, we don't commit in the result of rpm -ql ... now. If we want to do such things, let's do it in another PR and discuss it there? Honestly, I'd rather have some CI based rpmdiff output visible in PRs than this.

I wanted to discuss this as part of this PR, because we're taking some clarity out of the git history in this PR. And while yes, it can be easily generated for a single given commit, it's a far cry from git blame. On the other hand, the specific information isn't critical, so the added complexity of automating or manually maintaining it weighs heavily, as you say. Thinking on it more, you're right, it's not worth pursuing for this.

Absolutely. Maybe we can start with a short call. I don't want to muddy the discussion if I'm just missing some new developments or something like that.

Please read solution 5 and 7 in https://github.com/hroncok/python-minimization/blob/master/document.md before we discuss this.

Absolutely. Maybe we can start with a short call. I don't want to muddy the discussion if I'm just missing some new developments or something like that.

Please read solution 5 and 7 in https://github.com/hroncok/python-minimization/blob/master/document.md before we discuss this.

Done.

We had a call with @churchyard and @lbalhar about what we consider the best approach to minimization in the long term. What follows are my personal notes, so take them with a grain of salt.

I came in preferring Solution 7: Stop shipping mandatory source files, ship .pyc instead [0], while Miro preferred Solution 5: Stop shipping mandatory bytecode cache, with some details from Solution 6: Stop shipping mandatory optimized bytecode cache mixed in.

[0] https://github.com/hroncok/python-minimization/blob/master/document.md

During the meeting we have discussed the following points (some of which are not covered in the original document [0]):

Problems with Solution 5: Stop shipping mandatory bytecode cache:

  • 8% larger disk footprint than Solution 7
  • Somewhat slower start without optional bytecodes installed
  • Generates new files in /usr/lib during runtime
    • This is somewhat unexpected, especially from rpm-installed software
    • It makes containers larger still when a Python script is being run as part of the container build (e.g. in s2i-python-container), unless the user knows about this behaviour and manually counters it (flag for Python or deleting files)

Problems with Solution 7: Stop shipping mandatory source files, ship .pyc instead:

  • When the optional (but installed by default) .py files were not installed:
    • Tracebacks (or their parts) from inside the standard library would be obfuscated
    • Programming IDEs would likely have issues with missing .py files
  • Users would need to install .py files to be able to modify them, and then they would have to trust that the installed and unmodified .py file is doing exactly what the .pyc file was doing
  • When launching Python with an optimization level with only the non-optimized .pyc file installed, it would be run without optimizations
    • This is not really a problem when the difference is only that the docstrings weren’t removed
    • This is a serious problem when __debug__ is used inside the code, and asserts could also pose an issue
  • This needs a PEP for Python to avoid having the non-optimized .pyc file duplicated on the filesystem (inside and outside of __pycache__)
  • It is largely untested and unexpected in the Python ecosystem that .py files are missing

By the time we finished the discussion, my mind had been changed, and we all agreed that Solution 5 is our preferred way to go.

Thank you, @churchyard and @lbalhar for discussing with me!

Thanks for the wrap up.

8% larger disk footprint than Solution 7

To clarify, the 8% relates to the size of entire python3-libs.

Problems found:

When python3-libs-bytecode-opt-? is uninstalled, the pyc files are kept, as they are also owned by python3-libs.

Mitigation ideas:

  1. Do nothing and say this is expected.
  2. Remove the files in postun scriptlets.

Moreover, when Python is updated for the first time from before this change to after this change, the opt-1 and opt-2 files also remain on the filesystem. Even if we choose (2) from the above, such scriptlets won't run in this case.

Mitigation ideas:

  1. Do nothing and say this is expected. Such files would be removed upon upgrading to Python 3.10+.
  2. Obsolete previous versions of python3-libs from python3-libs and all the bytecode subpackages. When upgrading, users will get all of them, but optimization 1 and 2 would be removed when dnf autoremove is used. However if dnf autoremove is not used, such packages would also be upgraded upon upgrading to Python 3.10+.

Another problem found. The two files below are in python3-libs-bytecode-opt-? but they are excluded from python3-libs (and hence not ghosted from them). We should make sure to also exclude them from the bytecode packages.

  • /usr/lib64/python3.9/ensurepip/_bundled/__pycache__/__init__.cpython-39.*pyc
  • /usr/lib64/python3.9/__pycache__/turtle.cpython-39.*pyc

Problems found:

When python3-libs-bytecode-opt-? is uninstalled, the pyc files are kept, as they are also owned by python3-libs.

Mitigation ideas:

  1. Do nothing and say this is expected.
  2. Remove the files in postun scriptlets.

The second option seems to be okay but we have to be careful because AFAIK postun also runs on upgrades. https://docs.fedoraproject.org/en-US/packaging-guidelines/Scriptlets/#_syntax

Moreover, when Python is updated for the first time from before this change to after this change, the opt-1 and opt-2 files also remain on the filesystem. Even if we choose (2) from the above, such scriptlets won't run in this case.

The scriptlets from (2) will be defined only for python3-libs-bytecode-opt-?, right? So an upgrade of python3-libs should not run them at all.

Mitigation ideas:

  1. Do nothing and say this is expected. Such files would be removed upon upgrading to Python 3.10+.
  2. Obsolete previous versions of python3-libs from python3-libs and all the bytecode subpackages. When upgrading, users will get all of them, but optimization 1 and 2 would be removed when dnf autoremove is used. However if dnf autoremove is not used, such packages would also be upgraded upon upgrading to Python 3.10+.

Would it be possible to define a one-time scriptlet? I mean in a way like: "if we are upgrading from the last version with all pyc files, remove those with opt-1 and opt-2." In this way, the first upgrade of python3-libs would remove all files newly available from the new optional subpackages.
Or "if the current version is higher than the last with all pyc files and the optional packages are not present, remove the optimized pyc files" which would handle also bigger jumps in version-release combinations.

Another problem found. The two files below are in python3-libs-bytecode-opt-? but they are excluded from python3-libs (and hence not ghosted from them). We should make sure to also exclude them from the bytecode packages.

  • /usr/lib64/python3.9/ensurepip/_bundled/__pycache__/__init__.cpython-39.*pyc
  • /usr/lib64/python3.9/__pycache__/turtle.cpython-39.*pyc

Sounds like an interesting idea for our CI to test.

Sounds like an interesting idea for our CI to test.

Yes, and also seems easy enough to script up!

Would it be possible to define a one-time scriptlet? I mean in a way like: "if we are upgrading from the last version with all pyc files, remove those with opt-1 and opt-2." In this way, the first upgrade of python3-libs would remove all files newly available from the new optional subpackages.

That would require a persistent marker on the filesystem that says "this has been done already".

Or "if the current version is higher than the last with all pyc files and the optional packages are not present, remove the optimized pyc files" which would handle also bigger jumps in version-release combinations.

That would require access to the RPM database (which can be a little dangerous to access mid-transaction).

Spitballing mitigation idea #3:

  • Don't %ghost the .pyc files from python3-libs, but have a scriptlet that will remove them in the unlikely event of uninstalling python3-libs.

That way, if the files are dnf-installed, they're owned by an rpm-package and everything is fine. But if they are user-generated, they will show up as non-dnf owned, which I think is a good thing.

And I think it helps with our upgrade problem, but I'm not 100% certain.

What do you think, is it too crazy?

The unlikely event would also happen when we update python3 to a newer version, such as 3.10, 3.11. Hence the scriptlet would need to detect such situation. I don't think this is a good solution (and I don't think that having them not owned by a package is a good thing either).

I just realized a possible problem with this PR (whether we use %ghost or not):

Situation: User has only python3-libs installed, no .pyc. They run some Python scripts with sudo and some of the .pyc files are generated. And now the user tries to install the .pyc subpackages — the paths now conflict with the files generated by user with sudo. Are the contents the same? I think in general they won't be. What happens during the installation?

Situation: User has only python3-libs installed, no .pyc. They run some Python scripts with sudo and some of the .pyc files are generated. And now the user tries to install the .pyc subpackages — the paths now conflict with the files generated by user with sudo. Are the contents the same? I think in general they won't be. What happens during the installation?

The contents may not be bit-by-bit identical. The pyc files are silently replaced by the ones shipped from the bytecode package, there is no error or warning, no reported conflict.

The unlikely event would also happen when we update python3 to a newer version, such as 3.10, 3.11. Hence the scriptlet would need to detect such situation. I don't think this is a good solution (and I don't think that having them not owned by a package is a good thing either).

Ah, we would need to run the scriptlet not only in the unlikely event of uninstalling python3-libs, but also when updating python3-libs to a new Major version, since it's technically the same package even if built from a different component. And we'd have no good way to detect that. Yep, that sucks :/

Situation: User has only python3-libs installed, no .pyc. They run some Python scripts with sudo and some of the .pyc files are generated. And now the user tries to install the .pyc subpackages — the paths now conflict with the files generated by user with sudo. Are the contents the same? I think in general they won't be. What happens during the installation?

The contents may not be bit-by-bit identical. The pyc files are silently replaced by the ones shipped from the bytecode package, there is no error or warning, no reported conflict.

Interesting. You're sure the files weren't identical yet they were silently replaced? That's... not what I expected.

You're sure the files weren't identical.

No, not at all, they are likely to be identical, but they are not guaranteed to be so. Yet, let me replay the experiment:

$ mock -r fedora-rawhide-x86_64 remove python3-libs-bytecode-opt-\*
$ mock -r fedora-rawhide-x86_64 shell
<mock-chroot> sh-5.0# find /usr/lib64/python3.9 -name '*.pyc' -delete
<mock-chroot> sh-5.0# python3 -c pass
<mock-chroot> sh-5.0# find /usr/lib64/python3.9 -name '*.pyc' 
/usr/lib64/python3.9/__pycache__/_sitebuiltins.cpython-39.pyc
/usr/lib64/python3.9/__pycache__/genericpath.cpython-39.pyc
/usr/lib64/python3.9/__pycache__/posixpath.cpython-39.pyc
/usr/lib64/python3.9/__pycache__/abc.cpython-39.pyc
/usr/lib64/python3.9/__pycache__/_collections_abc.cpython-39.pyc
/usr/lib64/python3.9/__pycache__/io.cpython-39.pyc
/usr/lib64/python3.9/__pycache__/site.cpython-39.pyc
/usr/lib64/python3.9/__pycache__/codecs.cpython-39.pyc
/usr/lib64/python3.9/__pycache__/os.cpython-39.pyc
/usr/lib64/python3.9/__pycache__/stat.cpython-39.pyc
/usr/lib64/python3.9/encodings/__pycache__/aliases.cpython-39.pyc
/usr/lib64/python3.9/encodings/__pycache__/__init__.cpython-39.pyc
/usr/lib64/python3.9/encodings/__pycache__/utf_8.cpython-39.pyc
/usr/lib64/python3.9/encodings/__pycache__/latin_1.cpython-39.pyc
<mock-chroot> sh-5.0# echo trollolol > /usr/lib64/python3.9/__pycache__/site.cpython-39.pyc
<mock-chroot> sh-5.0# cat /usr/lib64/python3.9/__pycache__/site.cpython-39.pyc
trollolol
$ mock -r fedora-rawhide-x86_64 install .../python3-libs-bytecode-opt-0-3.9.0~rc1-3.fc34.x86_64.rpm
$ mock -r fedora-rawhide-x86_64 shell
<mock-chroot> sh-5.0# cat /usr/lib64/python3.9/__pycache__/site.cpython-39.pyc
a
СN_-U�@s&dZddlZddlZddlZddlZddlZejejgda  da
da
  dd�Z
d�Zd  dddZ
�Zd
   d
d�Zdd�Zdd�Zdd�Zdd�Zdd�Zdd��Zd3d�Zd4dd�Zdd �Z�d!d"�Z#d$�Zd%d&�Zd'd(�Zd)d*�Zd+d,�Z d-d.�Z!ej"j#�s
e!�d/d0�Z$e%d1k�r"e$�dS)5a�
                           Append module search paths for third-party packages to sys.path.
...

BTW This behavior was expected when we decided to use %ghosts.

Would it be possible to define a one-time scriptlet? I mean in a way like: "if we are upgrading from the last version with all pyc files, remove those with opt-1 and opt-2." In this way, the first upgrade of python3-libs would remove all files newly available from the new optional subpackages.

That would require a persistent marker on the filesystem that says "this has been done already".

Not necessarily an explicit marker, all we'd need is a change between the two versions detectable on the filesystem. I ran the list of packages for python3-libs in Fedora rawhide, and it includes this:

/usr/lib/.build-id
/usr/lib/.build-id/0c
/usr/lib/.build-id/0c/4bffb6514f6939847ec04b79cb894f877bae24
/usr/lib/.build-id/0d
/usr/lib/.build-id/0d/035c18167d9bd3d0e0ccf2e78f4e16407a0ac6
/usr/lib/.build-id/12
/usr/lib/.build-id/12/c306a11fc7586557b35c85a73140e5401cba1a
/usr/lib/.build-id/13
/usr/lib/.build-id/13/7ee205bb5b157a540e9cba5524a3f949881468
/usr/lib/.build-id/14
/usr/lib/.build-id/14/fb7d4705a3fa1065e724c397a4af451aeed299
/usr/lib/.build-id/19
/usr/lib/.build-id/19/54d9a6812c6766fb68e81b227729699d5e8890
/usr/lib/.build-id/19/ebbc6db7a04134ed5c6732de637b76612d6be5
/usr/lib/.build-id/1a
[... long list ...]

We could:
1. Put a marker file inside that directory, never to be noticed by anyone.
2. Remove that directory (do we need it?) and detect whether it's present or not.

Hm, thinking on it more, we couldn't detect whether /usr/lib/.build-id was removed or not, as we can only see the state before the update or after the update, but not both. :/

Hm, thinking on it more, we couldn't detect whether /usr/lib/.build-id was removed or not, as we can only see the state before the update or after the update, but not both. :/

Unless we had both a %pre scriptlet and a %postun scriptlet (or similar) and transferred data between them (using a temporary marker?), but that's starting to get too complicated to be reliable.

I don't understand what you are proposing at all.

  1. Why would we put a marker to "that directory"? Which directory in particular, /usr/lib/.build-id or some directory within?
  2. Why would we remove a directory that is created by some process for some reasons that go beyond our scope? The /usr/lib/.build-id directory contains files owned by many packages.
  3. The /usr/lib/.build-id directory would be recreated by any package upgrade/installation. That means both python3-libs and any other RPM package.
  4. How would a presence or absence of this directory help us to achieve our goal?
  5. Why would we need to detect a state of a directory both before the update and after the update? What are you actually trying to detect with this?

I am seriously confused by your proposal, sorry about that.

The unlikely event would also happen when we update python3 to a newer version, such as 3.10, 3.11. Hence the scriptlet would need to detect such situation. I don't think this is a good solution (and I don't think that having them not owned by a package is a good thing either).

Ah, we would need to run the scriptlet not only in the unlikely event of uninstalling python3-libs, but also when updating python3-libs to a new Major version, since it's technically the same package even if built from a different component. And we'd have no good way to detect that. Yep, that sucks :/

Actually, when updating, we could detect whether there are __pycache__ directories in /usr/lib* for Python X.Y-1 (and -2) but no .py files above them, and then delete those __pycache__ directories. But it's also non-trivial.

Actually, when updating, we could detect whether there are __pycache__ directories in /usr/lib* for Python X.Y-1 (and -2) but no .py files above them, and then delete those __pycache__ directories. But it's also non-trivial.

That actually seems rather trivial from the first glance. However, RPM file ownership already exists to solve this problem, this seems like reinventing the wheel.

1 new commit added

  • Hanlde and test turtle.py and ensurepip/_bundled %excludes in bytecode subpackages
3 years ago

Another problem found. The two files below are in python3-libs-bytecode-opt-? but they are excluded from python3-libs (and hence not ghosted from them). We should make sure to also exclude them from the bytecode packages.

  • /usr/lib64/python3.9/ensurepip/_bundled/__pycache__/__init__.cpython-39.*pyc
  • /usr/lib64/python3.9/__pycache__/turtle.cpython-39.*pyc

I've pushed a commit that fixes and tests this.

I don't understand what you are proposing at all.

  1. Why would we put a marker to "that directory"? Which directory in particular, /usr/lib/.build-id or some directory within?

So, this is where I'm coming from: I think leaving a marker file on the filesystem is pretty ugly. It would be a little bit less horrible if we hide it somewhere so people don't run into it.

  1. Why would we remove a directory that is created by some process for some reasons that go beyond our scope? The /usr/lib/.build-id directory contains files owned by many packages.

My question was whether we want this directory to be there? Because if not, we could combine two things: 1. clean up unused files in the package, 2. the removal would serve as a proxy indicator to the version. I.e. if there's /usr/lib/.build-id owned by python3-libs, then this is the old version with .pyc files, if it's not there, it's the new version where .pyc files are optional.

  1. The /usr/lib/.build-id directory would be recreated by any package upgrade/installation. That means both python3-libs and any other RPM package.

Hm, I was somehow unaware that this is a thing until now. :o
In that case, that sadly doesn't help us. Thanks for brainstorming with me.

If anyone's interested, the /usr/lib/.build-id directory is a new thing in F27 and is related to debug info packages. Fedora change: https://fedoraproject.org/wiki/Changes/ParallelInstallableDebuginfo

It would be a little bit less horrible if we hide it somewhere so people don't run into it.

Let's agree to disagree. I consider it much uglier to choose an arbitrary directory that is used for a different purpose (however weird or obfuscated the path is) than choosing a visible directory designed for the purpose. If we put state somewhere, let's make it /var/lib/rpm-state/.

The upgrading one-time scriptlet could be designed like this:

  • python3-libs newly owns and ships /var/lib/rpm-state/python3.9-noclean-pycs marker
  • python3-libs runs a pretrans scriptlet that deletes a specified list of pyc files iff the marker does not exist yet
  • the scriptlet and marker can be removed in Fedora 36 or sooner if we upgrade to Python 3.10+

The problematic bit is to remove the correct files. We can either:

  1. Remove all *.cpython-39*.pyc files in __pycache__ directories below %{pylibdir} except those below %{pylibdir}/site-packages. This would also remove files that belong to python3-tkinter but we know that if python3-tkinter is installed, it is being upgraded in this transaction. Should be easy to script up, but a bit dangerous to figure out all the problems.

  2. Hardcode the list of files to be removed into the scriptlet. Safe but feels horrible.

Note that we cannot query rpm to give us the list of files we own. That would most certainly blow up.

The reason I'm wracking my brain as to alternative solutions is that I think the solutions we have are not great.

The most workable that I see now is using the marker file, but the concept itself is problematic, and the execution could be too.

As for the others:

Mitigation ideas:

  1. Do nothing and say this is expected. Such files would be removed upon upgrading to Python 3.10+.

This would mean we'll have a largely untested implementation being possibly shipped in the next EL.

  1. Obsolete previous versions of python3-libs from python3-libs and all the bytecode subpackages. When upgrading, users will get all of them, but optimization 1 and 2 would be removed when dnf autoremove is used. However if dnf autoremove is not used, such packages would also be upgraded upon upgrading to Python 3.10+.

I've never used dnf autoremove, and I think many people have neither, which would mean it'll get even less testing.

Build succeeded.

The upgrading one-time scriptlet could be designed like this:

  • python3-libs newly owns and ships /var/lib/rpm-state/python3.9-noclean-pycs marker
  • python3-libs runs a pretrans scriptlet that deletes a specified list of pyc files iff the marker does not exist yet

Ah, so the marker would be a normal rpm-owned file? I thought we would have to %ghost it and create it in the scriptlet, which is why I didn't like the idea much. But this is much better. +1

  • the scriptlet and marker can be removed in Fedora 36 or sooner if we upgrade to Python 3.10+

The problematic bit is to remove the correct files. We can either:

  1. Remove all *.cpython-39*.pyc files in __pycache__ directories below %{pylibdir} except those below %{pylibdir}/site-packages. This would also remove files that belong to python3-tkinter but we know that if python3-tkinter is installed, it is being upgraded in this transaction. Should be easy to script up, but a bit dangerous to figure out all the problems.

Yeah, having dealt with scriptlets in the past, I'm a bit worried about that.

  1. Hardcode the list of files to be removed into the scriptlet. Safe but feels horrible.

Agreed, but safe is important.

Ah, so the marker would be a normal rpm-owned file? I thought we would have to %ghost it and create it in the scriptlet, which is why I didn't like the idea much. But this is much better. +1

In this case we know that once the new python3-libs version is installed, it's done. Hence we can ship it and own it as regular file, yes.

Either this system started with python3-libs without bytecode cache and hence the scriptlet never ran but all is OK because the bytecode was not there.

Or it was already upgraded and the scriptlet ran in the past and all is OK.

Or the package was updated by some rpm command without running the scriptlet or the marker was created manually and all is not OK but the user is responsible here I'd say.

3 new commits added

  • Hanlde and test turtle.py and ensurepip/_bundled %excludes in bytecode subpackages
  • Split bytecode cache to optional subpackages
  • Sort files in %{dynload_dir}
3 years ago

@churchyard Sounds good to me.

Btw, if we do go with a marker, we need to update the Upgrade/compatibility impact section of the FC.

Build succeeded.

1 new commit added

  • Proof of concept: Add RPM scriptlets related to bytecode cache split
3 years ago

I've pushed a proof of concept of the scriptlets. It is rather horrible :( but let's test it and I can optimize for being beautiful later :D

4 new commits added

  • Proof of concept: Add RPM scriptlets related to bytecode cache split
  • Hanlde and test turtle.py and ensurepip/_bundled %excludes in bytecode subpackages
  • Split bytecode cache to optional subpackages
  • Sort files in %{dynload_dir}
3 years ago

4 new commits added

  • Proof of concept: Add RPM scriptlets related to bytecode cache split
  • Hanlde and test turtle.py and ensurepip/_bundled %excludes in bytecode subpackages
  • Split bytecode cache to optional subpackages
  • Sort files in %{dynload_dir}
3 years ago

4 new commits added

  • Proof of concept: Add RPM scriptlets related to bytecode cache split
  • Hanlde and test turtle.py and ensurepip/_bundled %excludes in bytecode subpackages
  • Split bytecode cache to optional subpackages
  • Sort files in %{dynload_dir}
3 years ago

4 new commits added

  • Proof of concept: Add RPM scriptlets related to bytecode cache split
  • Hanlde and test turtle.py and ensurepip/_bundled %excludes in bytecode subpackages
  • Split bytecode cache to optional subpackages
  • Sort files in %{dynload_dir}
3 years ago

Yes, this works pretty much as desired:

$ mock -r fedora-rawhide-x86_64 install python3-libs  # from the fedora repos
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.pyc' | wc -l
549
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.opt-1.pyc' | wc -l
549
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.opt-2.pyc' | wc -l
549

# this will be the default upgrade path; we also need to test some others
$ mock -r fedora-rawhide-x86_64 install ./results_python3.9/3.9.0~rc1/3.fc34/python3-libs{,-bytecode-opt-0}-3.9.0~rc1-3.fc34.x86_64.rpm
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.pyc' | wc -l
549
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.opt-1.pyc' | wc -l
0
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.opt-2.pyc' | wc -l
0

# Install all
$ mock -r fedora-rawhide-x86_64 install ./results_python3.9/3.9.0~rc1/3.fc34/python3-libs-bytecode-opt-{1,2}-3.9.0~rc1-3.fc34.x86_64.rpm
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.pyc' | wc -l
549
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.opt-1.pyc' | wc -l
549
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.opt-2.pyc' | wc -l
549

# Remove some
$ mock -r fedora-rawhide-x86_64 remove python3-libs-bytecode-opt-2
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.pyc' | wc -l
549
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.opt-1.pyc' | wc -l
549
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.opt-2.pyc' | wc -l
0

$ mock -r fedora-rawhide-x86_64 remove python3-libs-bytecode-opt-1
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.pyc' | wc -l
549
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.opt-1.pyc' | wc -l
0
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.opt-2.pyc' | wc -l
0

$ mock -r fedora-rawhide-x86_64 remove python3-libs-bytecode-opt-0
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.pyc' | wc -l
0
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.opt-1.pyc' | wc -l
0
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.opt-2.pyc' | wc -l
0

# Install some back
$ mock -r fedora-rawhide-x86_64 install ./results_python3.9/3.9.0~rc1/3.fc34/python3-libs-bytecode-opt-{1,2}-3.9.0~rc1-3.fc34.x86_64.rpm
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.pyc' | wc -l
0
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.opt-1.pyc' | wc -l
549
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.opt-2.pyc' | wc -l
549


# Reinstall python3-libs
$ mock -r fedora-rawhide-x86_64 --copyin ./results_python3.9/3.9.0~rc1/3.fc34/python3-libs-3.9.0~rc1-3.fc34.x86_64.rpm /
$ mock -r fedora-rawhide-x86_64 shell
<mock-chroot> sh-5.0# rpm -iv --replacepkgs /python3-libs-3.9.0~rc1-3.fc34.x86_64.rpm 
Verifying packages...
Preparing packages...
python3-libs-3.9.0~rc1-3.fc34.x86_64

$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.pyc' | wc -l
0
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.opt-1.pyc' | wc -l
549
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.opt-2.pyc' | wc -l
549

# Create some files manually
$ mock -r fedora-rawhide-x86_64 install ./results_python3.9/3.9.0~rc1/3.fc34/python3-3.9.0~rc1-3.fc34.x86_64.rpm
<mock-chroot> sh-5.0# python3 -c pass
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.pyc' | wc -l
14

# Reinstall python3-libs again
<mock-chroot> sh-5.0# rpm -iv --replacepkgs /python3-libs-3.9.0~rc1-3.fc34.x86_64.rpm
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.pyc' | wc -l
14

Something I forgot?

If some files are deleted manually, it does not prevent the scriptlet from removing the rest of them:

$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.pyc' | wc -l
549
$ sudo find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/lib2to3 -name '*.cpython-39.pyc'  -delete
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.pyc' | wc -l
476
$ mock -r fedora-rawhide-x86_64 remove python3-libs-bytecode-opt-0
$ find /var/lib/mock/fedora-rawhide-x86_64/root/usr/lib64/python3.9/ -name '*.cpython-39.pyc' | wc -l
0

Build succeeded.

This is slowly becoming a nightmare. I've talked with @pviktori today and we'll see if we can restore the original "don't write bytecode cache" marker idea instead.

Closing this for now, will reopen if needed or open a new PR.

Pull-Request has been closed by churchyard

3 years ago

This is slowly becoming a nightmare. I've talked with @pviktori today and we'll see if we can restore the original "don't write bytecode cache" marker idea instead.

Closing this for now, will reopen if needed or open a new PR.

Can you explain more?

Do you mean the marker described in the minimization doc [0] under Problem 5.3: SELinux denials? And how would it help with the scriptlets?

[0] https://github.com/hroncok/python-minimization/blob/master/document.md

Do you mean the marker described in the minimization doc...

Yes.

And how would it help with the scriptlets?

No need for ghosts, no need for scriptlets.

Do you mean the marker described in the minimization doc...

Yes.

And how would it help with the scriptlets?

No need for ghosts, no need for scriptlets.

Hm, so we would not generate .pyc files by default at all? :/