#54 Rewrite scriptlets to call /sbin/ldconfig in lua
Merged a year ago by codonell. Opened a year ago by zbyszek.
rpms/ zbyszek/glibc ldconfig-scriptlets  into  rawhide

file modified
+59 -58
@@ -1,5 +1,5 @@ 

- %define glibcsrcdir glibc-2.35.9000-44-g3d9f171bfb

- %define glibcversion 2.35.9000

+ %global glibcsrcdir glibc-2.35.9000-44-g3d9f171bfb

+ %global glibcversion 2.35.9000

  # Pre-release tarballs are pulled in from git using a command that is

  # effectively:

  #
@@ -84,25 +84,25 @@ 

  ##############################################################################

  # Any architecture/kernel combination that supports running 32-bit and 64-bit

  # code in userspace is considered a biarch arch.

- %define biarcharches %{ix86} x86_64 s390 s390x

+ %global biarcharches %{ix86} x86_64 s390 s390x

  

  # Avoid generating a glibc-headers package on architectures which are

  # not biarch.

  %ifarch %{biarcharches}

- %define need_headers_package 1

+ %global need_headers_package 1

  %if 0%{?rhel} > 0

- %define headers_package_name glibc-headers

+ %global headers_package_name glibc-headers

  %else

  %ifarch %{ix86} x86_64

- %define headers_package_name glibc-headers-x86

+ %global headers_package_name glibc-headers-x86

  %endif

  %ifarch s390 s390x

- %define headers_package_name glibc-headers-s390

+ %global headers_package_name glibc-headers-s390

  %endif

  %dnl !rhel

  %endif

  %else

- %define need_headers_package 0

+ %global need_headers_package 0

  %dnl !biarcharches

  %endif

  
@@ -110,41 +110,41 @@ 

  # Utility functions for pre/post scripts.  Stick them at the beginning of

  # any lua %pre, %post, %postun, etc. sections to have them expand into

  # those scripts.  It only works in lua sections and not anywhere else.

- %define glibc_post_funcs() \

- -- We use lua posix.exec because there may be no shell that we can \

- -- run during glibc upgrade.  We used to implement much of %%post as a \

- -- C program, but from an overall maintenance perspective the lua in \

- -- the spec file was simpler and safer given the operations required. \

- -- All lua code will be ignored by rpm-ostree; see: \

- -- https://github.com/projectatomic/rpm-ostree/pull/1869 \

- -- If we add new lua actions to the %%post code we should coordinate \

- -- with rpm-ostree and ensure that their glibc install is functional. \

- function post_exec (program, ...) \

-   local pid = posix.fork () \

-   if pid == 0 then \

-     posix.exec (program, ...) \

-     assert (nil) \

-   elseif pid > 0 then \

-     posix.wait (pid) \

-   end \

- end \

- \

- function update_gconv_modules_cache () \

-   local iconv_dir = "%{_libdir}/gconv" \

-   local iconv_cache = iconv_dir .. "/gconv-modules.cache" \

-   local iconv_modules = iconv_dir .. "/gconv-modules" \

-   if (posix.utime (iconv_modules) == 0) then \

-     if (posix.utime (iconv_cache) == 0) then \

-       post_exec ("%{_prefix}/sbin/iconvconfig", \

- 		 "-o", iconv_cache, \

- 		 "--nostdlib", \

- 		 iconv_dir) \

-     else \

-       io.stdout:write ("Error: Missing " .. iconv_cache .. " file.\n") \

-     end \

-   end \

- end \

- %{nil}

+ %global glibc_post_funcs %{expand:

+ -- We use lua because there may be no shell that we can run during

+ -- glibc upgrade. We used to implement much of %%post as a C program,

+ -- but from an overall maintenance perspective the lua in the spec

+ -- file was simpler and safer given the operations required.

+ -- All lua code will be ignored by rpm-ostree; see:

+ -- https://github.com/projectatomic/rpm-ostree/pull/1869

+ -- If we add new lua actions to the %%post code we should coordinate

+ -- with rpm-ostree and ensure that their glibc install is functional.

+ --

+ -- Note: We use _prefix because Fedora's UsrMove says so.

+ function call_ldconfig ()

+   if not rpm.execute("%{_prefix}/sbin/ldconfig") then

+     io.stdout:write ("Error: call to %{_prefix}/sbin/ldconfig failed.\n")

+   end

+ end

+ 

+ function update_gconv_modules_cache ()

+   local iconv_dir = "%{_libdir}/gconv"

+   local iconv_cache = iconv_dir .. "/gconv-modules.cache"

+   local iconv_modules = iconv_dir .. "/gconv-modules"

+   if posix.utime(iconv_modules) == 0 then

+     if posix.utime (iconv_cache) == 0 then

+       if not rpm.execute("%{_prefix}/sbin/iconvconfig",

+ 		         "-o", iconv_cache,

+ 		         "--nostdlib",

+ 		         iconv_dir)

+       then

+ 	io.stdout:write ("Error: call to %{_prefix}/sbin/iconvconfig failed.\n")

+       end

+     else

+       io.stdout:write ("Error: Missing " .. iconv_cache .. " file.\n")

+     end

+   end

+ end}

  

  ##############################################################################

  # %%package glibc - The GNU C Library (glibc) core package.
@@ -208,7 +208,7 @@ 

  

  # The wrapper script relies on the fact that debugedit does not change

  # build IDs.

- %define _no_recompute_build_ids 1

+ %global _no_recompute_build_ids 1

  %undefine _unique_build_ids

  

  ##############################################################################
@@ -293,14 +293,14 @@ 

  

  # This GCC version is needed for -fstack-clash-protection support.

  BuildRequires: gcc >= 7.2.1-6

- %define enablekernel 3.2

+ %global enablekernel 3.2

  Conflicts: kernel < %{enablekernel}

- %define target %{_target_cpu}-redhat-linux

+ %global target %{_target_cpu}-redhat-linux

  %ifarch %{arm}

- %define target %{_target_cpu}-redhat-linuxeabi

+ %global target %{_target_cpu}-redhat-linuxeabi

  %endif

  %ifarch ppc64le

- %define target ppc64le-redhat-linux

+ %global target ppc64le-redhat-linux

  %endif

  

  # GNU make 4.0 introduced the -O option.
@@ -489,12 +489,14 @@ 

  

  # File triggers for when libraries are added or removed in standard

  # paths.

- %transfiletriggerin common -P 2000000 -- /lib /usr/lib /lib64 /usr/lib64

- /sbin/ldconfig

+ %transfiletriggerin common -P 2000000 -p <lua> -- /lib /usr/lib /lib64 /usr/lib64

+ %glibc_post_funcs

+ call_ldconfig()

  %end

  

- %transfiletriggerpostun common -P 2000000 -- /lib /usr/lib /lib64 /usr/lib64

- /sbin/ldconfig

+ %transfiletriggerpostun common -P 2000000 -p <lua> -- /lib /usr/lib /lib64 /usr/lib64

+ %glibc_post_funcs

+ call_ldconfig()

  %end

  

  # We need to run ldconfig manually because __brp_ldconfig assumes that
@@ -1123,8 +1125,8 @@ 

  # Special flag to enable annobin annotations for statically linked

  # assembler code.  Needs to be passed to make; not preserved by

  # configure.

- %define glibc_make_flags_as ASFLAGS="-g -Wa,--generate-missing-build-notes=yes"

- %define glibc_make_flags %{glibc_make_flags_as}

+ %global glibc_make_flags_as ASFLAGS="-g -Wa,--generate-missing-build-notes=yes"

+ %global glibc_make_flags %{glibc_make_flags_as}

  

  ##############################################################################

  # %%build - Generic options.
@@ -1195,7 +1197,7 @@ 

  # For a system glibc that subdirectory is "/" (the root of the filesystem).

  # This is called a sysroot (system root) and can be changed if we have a

  # distribution that supports multiple installed glibc versions.

- %define glibc_sysroot $RPM_BUILD_ROOT

+ %global glibc_sysroot $RPM_BUILD_ROOT

  

  # Remove existing file lists.

  find . -type f -name '*.filelist' -exec rm -rf {} \;
@@ -2048,8 +2050,7 @@ 

  -- the cache early to avoid any problems running binaries with

  -- the new glibc.

  

- -- Note: We use _prefix because Fedora's UsrMove says so.

- post_exec ("%{_prefix}/sbin/ldconfig")

+ call_ldconfig()

  

  -- (4) Update gconv modules cache.

  -- If the /usr/lib/gconv/gconv-modules.cache exists, then update it
@@ -2061,7 +2062,7 @@ 

  

  -- (5) On upgrades, restart systemd if installed.  "systemctl -q" does

  -- not suppress the error message (which is common in chroots), so

- -- open-code post_exec with standard error suppressed.

+ -- open-code rpm.execute with standard error suppressed.

  if tonumber(arg[2]) >= 2

     and posix.access("%{_prefix}/bin/systemctl", "x")

  then

The scriptlet in glibc adds a dependency on bash to glibc-common,
which is required by glibc, so effectively the whole world depends on
bash. By rewriting the script in lua we drop this dependency.

Quoting https://bugzilla.redhat.com/show_bug.cgi?id=2018913#c34:

warning: SCC #1: 5 members (5 external dependencies)
warning:        glibc-2.35-2.fc37.x86_64
warning:                -> glibc-minimal-langpack-2.35-2.fc37.x86_64
warning:                -> glibc-common-2.35-2.fc37.x86_64
warning:        ncurses-libs-6.2-9.20210508.fc36.x86_64
warning:                -> glibc-2.35-2.fc37.x86_64
warning:        bash-5.1.16-2.fc36.x86_64
warning:                -> ncurses-libs-6.2-9.20210508.fc36.x86_64
warning:                -> glibc-2.35-2.fc37.x86_64
warning:        glibc-common-2.35-2.fc37.x86_64
warning:                -> glibc-2.35-2.fc37.x86_64
warning:                -> bash-5.1.16-2.fc36.x86_64
warning:        glibc-minimal-langpack-2.35-2.fc37.x86_64
warning:                -> glibc-common-2.35-2.fc37.x86_64
warning:                -> glibc-2.35-2.fc37.x86_64

If we look into SCC #1, bash obviously requires glibc because it links
to it. The loop is created by glibc → glibc-common → bash, because
glibc-common has %transfiletriggerin and %transfiletriggerpostun using
bash to call /sbin/ldconfig.

https://github.com/coreos/rpm-ostree/pull/3453 provides a workaround so
that rpm-ostree doesn't choke on this.

Note that this is untested: I'm building glibc in mock right now, but since that takes a while,
I thought I'd post the patch here for comments. I'll report the results of the local test
here later.

Overall I think this is a good idea. It removes the last use of shell in all of the glibc triggers (%pre, %post, %postun, %transfiletrigger). This means we have only lua left in those triggers, which is far safer than attempting to run shell at these critical upgrade points. We do know that OCP, particularly CoreOS can't run those scripts and needs workarounds.

Thanks. I think we used shell here to accommodate rpm-ostree. If they are fine with this change, we should make it.

Why would the exec fail? Why not use post_exec() here?

Why would the exec fail? Why not use post_exec() here?

What I'd like to see here is for the triggers to do:
%glibc_post_funcs
post_exec ("%{_prefix}/sbin/ldconfig")
And then after we merge that I'll merge a cleanup that removes the assert. We no longer need an assert for a failed post_exec. The only users are ldconfig and iconvconfig, all of which we should allow a soft-fail here and write out an error message.

Build failed. More information on how to proceed and troubleshoot errors available at https://fedoraproject.org/wiki/Zuul-based-ci

rebased onto 4f7d89c

a year ago

I reworked this significantly based on @codonell's comments above and help from @ignatenkobrain.

Latest version looks great. I'm going to go ahead and merge this because I'm already testing a Fedora Rawhide weekly sync.

Pull-Request has been merged by codonell

a year ago

Thanks! Please cc me on any issues.

Thanks! Please cc me on any issues.

Thank you for the cleanup. I removed another assert for running systemctl (step 5 in %post for glibc) so we don't fail at that stage also, it should be a soft fail if can't restart systemd.

Build failed. More information on how to proceed and troubleshoot errors available at https://fedoraproject.org/wiki/Zuul-based-ci

We do know that OCP, particularly CoreOS can't run those scripts and needs workarounds.

I would say "rpm-ostree", which is a distinct thing from OCP and also not exclusive to CoreOS editions even. Notably rpm-ostree is also used by e.g. Fedora Silverblue as well as RHEL for Edge.

The canonical link here is https://github.com/coreos/rpm-ostree/pull/3453

It's also worth noting reason we do this is because lua conflicts with our goals of supporting offline (and transactional) updates by default. We're not out to impose restrictions - the goal is to empower our users to fearlessly upgrade their OS and roll back if something goes wrong. Eventually we will implement lua, but by running it as a sandboxed subprocess.

We do know that OCP, particularly CoreOS can't run those scripts and needs workarounds.

Let me spend even more time emphasizing this. We should avoid confusing/conflating mechanisms with branding. rpm-ostree is a mechanism, CoreOS is a brand.

In this Fedora Council thread we discuss the automotive analogy, where e.g. Ford is a car manufacturer that makes both a "Mustang" and "F-150" that are distinct things - but share a brand too.

Continuing the car analogy, increasingly different cars created by the same manufacturer have two very different mechanisms under the hood - gas vs electric. But they're the same brand. And certainly for e.g. Ford, the hope is that when you drive your electric car into the dealership, the repair shop doesn't say "oh that's not a real Ford".

Image based (e.g. rpm-ostree) vs traditional package (dnf/yum) update mechanisms are indeed quite different under the hood. But, having a different update mechanism doesn't make it not RHEL. To mix the analogies, it's essential for what I'm doing to succeed that when the operating system rolls into the repair shop, you and other engineers don't think "oh that's not RHEL, you need to talk to someone else".

Also, the car analogy is doubly strong here, becuase e.g. CentOS automotive uses (rpm-)ostree too today.

We do know that OCP, particularly CoreOS can't run those scripts and needs workarounds.

Let me spend even more time emphasizing this. We should avoid confusing/conflating mechanisms with branding. rpm-ostree is a mechanism, CoreOS is a brand.

Absolutely. Thanks for the clarification Colin! My apologies for conflating things there. I should have said rpm-ostree from the start.

I completely support the direction that rpm-ostree is going in, and we are consciously moving everything into lua, and then looking to delete as much state manipulation as we can get away with (we don't regenerate /usr/lib/locale-archive anymore). At least with lua we can run things in a reliable way in the sandbox.

It's also worth noting reason we do this is because lua conflicts with our goals of supporting offline (and transactional) updates by default.

I don't buy this argument. Effectively, rpm-ostree pushes maintainers to use bash scriptlets instead of lua scriptlets. This is because rpm-ostree chose to handle bash scriptlets in a custom way. It could (and IMO should) do the same for lua. The lua environment is very well defined and easy to embed, and if rpm-ostree can "wrap" the bash scriptlets, it can do the same for lua scriptlets.

FWIW, I very much support having less scriptlets. But the sad state is that we still use them and they are not going away any time soon.

It's also worth noting reason we do this is because lua conflicts with our goals of supporting offline (and transactional) updates by default.

I don't buy this argument. Effectively, rpm-ostree pushes maintainers to use bash scriptlets instead of lua scriptlets. This is because rpm-ostree chose to handle bash scriptlets in a custom way. It could (and IMO should) do the same for lua. The lua environment is very well defined and easy to embed, and if rpm-ostree can "wrap" the bash scriptlets, it can do the same for lua scriptlets.

The RPM Lua interpreter is currently not available separately. The system Lua interpreter has different extension modules.

Metadata