Blob Blame History Raw
# Copyright (C) 2018  Dave love, University of Manchester
# Licence as for the package source

# The full tests are very time-consuming
%bcond_with fulltest

# We need to manipulate the built *.so.%%sover
%global sover .2.1.0
%global soshort .2

# EL6/EL7 only: use devtoolset for avx512 support.  The dts macro holds
# the devtoolset version and stays undefined everywhere else (and on
# ppc64/ppc64le), so later uses are written as conditional expansions.
%if 0%{?el6}%{?el7}
%ifnarch ppc64le ppc64
%global dts 9
%endif
%endif

# Avoid failure with missing pthread_once in check
%undefine _ld_as_needed

Name:		blis
Version:	0.7.0
Release:	5%{?dist}
Summary:	BLAS-like Library Instantiation Software Framework
License:	BSD
# NOTE(review): the conditional on the commit macro below has no visible
# body, else branch or terminator, and no URL or Source tags appear in
# this copy -- confirm against the upstream spec.
%if 0%{?commit}
# Don't identify s390x as 32-bit
Patch1:		blis-s390x.patch
BuildRequires:	perl
# When the dts macro is set this requires binutils and gcc from that
# devtoolset; otherwise it is a plain requirement on gcc.
BuildRequires:	%{?dts:devtoolset-%{?dts}-binutils devtoolset-%{?dts}-}gcc
BuildRequires:	/usr/bin/python3 gcc-gfortran chrpath
# memkind is currently only relevant for KNL as far as I know, but
# might be relevant in future for other targets with HBM.  It needs
# updating in el7, and building for el8.  It should also support
# ppc64le and s390x, but they're not built.
# The arch list is only non-empty on EL7 and Fedora, where it is x86_64.
%ifarch %{?el7:x86_64}  %{?fedora:x86_64}
BuildRequires: memkind-devel
%endif

%global desc \
BLIS is a portable software framework for instantiating\
high-performance BLAS-like dense linear algebra libraries.  The\
framework was designed to isolate essential kernels of computation\
that, when optimized, immediately enable optimized implementations of\
most of its commonly used and computationally intensive operations.\
While BLIS exports a new BLAS-like API, it also includes a BLAS\
compatibility layer which gives application developers access to BLIS\
implementations via traditional BLAS routine calls.\
This packaging contains automatically-dispatched\
architecture-optimized kernels for some targets, notably recent x86_64.


This is the serial version.

%package	devel
Summary:	Development files for %name
Requires:	%name%{?_isa} = %version-%release
Requires:	%name-openmp%{?_isa} = %version-%release
Requires:	%name-threads%{?_isa} = %version-%release
# The 64-bit-integer variants are only required on 64-bit targets.
%if 0%{?__isa_bits} == 64
Requires:	%name-serial64%{?_isa} = %version-%release
Requires:	%name-openmp64%{?_isa} = %version-%release
Requires:	%name-threads64%{?_isa} = %version-%release
%endif

%description	devel
The %name-devel package contains libraries and header files for
developing applications that use %name.

%package serial64
Summary:	BLAS-like Library Instantiation Software Framework - 64-bit

%description serial64

This is the serial version with a 64-bit integer interface.

%package openmp
Summary:	BLAS-like Library Instantiation Software Framework - OpenMP

%description openmp

This is the OpenMP-parallelized version.

%package openmp64
Summary:	BLAS-like Library Instantiation Software Framework - OpenMP, 64-bit

%description openmp64

This is the OpenMP-parallelized version with a 64-bit integer interface.

# A pthreads version is necessary for Python (numpy) according to
# Debian openblas.
%package threads
Summary:	BLAS-like Library Instantiation Software Framework - pthreads

%description threads

This is the pthreads-parallelized version.

%package threads64
Summary:	BLAS-like Library Instantiation Software Framework - pthreads, 64-bit

%description threads64

This is the pthreads-parallelized version with a 64-bit integer interface.

%package srpm-macros
Summary:	BLIS architecture macros
BuildArch:	noarch

%description srpm-macros
BLIS architecture macros.

%setup -q %{?commit: -n %name-%commit}
# The soversion changed in release 0.7.0, but abipkgdiff suggests it
# shouldn't have, since only undocumented interfaces have changed from
# 0.6.0: removed bli_thread_get_env, bli_thread_init_rntm; indirect
# sub-types in bli_addd_ex; ARCH enum in bli_arch_query_id.
# Split the dotted sover value on "." (field 1 is empty because of the
# leading dot) and write two lines to so_version: field 2 alone, then
# fields 3 and 4 joined by a dot -- i.e. "2" and "1.0" for .2.1.0.
echo %sover | awk -F. '{printf("%s\n%s.%s\n", $2,$3,$4)}' >so_version
%patch1 -p1

%{?dts:. /opt/rh/devtoolset-%{?dts}/enable}
# Choose the BLIS configure target for the RPM build architecture and
# leave it in the shell variable "arch" for the configure calls below.
case "%_arch" in
x86_64) arch=x86_64 ;;
# a57 runs on all aarch64 and the optimized micro-kernel should be a
# better default than generic.
# Fixme:  Include my changes for arm and ppc micro-arch dispatch.
aarch64) arch=cortexa57 ;;
armv7hl) arch=cortexa9 ;;	# Similarly to aarch64
# Everything else gets the unoptimized portable target.
*) arch=generic ;;
esac

# Hardening flags might be expected to affect performance, but appear
# not to.  With the f29 set and gcc 8 (but measured on EL6) for
# Haswell, a 5000×5000 DGEMM ran at 158295±565 MFlops with
# CFLAGS=$RPM_OPT_FLAGS and 158289±414 MFlops with no CFLAGS specified.
# Add back -O3, overridden by -O2 in RPM_OPT_FLAGS.
# -funsafe-math-optimizations vectorizes more, and passes tests
# <>
# Configure options common to every library variant built below.
%global confflags --enable-debug=opt --disable-static --enable-shared --enable-verbose-make --enable-cblas
export CFLAGS="$RPM_OPT_FLAGS -O3 -funsafe-math-optimizations" LDFLAGS="%{?__global_ldflags}"
export PYTHON=python3		# Needed by both configure and make

# It's not an autotools configure
# OpenMP variant, installed under the private prefix "o" in the build tree.
# NOTE(review): the SOFLAGS value ends in a bare "-Wl," with no linker
# argument; this looks truncated -- confirm against upstream.
./configure --prefix=$(pwd)/o %confflags -t openmp $arch
%make_build SOFLAGS="-shared -Wl,"
make install

# pthreads variant, installed under the private prefix "p".
./configure --prefix=$(pwd)/p %confflags -t pthreads $arch
%make_build SOFLAGS="-shared -Wl,"
make install

# Rename the libraries per soname and generate BLAS_compatible ones
# One shim directory is created per library variant.
mkdir -p blisblas{,o,p,64,o64,p64}
# NOTE(review): this loop cannot run as written: "f" is never assigned in
# the visible script, both "ln -s" lines name the same source and target,
# the cc line has a bare "-Wl," and a directory as its -o argument, the
# final "ln -s" has a single operand, and there is no closing "done".
# Text appears to have been lost from this copy -- restore it from the
# upstream spec rather than guessing.
for d in o p; do
  cd $d/lib
  # Insert the variant suffix before the first dot of the file name.
  mv $f ${f/./$d.}
  ln -s libblis$ libblis$
  ln -s libblis$  libblis$
  # Drop whatever is still named libblis.* (the unsuffixed names).
  rm libblis.*
  cd ../..
  # Link a shared object against the suffixed library into the shim directory.
  cc -shared -Wl, -L$(pwd)/$d/lib -lblis$d -o blisblas$d/ $LDFLAGS
  ln -s blisblas$d/

# Variants configured with "-b 64" (the 64-bit integer interface packages):
# serial, OpenMP and pthreads, under the prefixes 64, o64 and p64.
# NOTE(review): no terminator for this conditional is visible in this
# copy -- confirm where it closes against upstream.
%if 0%{?__isa_bits} == 64

./configure --prefix=$(pwd)/64 %confflags -b 64 $arch
%make_build SOFLAGS="-shared -Wl,"
make install

./configure --prefix=$(pwd)/o64 %confflags -b 64 -t openmp $arch
%make_build SOFLAGS="-shared -Wl,"
make install

./configure --prefix=$(pwd)/p64 %confflags -b 64 -t pthreads $arch
%make_build SOFLAGS="-shared -Wl,"
make install

# NOTE(review): same damage as the loop over "o p": "f" is unassigned,
# the "ln -s" operands and the cc soname/output name are truncated, and
# the closing "done" is missing -- restore from upstream.
for d in 64 o64 p64; do
  cd $d/lib
  # Insert the variant suffix before the first dot of the file name.
  mv $f ${f/./$d.}
  ln -s libblis$ libblis$
  ln -s libblis$ libblis$
  rm -f libblis.*
  cd ../..
  cc -shared -Wl, -L$(pwd)/$d/lib -lblis$d -o blisblas$d/ $LDFLAGS
  ln -s blisblas$d/


# done last for the benefit of check
# NOTE(review): no separate build step is visible between configure and
# "make install" here, and the cc/ln lines below are truncated like the
# ones in the loops -- confirm against upstream.
./configure --prefix=$(pwd)/serial %confflags $arch
make install
cc -shared -Wl, -L$(pwd)/serial/lib -lblis -o blisblas/ $LDFLAGS
ln -s blisblas/

# Copy every built variant from its private prefix into the buildroot.
mkdir -p %buildroot%_libdir %buildroot%_includedir

cp -a {serial,o,p}/lib/* %buildroot%_libdir
# The serial headers take the plain "blis" include directory; each other
# variant gets its own directory named blis plus the variant suffix.
mv serial/include/blis %buildroot%_includedir
for d in o p; do
  cp -a "$d"/include/blis %buildroot%_includedir/blis"$d"
done
%if 0%{?__isa_bits} == 64
cp -a {64,o64,p64}/lib/* %buildroot%_libdir
for d in 64 o64 p64; do
  cp -a "$d"/include/blis %buildroot%_includedir/blis"$d"
done
%endif
# Needed for debuginfo processing
chmod +x %buildroot%_libdir/*.so.*
cp -a blisblas* %buildroot%_libdir
# This is quite large.
chrpath -d %buildroot%_libdir/*.so.*

# Generate the distribution README shipped as documentation.
# NOTE(review): the sentence about shim sonames and the LD_PRELOAD path
# below have lost their library names in this copy -- restore them from
# the upstream spec.
cat <<EOF >README.Fedora
Fedora BLIS packaging

Similarly to the OpenBLAS packaging, as well as the serial library
(libblis), there are versions named with suffix "o" using OpenMP, and
suffix "p" using pthreads.  Also, on 64-bit targets, there are
versions built with 64-bit integer interfaces, which have suffix "64".
Thus "libbliso64" is built for 64-bit integers and OpenMP
parallelization.  The cblas interface is included in each version.

For the BLAS interface, BLIS and OpenBLAS are expected to have similar
performance where they are optimized for the same micro-architectures,
but do show some performance differences in either direction.  BLIS
supports AVX512 on KNL and SKX, which OpenBLAS currently doesn't, and
will be more than twice as fast on such systems, which are the main
targets for this packaging.  BLIS' non-BLAS interface is obviously a
potential advantage generally, but it isn't currently used by any
Fedora packages.

There are shared library shims in %_libdir/blisblas* for each version
that provide sonames or and so may be
linked dynamically instead of the reference libblas.  You can use an
ldconfig file so that this will be done automatically if the blis or
blis64 packages are installed, which will usually be a lot faster than
the reference version.  Otherwise, setting
LD_LIBRARY_PATH=%_libdir/blisblaso, say, will cause a binary
dynamically linked against libblas to run with the OpenMP BLIS version
instead, to allow multiple threads to be used.  The shims could be
extended to substitute the atlas and openblas libraries, but those can
be overridden by running with LD_PRELOAD=%_libdir/ in
the environment.

Runtime dispatch on the micro-architecture is currently only available
on x86_64.  aarch64 will use cortexa57 instructions.  Other
architectures use the "generic" target, so OpenBLAS will be faster on
any of them that it supports (arm, power64, ix86, and s390x in Fedora).

The blis-srpm-macros package defines RPM macro %blis_opt_arches for
the architectures with optimized implementations in case the list is
extended in future.
EOF


# Use the macros.d directory under the rpm config directory when it
# exists on the build host, otherwise fall back to the rpm directory
# under sysconfdir, and write the srpm macro file there.
%global macrosdir %(d=%{_rpmconfigdir}/macros.d; [ -d $d ] || d=%{_sysconfdir}/rpm; echo $d)
mkdir -p %buildroot%macrosdir
cat <<EOF >%buildroot%macrosdir/macros.blis-srpm
# Architectures for which BLIS has an optimized implementation
%blis_opt_arches x86_64 aarch64
EOF

%{?dts:. /opt/rh/devtoolset-%{?dts}/enable}
# A quick check which tests the Fortran BLAS interface with gfortran,
# unlike the "test" or "check" targets.
# Fixme: check a 64-bit version where relevant
# Build the Fortran level-3 BLAS tester against the serial shim and run
# it with the here-document as its input; on failure print the summary
# file and fail the check.
# NOTE(review): confirm the input deck below is complete for dblat3;
# only the terminator is restored here.
gfortran -o dblat blastest/src/fortran/dblat3.f -Lblisblas -Lserial/lib -lblas -lblis
LD_LIBRARY_PATH=$(pwd)/serial/lib:$(pwd)/blisblas ./dblat <<+ || { cat dblat3.summ && false; }
0 1 2 3 7 31 63
0.0 1.0 0.7
0.0 1.0 1.3
+

# Point the upstream test targets at the serial library.  They are
# very time-consuming, so they only run when the fulltest build
# condition is enabled.
export LD_LIBRARY_PATH="$(pwd)/serial/lib"
%if %{with fulltest}
%make_build test
%make_build check
%endif

%ldconfig_scriptlets openmp
%ldconfig_scriptlets serial64
%ldconfig_scriptlets openmp64
%ldconfig_scriptlets threads
%ldconfig_scriptlets threads64

%global docs CHANGELOG.gz CREDITS README.Fedora

%doc %docs
%license LICENSE

%files openmp
%doc %docs
%license LICENSE

%files threads
%doc %docs
%license LICENSE

# File lists for the 64-bit-integer variants, only on 64-bit targets.
# NOTE(review): no library paths are listed in these sections in this
# copy, only docs and licence -- confirm the file lists against upstream.
%if 0%{?__isa_bits} == 64

%files serial64
%doc %docs
%license LICENSE

%files openmp64
%doc %docs
%license LICENSE

%files threads64
%doc %docs
%license LICENSE

%endif


%files devel
%doc examples

%files srpm-macros

* Sat Aug 01 2020 Fedora Release Engineering <> - 0.7.0-5
- Second attempt - Rebuilt for

* Mon Jul 27 2020 Fedora Release Engineering <> - 0.7.0-4
- Rebuilt for

* Wed Jul  1 2020 Dave Love <> - 0.7.0-3
- Patch to build 64-, not 32-bit version on s390x (#1852549)

* Wed May 27 2020 Dave Love <> - 0.7.0-2
- Revert build for arches that don't actually have memkind packages

* Wed May 20 2020 Dave Love <> - 0.7.0-1
- New version
- Drop patch
- Maybe use devtoolset-9

* Sun Mar 15 2020 Dave love <> - 0.6.0-5
- Use cortexa9 config on arm32, rather than generic

* Tue Jan 28 2020 Fedora Release Engineering <> - 0.6.0-5
- Rebuilt for

* Wed Sep 11 2019 Dave love <> - 0.6.0-4
- Patch to avoid popen (security)
- Replace patch1 with upstream change

* Sat Aug 17 2019 Dave love <> - 0.6.0-3
- Patch out use of simd pragma
- Use devtoolset-8, not -6 on el6/7
- Fix dblat3 test

* Wed Jul 24 2019 Fedora Release Engineering <> - 0.6.0-2
- Rebuilt for

* Tue Jun  4 2019 Dave Love <> - 0.6.0-1
- New version

* Thu Feb 14 2019 Dave Love <> - 0.5.1-2
- Allow rebuilding for EPEL
- This version fixes #1674701
- Use -funsafe-math-optimizations

* Sun Feb  3 2019 Dave Love <> - 0.5.1-1
- New version with soname bump
- arm/arm64 families removed

* Thu Jan 31 2019 Fedora Release Engineering <> - 0.5.0-2
- Rebuilt for

* Thu Nov  8 2018 Dave Love <> - 0.5.0-1
- New version
- Drop python3 patch

* Thu Oct  4 2018 Dave Love <> - 0.4.1-2
- Use LDFLAGS for BLAS shims
- Add srpm-macros package
- Don't override -O3 in CFLAGS
- Maybe use devtoolset in %%check
- Remove files for blisblas libraries

* Tue Sep 11 2018 Dave Love <> - 0.4.1-1
- New version
- Fix running el6 check
- Use default compilation flags
- Use chrpath, %%ldconfig_scriptlets

* Wed Aug 15 2018 Dave Love <> - 0.4.0-1
- New version, with soname bump, build adjusted

* Mon Jun 18 2018 Dave Love <> - 0.3.2-7
- Use python3 explicitly, with patch

* Tue Jun  5 2018 Dave Love <> - 0.3.2-6
- Initial version for Fedora