#!/bin/sh
set -o nounset
set -o errexit

FORGEURL='https://github.com/pdfminer/pdfminer.six'

# Print the usage text to standard output.
# Arguments: $1 - program name to show on the "Usage:" line.
print_help()
{
	printf 'Usage: %s VERSION\n\n' "$1"
	# Quoted delimiter: the body is emitted verbatim, so ${VERSION} below is
	# shown literally as a placeholder rather than being expanded.
	cat <<'EOF'
Generate a source archive for pdfminer.six with questionably-licensed sample
PDFs removed. The result will be named pdfminer.six-${VERSION}-filtered.tar.xz
and will be written into the current working directory.
EOF
}

# Exactly one argument is required. Anything else is a usage error: show the
# help text on stderr and fail.
if [ "$#" -ne 1 ]
then
	print_help "${0}" 1>&2
	exit 1
fi

# An explicit request for help is not an error: print to stdout and succeed.
case "${1}" in
	-h|--help)
		print_help "${0}"
		exit 0
		;;
esac

# Every file name below is derived from the upstream archive URL so that the
# download name, the extracted directory and the output name stay in sync.
VERSION="${1}"
SOURCE0="${FORGEURL}/archive/${VERSION}/pdfminer.six-${VERSION}.tar.gz"
TARNAME="$(basename "${SOURCE0}")"
TARDIR="$(basename "${SOURCE0}" '.tar.gz')"
NEWTAR="${TARDIR}-filtered.tar.xz"

# All work happens in a scratch directory; only the finished archive is moved
# back to the directory the script was started from.
SAVEDIR="${PWD}"
XDIR="$(mktemp -d)"

# Remove the scratch directory. XDIR is expanded when the function runs, so
# unusual characters in the path cannot break the trap command.
cleanup()
{
	rm -rvf "${XDIR}"
}
trap cleanup EXIT
# A signal trap alone does not stop the script; exit explicitly (with the
# conventional 128+signal status) so the EXIT trap performs the cleanup.
trap 'exit 130' INT
trap 'exit 143' TERM

cd "${XDIR}"
# -f: fail on an HTTP error instead of saving the server's error page under
# the tarball's name and reporting success.
curl -f -L -O "${SOURCE0}"
tar -xzvf "${TARNAME}"
rm -rvf "${TARDIR}/samples"
# Sanity check: no directory named "nonfree" may remain anywhere in the tree.
# Test find's output directly; comparing the output of "wc -l" with '0' fails
# on systems whose wc pads the count with blanks.
if [ -n "$(find . -type d -name 'nonfree')" ]
then
	echo 'ERROR: did not properly remove problematic content' 1>&2
	exit 1
fi
# Archive and compress as two separate commands: in a "tar | xz" pipeline a
# tar failure would go unnoticed, because only the last command's status
# counts. xz replaces FILE with FILE.xz, which is exactly ${NEWTAR}.
tar -cvf "${NEWTAR%.xz}" "${TARDIR}/"
xz -9e "${NEWTAR%.xz}"

cd "${SAVEDIR}"
mv -v "${XDIR}/${NEWTAR}" .