ba9c021
#!/bin/bash
ba9c021
#
ba9c021
# Bring up the kernel RDMA stack
ba9c021
#
ba9c021
# This is usually run automatically by systemd after a hardware activation
ba9c021
# event in udev has triggered a start of the rdma.service unit
ba9c021
#
ba9c021
1cfb1e8
shopt -s nullglob

# Site configuration file and the awk helper used by check_mtrr_registers.
CONFIG=/etc/rdma/rdma.conf
MTRR_SCRIPT=/usr/libexec/rdma-fixup-mtrr.awk

# Space-separated module lists handed to load_modules at the end of the
# script.  LOAD_ULP_MODULES is filled in below from the configuration.
LOAD_ULP_MODULES=""
LOAD_CORE_USER_MODULES="ib_umad ib_uverbs ib_ucm rdma_ucm"
LOAD_CORE_CM_MODULES="iw_cm ib_cm rdma_cm"
LOAD_CORE_MODULES="ib_core ib_mad ib_sa ib_addr"

if [[ -f "$CONFIG" ]]; then
    . "$CONFIG"

    # Asking for RDS forces IPoIB to be loaded as well.
    if [[ "${RDS_LOAD}" == "yes" ]]; then
        IPOIB_LOAD=yes
    fi

    if [[ "${IPOIB_LOAD}" == "yes" ]]; then
        LOAD_ULP_MODULES="ib_ipoib"
    fi

    # RDS and its transports are only queued when the running kernel
    # actually ships the corresponding .ko files.
    rds_module_dir="/lib/modules/$(uname -r)/kernel/net/rds"
    if [[ "${RDS_LOAD}" == "yes" && -f "${rds_module_dir}/rds.ko" ]]; then
        LOAD_ULP_MODULES="$LOAD_ULP_MODULES rds"
        if [[ -f "${rds_module_dir}/rds_tcp.ko" ]]; then
            LOAD_ULP_MODULES="$LOAD_ULP_MODULES rds_tcp"
        fi
        if [[ -f "${rds_module_dir}/rds_rdma.ko" ]]; then
            LOAD_ULP_MODULES="$LOAD_ULP_MODULES rds_rdma"
        fi
    fi
    unset rds_module_dir

    if [[ "${SRP_LOAD}" == "yes" ]]; then
        LOAD_ULP_MODULES="$LOAD_ULP_MODULES ib_srp"
    fi

    if [[ "${SRPT_LOAD}" == "yes" ]]; then
        LOAD_ULP_MODULES="$LOAD_ULP_MODULES ib_srpt"
    fi

    if [[ "${ISER_LOAD}" == "yes" ]]; then
        LOAD_ULP_MODULES="$LOAD_ULP_MODULES ib_iser"
    fi

    if [[ "${ISERT_LOAD}" == "yes" ]]; then
        LOAD_ULP_MODULES="$LOAD_ULP_MODULES ib_isert"
    fi
else
    # No configuration file: fall back to loading IPoIB only.
    LOAD_ULP_MODULES="ib_ipoib"
fi
ba9c021
ba9c021
# If module $1 is loaded return - 0 else - 1
ba9c021
is_module()
ba9c021
{
ba9c021
    /sbin/lsmod | grep -w "$1" > /dev/null 2>&1
3eee2b8
    return $?
ba9c021
}
ba9c021
ba9c021
# Load every kernel module named in the arguments that is not loaded yet.
#
# Arguments: module names; each argument is word-split, so a single
#            space-separated list is accepted as well
# Outputs:   an empty line followed by "Failed to load module <name>"
#            (no trailing newline) for every modprobe failure
# Returns:   0 if nothing failed, otherwise the sum of the failing modprobe
#            exit statuses, capped at 255
load_modules()
{
    local RC=0
    local module res

    # $* is deliberately left unquoted so space-separated lists are split.
    for module in $*; do
        if is_module "$module"; then
            continue
        fi
        /sbin/modprobe "$module"
        res=$?
        if [ "$res" -ne 0 ]; then
            RC=$(( RC + res ))
            echo
            echo -n "Failed to load module $module"
        fi
    done

    # Exit statuses are 8 bits wide; without the cap a sum of 256 would be
    # reported to the caller as success.
    if (( RC > 255 )); then
        RC=255
    fi
    return "$RC"
}
ba9c021
ba9c021
# This function is a horrible hack to work around BIOS authors that should
ba9c021
# be shot.  Specifically, certain BIOSes will map the entire 4GB address
ba9c021
# space as write-back cacheable when the machine has 4GB or more of RAM, and
ba9c021
# then they will exclude the reserved PCI I/O addresses from that 4GB
ba9c021
# cacheable mapping by making an overlapping uncacheable mapping.  However,
ba9c021
# once you do that, it is then impossible to set *any* of the PCI I/O
ba9c021
# address space as write-combining.  This is an absolute death-knell to
ba9c021
# certain IB hardware.  So, we unroll this mapping here.  Instead of
ba9c021
# punching a hole in a single 4GB mapping, we redo the base 4GB mapping as
ba9c021
# a series of discrete mappings that effectively are the same as the 4GB
ba9c021
# mapping minus the hole, and then we delete the uncacheable mappings that
ba9c021
# are used to punch the hole.  This then leaves the PCI I/O address space
ba9c021
# unregistered (which defaults it to uncacheable), but available for
ba9c021
# write-combining mappings where needed.
ba9c021
check_mtrr_registers()
{
    # Runs $MTRR_SCRIPT over /proc/mtrr.
    #
    # Returns: 1 if /proc/mtrr or the awk script is missing, the awk exit
    #          status if awk reports failure, otherwise the status of the
    #          ib_ipath unload step (0 when the module was not loaded).
    if [[ ! -f /proc/mtrr || ! -f "$MTRR_SCRIPT" ]]; then
        return 1
    fi

    # If we actually change the mtrr registers, then the awk script will
    # return true, and we need to unload the ib_ipath module if it's already
    # loaded.  The udevtrigger in load_hardware_modules will immediately
    # reload the ib_ipath module for us, so there shouldn't be a problem.
    awk -f "$MTRR_SCRIPT" /proc/mtrr 2>/dev/null || return

    if is_module ib_ipath; then
        /sbin/rmmod ib_ipath
    fi
}
ba9c021
ba9c021
# Trigger loading of the hardware drivers.
#
# Globals:  FIXUP_MTRR_REGS (read) - "yes" runs check_mtrr_registers first
# Returns:  the sum of the load_modules statuses for the drivers that had
#           to be loaded explicitly here (0 if none failed)
load_hardware_modules()
{
    local -i RC=0
    local -a lhca_nodes

    [ "$FIXUP_MTRR_REGS" = "yes" ] && check_mtrr_registers

    # We match both class NETWORK and class INFINIBAND devices since our
    # iWARP hardware is listed under class NETWORK.  The side effect of
    # this is that we might cause a non-iWARP network driver to be loaded.
    udevadm trigger --subsystem-match=pci --attr-nomatch=driver \
        --attr-match=class=0x020000 --attr-match=class=0x0c0600
    udevadm settle

    # An entry containing "lhca" in the device tree means ib_ehca is wanted.
    # The -e test on the first element makes this work whether or not
    # nullglob is in effect.
    lhca_nodes=(/proc/device-tree/*lhca*)
    if [[ -r /proc/device-tree && -e "${lhca_nodes[0]}" ]]; then
        if ! is_module ib_ehca; then
            load_modules ib_ehca
            RC+=$?
        fi
    fi

    # Load ocrdma when be2net is loaded and ocrdma is not.
    if is_module be2net && ! is_module ocrdma; then
        load_modules ocrdma
        RC+=$?
    fi
    return $RC
}
ba9c021
ba9c021
# Apply the workaround for AMD chipset issue Errata #58.
#
# Does nothing unless /sbin/lspci and /sbin/setpci are executable and the
# PCI devices 1022:1100, 1022:7450 and 15b3:5a46 are all present.
# Outputs: "Failed to apply AMD-8131 Errata #58 workaround" when a setpci
#          write fails
errata_58()
{
    local curval val

    [[ -x /sbin/lspci && -x /sbin/setpci ]] || return 0

    /sbin/lspci -nd 1022:1100 | grep -q "1100" || return 0
    /sbin/lspci -nd 1022:7450 | grep -q "7450" || return 0
    /sbin/lspci -nd 15b3:5a46 | grep -q "5a46" || return 0

    # setpci prints one value per matching device; word-splitting is
    # intentional here.
    curval=$(/sbin/setpci -d 1022:1100 69)
    for val in $curval; do
        if [[ "$val" == "c0" ]]; then
            continue
        fi
        # The write targets every 1022:1100 device at once, so a single
        # successful write is enough.
        if /sbin/setpci -d 1022:1100 69=c0; then
            break
        fi
        echo "Failed to apply AMD-8131 Errata #58 workaround"
    done
}
ba9c021
ba9c021
# Apply the workaround for AMD chipset issue Errata #56.
#
# Does nothing unless /sbin/lspci and /sbin/setpci are executable and the
# PCI devices 1022:1100, 1022:7450 and 15b3:5a46 are all present.
# Outputs: a failure message when the setpci write fails, followed by a
#          PCI-X configuration warning if more than one device sits on
#          the affected bus
errata_56()
{
    local dev bus rev device num

    [[ -x /sbin/lspci && -x /sbin/setpci ]] || return 0

    /sbin/lspci -nd 1022:1100 | grep -q "1100" || return 0
    /sbin/lspci -nd 1022:7450 | grep -q "7450" || return 0
    /sbin/lspci -nd 15b3:5a46 | grep -q "5a46" || return 0

    # Look for devices AMD-8131
    for dev in $(/sbin/setpci -v -f -d 1022:7450 19 | cut -d':' -f1,2); do
        bus=$(/sbin/setpci -s "$dev" 19)
        rev=$(/sbin/setpci -s "$dev" 8)

        # setpci reports register values in hexadecimal, so the revision is
        # compared base-16; a decimal test would reject values such as "0b".
        # NOTE(review): the threshold 13 is presumed to be hex as well
        # (revision 0x13) - confirm against the AMD-8131 revision guide.
        [[ "$rev" =~ ^[0-9a-fA-F]+$ ]] || continue
        (( 16#$rev < 16#13 )) || continue

        # Look for Tavor attached to the secondary bus of this device
        for device in $(/sbin/setpci -f -s "${bus}:" -d 15b3:5a46 19); do
            if /sbin/setpci -d 15b3:5a44 72=14; then
                break
            fi
            echo
            echo "Failed to apply AMD-8131 Errata #56 workaround"

            # If more than one device is on the bus then issue a
            # warning
            num=$(/sbin/setpci -f -s "${bus}:" 0 | wc -l)
            if (( num > 1 )); then
                echo "Warning: your current PCI-X configuration might be incorrect."
                echo "see AMD-8131 Errata 56 for more details."
            fi
        done
    done
}
ba9c021
ba9c021
# Load the hardware drivers, then the core, connection-manager, user-space
# and ULP module groups in that order, accumulating every failure status.
RC=0

load_hardware_modules
RC=$(( RC + $? ))
# The module lists are space-separated strings; they are left unquoted on
# purpose so that each name becomes its own argument.
load_modules $LOAD_CORE_MODULES
RC=$(( RC + $? ))
load_modules $LOAD_CORE_CM_MODULES
RC=$(( RC + $? ))
load_modules $LOAD_CORE_USER_MODULES
RC=$(( RC + $? ))
load_modules $LOAD_ULP_MODULES
RC=$(( RC + $? ))

errata_58
errata_56

# Exit statuses are 8 bits wide; cap the sum so that accumulated failures
# can never wrap around to 0 and be reported as success.
if (( RC > 255 )); then
    RC=255
fi
exit "$RC"