Blob Blame History Raw
From 9c94d906621e775f005fa34583671f08000f1723 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dan=20Hor=C3=A1k?= <dan@danny.cz>
Date: Thu, 4 Feb 2010 13:07:30 +0100
Subject: [PATCH] zipl: handle SSCH status

Description: zipl: handle status during IPL SSCH
Symptom:     System enters disabled wait during IPL of DASD disk when an
	     unexpected status (e.g. triggered by a flashcopy operation)
	     is recognized on the IPL device.
Problem:     Unexpected status is not handled correctly during the SSCH
	     portion of the IPL code.
Solution:    Introduce a more robust SSCH result handling which performs
	     retries and clears status when encountering errors.
---
 zipl/boot/common.S |  163 +++++++++++++++++++++++++++++++++++-----------------
 zipl/boot/eckd2.S  |    1 -
 zipl/boot/fba2.S   |    1 -
 3 files changed, 110 insertions(+), 55 deletions(-)

diff --git a/zipl/boot/common.S b/zipl/boot/common.S
index fa45e5a..108dbbf 100644
--- a/zipl/boot/common.S
+++ b/zipl/boot/common.S
@@ -249,69 +249,126 @@ _disable_device:
 	.endm	
 
 	.macro io_subroutines
+
+
+__LC_IO_NEW_PSW		= 0x78
+__LC_SUBCHANNEL_ID	= 0xb8
+
+#
+# Wait for I/O interrupt.
+#
+# Wait until status for the specified subchannel is available.
+#
+#   %r2 : subchannel id
+#   %r3 : address of irb
+#
+
+_wait_for_int:
+	lr     %r1,%r2
+	basr   %r4,0			# get base register
+0:
+	mvc    __LC_IO_NEW_PSW(8),4f-0b(%r4) # set i/o new psw
+1: # wait
+	lpsw   3f-0b(%r4)
+2: # continue
+	tsch   0(%r3)			# get status
+	brc    4,1b			# if cc=1 goto wait
+	br     %r14			# return
+        .align 8
+3:
+	.long  0x020a0000,0x80000000+1b	# enabled wait psw
+4:
+	.long  0x00080000,0x80000000+2b	# io new psw
+
 #
 # Start I/O
-#   %r2 : device subchannel id
+#
+# Attempt to start I/O defined by ORB on specified subchannel. Retry I/O
+# 256 times per path (recommended error recovery procedure for IFCCs) unless
+# a permanent path error condition is indicated. Try all paths twice to try to
+# work around "link flapping" (paths going down once each in the same order as
+# they are tried). Perform CLEAR SUBCHANNEL when switching paths to clear any
+# improper subchannel status.
+#
+#   %r2 : subchannel id
 #   %r3 : address of orb
 #   %r4 : address of irb
-#   %r5 : retry count
 #
 _ssch:
-        stm    %r6,%r15,24(%r15)
-        basr   %r13,0                   # base register
-0:      s      %r15,6f-0b(%r13)         # create stack frame
-	lr     %r12,%r2                 # save subchannel id
-	lr     %r11,%r3                 # save orb
-	lr     %r10,%r4                 # save irb
-	lr     %r9,%r5                  # save retry count
-	ic     %r0,.Llpm-0b(%r13)	# copy lpm to orb lpm
-	stc    %r0,6(%r3)
-1:	lr     %r1,%r12
-	ssch   0(%r11)                  # go
-        bnz    4f-0b(%r13)              # houston, we have a problem
-2:	lr     %r2,%r12                 # call _wait4de with subchannel id
-	lr     %r3,%r10                 # and irb address as parameters
-	bas    %r14,_wait4de-0b(%r13)   # wait until DE or error
-	tm     9(%r10),0xff             # test channel status
-	bnz    4f-0b(%r13)
-	tm     8(%r10),0xf3             # test device status
-	bz     5f-0b(%r13)
-	bct    %r9,1b-0b(%r13)          # something went wrong, retry.
-4:	l      %r2,7f-0b(%r13)
-	bas    %r4,_panik-0b(%r13)      # won't return
-5:	lm     %r6,%r15,120(%r15)
-        br     %r14
-6:	.long  96
-7:      .long  ESSCH
+	stm    %r6,%r15,24(%r15)
+	basr   %r13,0			# get base register
+0:
+	ahi    %r15,-96			# create stack frame
+	lr     %r6,%r2			# r6:  sid
+	lr     %r7,%r3			# r7:  orb
+	lr     %r8,%r4			# r8:  irb
+	sr     %r9,%r9			# r9:  initial lpm
+	ic     %r9,.Llpm-0b(%r13)
+	l      %r10,.Lmask-0b(%r13)	# r10: path mask
+1: # restart_all
+	lhi    %r11,256			# r11: retry counter
+2: # restart
+	stc    %r9,6(%r7)		# store initial lpm in orb
+	ltr    %r9,%r9			# if non-zero initial lpm
+	jne    3f			# then use initial lpm
+	stc    %r10,6(%r7)		# else use current path mask
+3: # gotlpm
+	lr     %r1,%r6			# get sid
+	ssch   0(%r7)			# start subchannel
+	brc    1,7f			# if cc==3 goto next_path
+	brc    7,6f                     # if cc!=0 goto retry
+4: # wait_for_int_loop
+	lr     %r2,%r6			# get sid
+	lr     %r3,%r8			# get irb
+	bras   %r14,_wait_for_int	# wait for interrupt
+	jnz    9f			# if cc!=0 goto panic
+	tm     0(%r8),0x3		# test irb deferred cc
+	brc    1,7f			# if cc==3 goto next_path
+	jz     5f			# if cc==0 goto no_stctl_check
+	tm     3(%r8),0x10		# test irb status control
+	jnz    6f			# if alert goto retry
+5: # no_stctl_check
+	tm     9(%r8),0xff		# test irb subchannel status
+	jnz    6f			# if status!=0 goto retry
+	tm     8(%r8),0xf3		# test irb unusual device status
+	jnz    6f			# if status!=0 goto retry
+	tm     8(%r8),0x4		# test irb device end
+	jz     4b			# if !device_end goto wait_for_int_loop
+	lm     %r6,%r15,120(%r15)
+	br     %r14			# return
+
+6: # retry
+	lr     %r1,%r6			# get sid
+	tsch   0(%r8)			# clear status if necessary
+	brct   %r11,2b			# if --retries>0 goto restart
+7: # next_path
+	ltr    %r9,%r9			# if initial lpm != 0
+	jnz    8f			# then goto noshift
+	srl    %r10,1			# path_mask >>= 1
+	ltr    %r10,%r10		# if path_mask==0
+	jz     9f			# then goto panic
+8: # noshift
+	sr     %r9,%r9			# clear initial lpm
+	lr     %r1,%r6			# get sid
+	csch				# clear subchannel
+	brc    7,9f			# if cc!=0 goto panic
+	lr     %r2,%r6			# get sid
+	lr     %r3,%r8			# get irb
+	bras   %r14,_wait_for_int	# wait for interrupt
+	j      1b			# goto restart_all
+9: # panic
+	l      %r2,.Lerrno-0b(%r13)	# get error code
+	bras   %r14,_panik		# panic
+
+.Lerrno:
+	.long  ESSCH
+.Lmask:
+	.long  0x8080
 .Llpm:
-	.byte  0xff
+	.byte  0x00
 	.align 2
 
 #
-# Wait for interrupt subroutine
-#   %r2 : device subchannel id
-#   %r3 : address of irb
-#
-_wait4de:
-	lr     %r1,%r2
-	basr   %r4,0
-0:      mvc    0x78(8),5f-0b(%r4)       # set i/o new psw
-1:	lpsw   4f-0b(%r4)
-2:      c      %r1,0xb8                 # compare subchannel id
-        bne    1b-0b(%r4)               # unequal -> continue waiting
-	tsch   0(%r3)
-	tm     9(%r3),0xff              # test channel status
-	bnz    3f-0b(%r4)
-	tm     8(%r3),0xf3              # got something unusual ?
-	bnz    3f-0b(%r4)
-        tm     8(%r3),0x04              # got device end ?
-        bz     1b-0b(%r4)               # still busy -> continue waiting
-3:      br     %r14
-        .align 8
-4:	.long  0x020a0000,0x80000000+1b
-5:	.long  0x00080000,0x80000000+2b # io new psw
-
-#
 # Panik routine. Loads a disabled wait psw
 #   %r2 : panik code
 #
diff --git a/zipl/boot/eckd2.S b/zipl/boot/eckd2.S
index ba71db9..b59ab0e 100644
--- a/zipl/boot/eckd2.S
+++ b/zipl/boot/eckd2.S
@@ -85,7 +85,6 @@ _load_direct:
         la     %r3,.Lorb-.Lbase(%r13)   # pass address of orb
         la     %r4,.Lirb-.Lbase(%r13)   # and pass address of irb
 	lr     %r2,%r11                 # pass subchannel id
-        la     %r5,5                    # 5 retries
         bas    %r14,_ssch-.Lbase(%r13)  # read records
 .Lexit:
         lr     %r2,%r12                 # return updated load address
diff --git a/zipl/boot/fba2.S b/zipl/boot/fba2.S
index 82b1447..90bb2cd 100644
--- a/zipl/boot/fba2.S
+++ b/zipl/boot/fba2.S
@@ -98,7 +98,6 @@ _load_direct:
 	lr     %r2,%r11                 # pass subchannel id
 	la     %r3,.Lorb-.Lbase(%r13)   # pass address of orb
 	la     %r4,.Lirb-.Lbase(%r13)   # and pass address of irb
-	la     %r5,5                    # 5 retries
         bas    %r14,_ssch-.Lbase(%r13)  # read up to 128 blocks
 	b      .Lmain-.Lbase(%r13)
 .Lexit:	lr     %r2,%r12                 # return updated load address
-- 
1.6.6