Blob Blame History Raw
switch to write-biased r/w locks

This is to improve fairness: A permanent flow of read acquires can
otherwise lock out eventual writers indefinitely.

This is XSA-114 / CVE-2014-9065.

Signed-off-by: Keir Fraser <keir@xen.org>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -271,112 +271,151 @@ void _spin_unlock_recursive(spinlock_t *
 
 void _read_lock(rwlock_t *lock)
 {
+    uint32_t x;
+
     check_lock(&lock->debug);
-    while ( unlikely(!_raw_read_trylock(&lock->raw)) )
-    {
-        while ( likely(_raw_rw_is_write_locked(&lock->raw)) )
+    do {
+        while ( (x = lock->lock) & RW_WRITE_FLAG )
             cpu_relax();
-    }
+    } while ( cmpxchg(&lock->lock, x, x+1) != x );
     preempt_disable();
 }
 
 void _read_lock_irq(rwlock_t *lock)
 {
+    uint32_t x;
+
     ASSERT(local_irq_is_enabled());
     local_irq_disable();
     check_lock(&lock->debug);
-    while ( unlikely(!_raw_read_trylock(&lock->raw)) )
-    {
-        local_irq_enable();
-        while ( likely(_raw_rw_is_write_locked(&lock->raw)) )
-            cpu_relax();
-        local_irq_disable();
-    }
+    do {
+        if ( (x = lock->lock) & RW_WRITE_FLAG )
+        {
+            local_irq_enable();
+            while ( (x = lock->lock) & RW_WRITE_FLAG )
+                cpu_relax();
+            local_irq_disable();
+        }
+    } while ( cmpxchg(&lock->lock, x, x+1) != x );
     preempt_disable();
 }
 
 unsigned long _read_lock_irqsave(rwlock_t *lock)
 {
+    uint32_t x;
     unsigned long flags;
+
     local_irq_save(flags);
     check_lock(&lock->debug);
-    while ( unlikely(!_raw_read_trylock(&lock->raw)) )
-    {
-        local_irq_restore(flags);
-        while ( likely(_raw_rw_is_write_locked(&lock->raw)) )
-            cpu_relax();
-        local_irq_save(flags);
-    }
+    do {
+        if ( (x = lock->lock) & RW_WRITE_FLAG )
+        {
+            local_irq_restore(flags);
+            while ( (x = lock->lock) & RW_WRITE_FLAG )
+                cpu_relax();
+            local_irq_save(flags);
+        }
+    } while ( cmpxchg(&lock->lock, x, x+1) != x );
     preempt_disable();
     return flags;
 }
 
 int _read_trylock(rwlock_t *lock)
 {
+    uint32_t x;
+
     check_lock(&lock->debug);
-    if ( !_raw_read_trylock(&lock->raw) )
-        return 0;
+    do {
+        if ( (x = lock->lock) & RW_WRITE_FLAG )
+            return 0;
+    } while ( cmpxchg(&lock->lock, x, x+1) != x );
     preempt_disable();
     return 1;
 }
 
 void _read_unlock(rwlock_t *lock)
 {
+    uint32_t x, y;
+
     preempt_enable();
-    _raw_read_unlock(&lock->raw);
+    x = lock->lock;
+    while ( (y = cmpxchg(&lock->lock, x, x-1)) != x )
+        x = y;
 }
 
 void _read_unlock_irq(rwlock_t *lock)
 {
-    preempt_enable();
-    _raw_read_unlock(&lock->raw);
+    _read_unlock(lock);
     local_irq_enable();
 }
 
 void _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
-    preempt_enable();
-    _raw_read_unlock(&lock->raw);
+    _read_unlock(lock);
     local_irq_restore(flags);
 }
 
 void _write_lock(rwlock_t *lock)
 {
+    uint32_t x;
+
     check_lock(&lock->debug);
-    while ( unlikely(!_raw_write_trylock(&lock->raw)) )
-    {
-        while ( likely(_raw_rw_is_locked(&lock->raw)) )
+    do {
+        while ( (x = lock->lock) & RW_WRITE_FLAG )
             cpu_relax();
+    } while ( cmpxchg(&lock->lock, x, x|RW_WRITE_FLAG) != x );
+    while ( x != 0 )
+    {
+        cpu_relax();
+        x = lock->lock & ~RW_WRITE_FLAG;
     }
     preempt_disable();
 }
 
 void _write_lock_irq(rwlock_t *lock)
 {
+    uint32_t x;
+
     ASSERT(local_irq_is_enabled());
     local_irq_disable();
     check_lock(&lock->debug);
-    while ( unlikely(!_raw_write_trylock(&lock->raw)) )
+    do {
+        if ( (x = lock->lock) & RW_WRITE_FLAG )
+        {
+            local_irq_enable();
+            while ( (x = lock->lock) & RW_WRITE_FLAG )
+                cpu_relax();
+            local_irq_disable();
+        }
+    } while ( cmpxchg(&lock->lock, x, x|RW_WRITE_FLAG) != x );
+    while ( x != 0 )
     {
-        local_irq_enable();
-        while ( likely(_raw_rw_is_locked(&lock->raw)) )
-            cpu_relax();
-        local_irq_disable();
+        cpu_relax();
+        x = lock->lock & ~RW_WRITE_FLAG;
     }
     preempt_disable();
 }
 
 unsigned long _write_lock_irqsave(rwlock_t *lock)
 {
+    uint32_t x;
     unsigned long flags;
+
     local_irq_save(flags);
     check_lock(&lock->debug);
-    while ( unlikely(!_raw_write_trylock(&lock->raw)) )
+    do {
+        if ( (x = lock->lock) & RW_WRITE_FLAG )
+        {
+            local_irq_restore(flags);
+            while ( (x = lock->lock) & RW_WRITE_FLAG )
+                cpu_relax();
+            local_irq_save(flags);
+        }
+    } while ( cmpxchg(&lock->lock, x, x|RW_WRITE_FLAG) != x );
+    while ( x != 0 )
     {
-        local_irq_restore(flags);
-        while ( likely(_raw_rw_is_locked(&lock->raw)) )
-            cpu_relax();
-        local_irq_save(flags);
+        cpu_relax();
+        x = lock->lock & ~RW_WRITE_FLAG;
     }
     preempt_disable();
     return flags;
@@ -384,9 +423,13 @@ unsigned long _write_lock_irqsave(rwlock
 
 int _write_trylock(rwlock_t *lock)
 {
+    uint32_t x;
+
     check_lock(&lock->debug);
-    if ( !_raw_write_trylock(&lock->raw) )
-        return 0;
+    do {
+        if ( (x = lock->lock) != 0 )
+            return 0;
+    } while ( cmpxchg(&lock->lock, x, x|RW_WRITE_FLAG) != x );
     preempt_disable();
     return 1;
 }
@@ -394,33 +437,32 @@ int _write_trylock(rwlock_t *lock)
 void _write_unlock(rwlock_t *lock)
 {
     preempt_enable();
-    _raw_write_unlock(&lock->raw);
+    if ( cmpxchg(&lock->lock, RW_WRITE_FLAG, 0) != RW_WRITE_FLAG )
+        BUG();
 }
 
 void _write_unlock_irq(rwlock_t *lock)
 {
-    preempt_enable();
-    _raw_write_unlock(&lock->raw);
+    _write_unlock(lock);
     local_irq_enable();
 }
 
 void _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
-    preempt_enable();
-    _raw_write_unlock(&lock->raw);
+    _write_unlock(lock);
     local_irq_restore(flags);
 }
 
 int _rw_is_locked(rwlock_t *lock)
 {
     check_lock(&lock->debug);
-    return _raw_rw_is_locked(&lock->raw);
+    return (lock->lock != 0); /* anyone in critical section? */
 }
 
 int _rw_is_write_locked(rwlock_t *lock)
 {
     check_lock(&lock->debug);
-    return _raw_rw_is_write_locked(&lock->raw);
+    return (lock->lock == RW_WRITE_FLAG); /* writer in critical section? */
 }
 
 #ifdef LOCK_PROFILE
--- a/xen/include/asm-arm/spinlock.h
+++ b/xen/include/asm-arm/spinlock.h
@@ -55,84 +55,6 @@ static always_inline int _raw_spin_trylo
     }
 }
 
-typedef struct {
-    volatile unsigned int lock;
-} raw_rwlock_t;
-
-#define _RAW_RW_LOCK_UNLOCKED { 0 }
-
-static always_inline int _raw_read_trylock(raw_rwlock_t *rw)
-{
-    unsigned long tmp, tmp2 = 1;
-
-    __asm__ __volatile__(
-"1: ldrex   %0, [%2]\n"
-"   adds    %0, %0, #1\n"
-"   strexpl %1, %0, [%2]\n"
-    : "=&r" (tmp), "+r" (tmp2)
-    : "r" (&rw->lock)
-    : "cc");
-
-    smp_mb();
-    return tmp2 == 0;
-}
-
-static always_inline int _raw_write_trylock(raw_rwlock_t *rw)
-{
-    unsigned long tmp;
-
-    __asm__ __volatile__(
-"1: ldrex   %0, [%1]\n"
-"   teq     %0, #0\n"
-"   strexeq %0, %2, [%1]"
-    : "=&r" (tmp)
-    : "r" (&rw->lock), "r" (0x80000000)
-    : "cc");
-
-    if (tmp == 0) {
-        smp_mb();
-        return 1;
-    } else {
-        return 0;
-    }
-}
-
-static inline void _raw_read_unlock(raw_rwlock_t *rw)
-{
-    unsigned long tmp, tmp2;
-
-    smp_mb();
-
-    __asm__ __volatile__(
-"1: ldrex   %0, [%2]\n"
-"   sub     %0, %0, #1\n"
-"   strex   %1, %0, [%2]\n"
-"   teq     %1, #0\n"
-"   bne     1b"
-    : "=&r" (tmp), "=&r" (tmp2)
-    : "r" (&rw->lock)
-    : "cc");
-
-    if (tmp == 0)
-        dsb_sev();
-}
-
-static inline void _raw_write_unlock(raw_rwlock_t *rw)
-{
-    smp_mb();
-
-    __asm__ __volatile__(
-    "str    %1, [%0]\n"
-    :
-    : "r" (&rw->lock), "r" (0)
-    : "cc");
-
-    dsb_sev();
-}
-
-#define _raw_rw_is_locked(x) ((x)->lock != 0)
-#define _raw_rw_is_write_locked(x) ((x)->lock == 0x80000000)
-
 #endif /* __ASM_SPINLOCK_H */
 /*
  * Local variables:
--- a/xen/include/asm-x86/spinlock.h
+++ b/xen/include/asm-x86/spinlock.h
@@ -31,58 +31,4 @@ static always_inline int _raw_spin_trylo
     return (oldval > 0);
 }
 
-typedef struct {
-    volatile int lock;
-} raw_rwlock_t;
-
-#define RW_WRITE_BIAS 0x7fffffff
-#define _RAW_RW_LOCK_UNLOCKED /*(raw_rwlock_t)*/ { 0 }
-
-static always_inline int _raw_read_trylock(raw_rwlock_t *rw)
-{
-    int acquired;
-
-    asm volatile (
-        "    lock; decl %0         \n"
-        "    jns 2f                \n"
-#ifdef __clang__ /* clang's builtin assember can't do .subsection */
-        "1:  .pushsection .fixup,\"ax\"\n"
-#else
-        "1:  .subsection 1         \n"
-#endif
-        "2:  lock; incl %0         \n"
-        "    decl %1               \n"
-        "    jmp 1b                \n"
-#ifdef __clang__
-        "    .popsection           \n"
-#else
-        "    .subsection 0         \n"
-#endif
-        : "=m" (rw->lock), "=r" (acquired) : "1" (1) : "memory" );
-
-    return acquired;
-}
-
-static always_inline int _raw_write_trylock(raw_rwlock_t *rw)
-{
-    return (cmpxchg(&rw->lock, 0, RW_WRITE_BIAS) == 0);
-}
-
-static always_inline void _raw_read_unlock(raw_rwlock_t *rw)
-{
-    asm volatile (
-        "lock ; incl %0"
-        : "=m" ((rw)->lock) : : "memory" );
-}
-
-static always_inline void _raw_write_unlock(raw_rwlock_t *rw)
-{
-    asm volatile (
-        "lock ; subl %1,%0"
-        : "=m" ((rw)->lock) : "i" (RW_WRITE_BIAS) : "memory" );
-}
-
-#define _raw_rw_is_locked(x) ((x)->lock != 0)
-#define _raw_rw_is_write_locked(x) ((x)->lock > 0)
-
 #endif /* __ASM_SPINLOCK_H */
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -141,11 +141,13 @@ typedef struct spinlock {
 #define spin_lock_init(l) (*(l) = (spinlock_t)SPIN_LOCK_UNLOCKED)
 
 typedef struct {
-    raw_rwlock_t raw;
+    volatile uint32_t lock;
     struct lock_debug debug;
 } rwlock_t;
 
-#define RW_LOCK_UNLOCKED { _RAW_RW_LOCK_UNLOCKED, _LOCK_DEBUG }
+#define RW_WRITE_FLAG (1u<<31)
+
+#define RW_LOCK_UNLOCKED { 0, _LOCK_DEBUG }
 #define DEFINE_RWLOCK(l) rwlock_t l = RW_LOCK_UNLOCKED
 #define rwlock_init(l) (*(l) = (rwlock_t)RW_LOCK_UNLOCKED)