From 6c9f5a1ca4c9c15d29b2d6d683838605e743c776 Mon Sep 17 00:00:00 2001 From: Richard M. Shaw Date: Jul 15 2013 20:13:26 +0000 Subject: Update to latest upstream release. Upstream has accepted the NOP solution for arm and s390 but some patching may still be necessary. TBB is no longer required as the internal spinlocks are faster than the TBB implementation. --- diff --git a/.gitignore b/.gitignore index ecd139c..6fa6dd7 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ clog /oiio-Release-1.1.10.tar.gz /oiio-Release-1.1.11.tar.gz /oiio-Release-1.1.13.tar.gz +/oiio-Release-1.2.0.tar.gz diff --git a/OpenImageIO.spec b/OpenImageIO.spec index 82bbb92..75c7891 100644 --- a/OpenImageIO.spec +++ b/OpenImageIO.spec @@ -1,5 +1,5 @@ Name: OpenImageIO -Version: 1.1.13 +Version: 1.2.0 Release: 1%{?dist} Summary: Library for reading and writing images @@ -12,11 +12,9 @@ Source0: https://download.github.com/oiio-Release-%{version}.tar.gz #Source1: oiio-images.tar.gz Source101: FindTBB.cmake -Patch0: oiio-ppc.patch -Patch1: oiio-spinlocks.patch -Patch2: oiio-arm.patch -Patch3: oiio-s390.patch -Patch4: oiio-ppc.patch +Patch0: oiio-arm.patch +Patch1: oiio-ppc.patch +Patch2: oiio-s390.patch BuildRequires: cmake txt2man BuildRequires: qt4-devel @@ -82,10 +80,9 @@ Development files for package %{name} %prep %setup -q -n oiio-Release-%{version} -%patch1 -p1 -b .spinlocks -%patch2 -p1 -b .arm -%patch3 -p1 -b .s390 -%patch4 -p1 -b .ppc +%patch0 -p1 -b .arm +%patch1 -p1 -b .ppc +#patch2 -p1 -b .s390 # Install FindTBB.cmake install %{SOURCE101} src/cmake/modules/ @@ -162,6 +159,9 @@ cp -a doc/*.1 %{buildroot}%{_mandir}/man1 %changelog +* Mon Jul 15 2013 Richard Shaw - 1.2.0-1 +- Update to latest upstream release. + * Wed Jul 3 2013 Richard Shaw - 1.1.13-1 - Update to latest bugfix release. 
diff --git a/oiio-arm.patch b/oiio-arm.patch index ea2896b..2e83f86 100644 --- a/oiio-arm.patch +++ b/oiio-arm.patch @@ -1,17 +1,5 @@ --- oiio-Release-1.1.10/src/include/thread.h.orig 2013-04-01 00:27:21.337292586 +0100 +++ oiio-Release-1.1.10/src/include/thread.h 2013-04-01 00:30:08.910886481 +0100 -@@ -326,7 +326,11 @@ - { - #if defined(__GNUC__) - for (int i = 0; i < delay; ++i) { -+#if defined __arm__ -+ __asm__ __volatile__("NOP;"); -+#else - __asm__ __volatile__("pause;"); -+#endif - } - #elif USE_TBB - __TBB_Pause(delay); @@ -446,7 +450,12 @@ // Disallow copy construction by making private and unimplemented. diff --git a/oiio-ppc.patch b/oiio-ppc.patch index 00ef9a0..4b2954a 100644 --- a/oiio-ppc.patch +++ b/oiio-ppc.patch @@ -52,12 +52,3 @@ diff -up oiio-Release-1.1.10/src/include/thread.h.ppc oiio-Release-1.1.10/src/in #else # error No atomics on this platform. #endif -@@ -326,7 +340,7 @@ pause (int delay) - { - #if defined(__GNUC__) - for (int i = 0; i < delay; ++i) { --#if defined __arm__ || defined __s390__ -+#if defined __arm__ || defined __s390__ || defined __powerpc__ - __asm__ __volatile__("NOP;"); - #else - __asm__ __volatile__("pause;"); diff --git a/oiio-spinlocks.patch b/oiio-spinlocks.patch deleted file mode 100644 index 8fd97a7..0000000 --- a/oiio-spinlocks.patch +++ /dev/null @@ -1,362 +0,0 @@ -From a58d0ff4935ef14f32f01d4de362bba242f07e0c Mon Sep 17 00:00:00 2001 -From: Larry Gritz -Date: Sat, 4 May 2013 10:22:12 -0700 -Subject: [PATCH] spinlock tweaks that finally make it as good or better than - TBB. 
- ---- - src/include/thread.h | 89 ++++++++++++++++-------------------- - src/libOpenImageIO/atomic_test.cpp | 9 ++-- - src/libOpenImageIO/spinlock_test.cpp | 22 +++++++-- - src/libtexture/imagecache_pvt.h | 2 +- - 4 files changed, 62 insertions(+), 60 deletions(-) - -diff --git a/src/include/thread.h b/src/include/thread.h -index 28645fc..2cd03c1 100644 ---- a/src/include/thread.h -+++ b/src/include/thread.h -@@ -78,16 +78,22 @@ - // Some day, we hope this is all replaced by use of std::atomic<>. - #if USE_TBB - # include -- using tbb::atomic; - # include -+# define USE_TBB_ATOMIC 1 -+# define USE_TBB_SPINLOCK 1 -+#else -+# define USE_TBB_ATOMIC 0 -+# define USE_TBB_SPINLOCK 0 - #endif - -+ - #if defined(_MSC_VER) && !USE_TBB - # include - # include - # pragma intrinsic (_InterlockedExchangeAdd) - # pragma intrinsic (_InterlockedCompareExchange) - # pragma intrinsic (_InterlockedCompareExchange64) -+# pragma intrinsic (_ReadWriteBarrier) - # if defined(_WIN64) - # pragma intrinsic(_InterlockedExchangeAdd64) - # endif -@@ -105,10 +111,6 @@ - # endif - #endif - --#ifdef __APPLE__ --# include --#endif -- - #if defined(__GNUC__) && (defined(_GLIBCXX_ATOMIC_BUILTINS) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 401)) - #if !defined(__FreeBSD__) || defined(__x86_64__) - #define USE_GCC_ATOMICS -@@ -230,9 +232,6 @@ class thread_specific_ptr { - #elif USE_TBB - atomic *a = (atomic *)at; - return a->fetch_and_add (x); --#elif defined(no__APPLE__) -- // Apple, not inline for Intel (only PPC?) -- return OSAtomicAdd32Barrier (x, at) - x; - #elif defined(_MSC_VER) - // Windows - return _InterlockedExchangeAdd ((volatile LONG *)at, x); -@@ -251,9 +250,6 @@ class thread_specific_ptr { - #elif USE_TBB - atomic *a = (atomic *)at; - return a->fetch_and_add (x); --#elif defined(no__APPLE__) -- // Apple, not inline for Intel (only PPC?) 
-- return OSAtomicAdd64Barrier (x, at) - x; - #elif defined(_MSC_VER) - // Windows - # if defined(_WIN64) -@@ -282,8 +278,6 @@ class thread_specific_ptr { - #elif USE_TBB - atomic *a = (atomic *)at; - return a->compare_and_swap (newval, compareval) == newval; --#elif defined(no__APPLE__) -- return OSAtomicCompareAndSwap32Barrier (compareval, newval, at); - #elif defined(_MSC_VER) - return (_InterlockedCompareExchange ((volatile LONG *)at, newval, compareval) == compareval); - #else -@@ -301,8 +295,6 @@ class thread_specific_ptr { - #elif USE_TBB - atomic *a = (atomic *)at; - return a->compare_and_swap (newval, compareval) == newval; --#elif defined(no__APPLE__) -- return OSAtomicCompareAndSwap64Barrier (compareval, newval, at); - #elif defined(_MSC_VER) - return (_InterlockedCompareExchange64 ((volatile LONGLONG *)at, newval, compareval) == compareval); - #else -@@ -317,9 +309,7 @@ class thread_specific_ptr { - inline void - yield () - { --#if USE_TBB -- __TBB_Yield (); --#elif defined(__GNUC__) -+#if defined(__GNUC__) - sched_yield (); - #elif defined(_MSC_VER) - SwitchToThread (); -@@ -334,12 +324,12 @@ class thread_specific_ptr { - inline void - pause (int delay) - { --#if USE_TBB -- __TBB_Pause(delay); --#elif defined(__GNUC__) -+#if defined(__GNUC__) - for (int i = 0; i < delay; ++i) { - __asm__ __volatile__("pause;"); - } -+#elif USE_TBB -+ __TBB_Pause(delay); - #elif defined(_MSC_VER) - for (int i = 0; i < delay; ++i) { - #if defined (_WIN64) -@@ -369,14 +359,17 @@ class atomic_backoff { - yield(); - } - } -+ - private: - int m_count; - }; - - - --#if (! USE_TBB) --// If we're not using TBB, we need to define our own atomic<>. -+#if USE_TBB_ATOMIC -+using tbb::atomic; -+#else -+// If we're not using TBB's atomic, we need to define our own atomic<>. - - - /// Atomic integer. Increment, decrement, add, and subtract in a -@@ -456,7 +449,7 @@ class atomic { - }; - - --#endif /* ! USE_TBB */ -+#endif /* ! 
USE_TBB_ATOMIC */ - - - #ifdef NOTHREADS -@@ -478,7 +471,7 @@ class atomic { - typedef null_mutex spin_mutex; - typedef null_lock spin_lock; - --#elif USE_TBB -+#elif USE_TBB_SPINLOCK - - // Use TBB's spin locks - typedef tbb::spin_mutex spin_mutex; -@@ -529,63 +522,61 @@ class spin_mutex { - /// Acquire the lock, spin until we have it. - /// - void lock () { --#if defined(no__APPLE__) -- // OS X has dedicated spin lock routines, may as well use them. -- OSSpinLockLock ((OSSpinLock *)&m_locked); --#else - // To avoid spinning too tightly, we use the atomic_backoff to - // provide increasingly longer pauses, and if the lock is under - // lots of contention, eventually yield the timeslice. - atomic_backoff backoff; -+ - // Try to get ownership of the lock. Though experimentation, we - // found that OIIO_UNLIKELY makes this just a bit faster on - // gcc x86/x86_64 systems. - while (! OIIO_UNLIKELY(try_lock())) { - do { - backoff(); -- } while (*(volatile int *)&m_locked); -+ } while (m_locked); -+ - // The full try_lock() involves a compare_and_swap, which - // writes memory, and that will lock the bus. But a normal - // read of m_locked will let us spin until the value - // changes, without locking the bus. So it's faster to - // check in this manner until the mutex appears to be free. - } --#endif - } - - /// Release the lock that we hold. - /// - void unlock () { --#if defined(no__APPLE__) -- OSSpinLockUnlock ((OSSpinLock *)&m_locked); --#elif defined(__GNUC__) -- // GCC gives us an intrinsic that is even better, an atomic -- // assignment of 0 with "release" barrier semantics. -- __sync_lock_release ((volatile int *)&m_locked); -+#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) -+ // Fastest way to do it is with a store with "release" semantics -+ __asm__ __volatile__("": : :"memory"); -+ m_locked = 0; -+ // N.B. 
GCC gives us an intrinsic that is even better, an atomic -+ // assignment of 0 with "release" barrier semantics: -+ // __sync_lock_release (&m_locked); -+ // But empirically we found it not as performant as the above. -+#elif defined(_MSC_VER) -+ _ReadWriteBarrier(); -+ m_locked = 0; - #else - // Otherwise, just assign zero to the atomic (but that's a full - // memory barrier). -- m_locked = 0; -+ *(atomic_int *)&m_locked = 0; - #endif - } - - /// Try to acquire the lock. Return true if we have it, false if - /// somebody else is holding the lock. - bool try_lock () { --#if defined(no__APPLE__) -- return OSSpinLockTry ((OSSpinLock *)&m_locked); --#else --# if USE_TBB -+#if USE_TBB_ATOMIC - // TBB's compare_and_swap returns the original value -- return m_locked.compare_and_swap (0, 1) == 0; --# elif defined(__GNUC__) -+ return (*(atomic_int *)&m_locked).compare_and_swap (0, 1) == 0; -+#elif defined(__GNUC__) - // GCC gives us an intrinsic that is even better -- an atomic - // exchange with "acquire" barrier semantics. -- return __sync_lock_test_and_set ((volatile int *)&m_locked, 1) == 0; --# else -+ return __sync_lock_test_and_set (&m_locked, 1) == 0; -+#else - // Our compare_and_swap returns true if it swapped -- return m_locked.bool_compare_and_swap (0, 1); --# endif -+ return atomic_compare_and_exchange (&m_locked, 0, 1); - #endif - } - -@@ -603,7 +594,7 @@ class spin_mutex { - }; - - private: -- atomic_int m_locked; ///< Atomic counter is zero if nobody holds the lock -+ volatile int m_locked; ///< Atomic counter is zero if nobody holds the lock - }; - - -diff --git a/src/libOpenImageIO/atomic_test.cpp b/src/libOpenImageIO/atomic_test.cpp -index 2c1e807..42d469a 100644 ---- a/src/libOpenImageIO/atomic_test.cpp -+++ b/src/libOpenImageIO/atomic_test.cpp -@@ -49,7 +49,7 @@ - // and decrementing the crap out of it, and make sure it has the right - // value at the end. 
- --static int iterations = 160000000; -+static int iterations = 40000000; - static int numthreads = 16; - static int ntrials = 1; - static bool verbose = false; -@@ -184,16 +184,15 @@ int main (int argc, char *argv[]) - - static int threadcounts[] = { 1, 2, 4, 8, 12, 16, 20, 24, 28, 32, 64, 128, 1024, 1<<30 }; - for (int i = 0; threadcounts[i] <= numthreads; ++i) { -- int nt = threadcounts[i]; -+ int nt = wedge ? threadcounts[i] : numthreads; - int its = iterations/nt; - - double range; - double t = time_trial (boost::bind(test_atomics,nt,its), - ntrials, &range); - -- std::cout << Strutil::format ("%2d\t%s\t%5.1fs, range %.1f\t(%d iters/thread)\n", -- nt, Strutil::timeintervalformat(t), -- t, range, its); -+ std::cout << Strutil::format ("%2d\t%5.1f range %.2f\t(%d iters/thread)\n", -+ nt, t, range, its); - if (! wedge) - break; // don't loop if we're not wedging - } -diff --git a/src/libOpenImageIO/spinlock_test.cpp b/src/libOpenImageIO/spinlock_test.cpp -index 60c192b..64adbce 100644 ---- a/src/libOpenImageIO/spinlock_test.cpp -+++ b/src/libOpenImageIO/spinlock_test.cpp -@@ -50,7 +50,7 @@ - // accumulated value is equal to iterations*threads, then the spin locks - // worked. - --static int iterations = 160000000; -+static int iterations = 40000000; - static int numthreads = 16; - static int ntrials = 1; - static bool verbose = false; -@@ -58,6 +58,7 @@ - - static spin_mutex print_mutex; // make the prints not clobber each other - volatile long long accum = 0; -+float faccum = 0; - spin_mutex mymutex; - - -@@ -71,10 +72,22 @@ - std::cout << "thread " << boost::this_thread::get_id() - << ", accum = " << accum << "\n"; - } -+#if 1 - for (int i = 0; i < iterations; ++i) { - spin_lock lock (mymutex); - accum += 1; - } -+#else -+ // Alternate one that mixes in some math to make longer lock hold time, -+ // and also more to do between locks. Interesting contrast in timings. 
-+ float last = 0.0f; -+ for (int i = 0; i < iterations; ++i) { -+ last = fmodf (sinf(last), 1.0f); -+ spin_lock lock (mymutex); -+ accum += 1; -+ faccum = fmod (sinf(faccum+last), 1.0f); -+ } -+#endif - } - - -@@ -134,16 +147,15 @@ int main (int argc, char *argv[]) - - static int threadcounts[] = { 1, 2, 4, 8, 12, 16, 20, 24, 28, 32, 64, 128, 1024, 1<<30 }; - for (int i = 0; threadcounts[i] <= numthreads; ++i) { -- int nt = threadcounts[i]; -+ int nt = wedge ? threadcounts[i] : numthreads; - int its = iterations/nt; - - double range; - double t = time_trial (boost::bind(test_spinlock,nt,its), - ntrials, &range); - -- std::cout << Strutil::format ("%2d\t%s\t%5.1fs, range %.1f\t(%d iters/thread)\n", -- nt, Strutil::timeintervalformat(t), -- t, range, its); -+ std::cout << Strutil::format ("%2d\t%5.1f range %.2f\t(%d iters/thread)\n", -+ nt, t, range, its); - if (! wedge) - break; // don't loop if we're not wedging - } -diff --git a/src/libtexture/imagecache_pvt.h b/src/libtexture/imagecache_pvt.h -index 5d29782..3a49616 100644 ---- a/src/libtexture/imagecache_pvt.h -+++ b/src/libtexture/imagecache_pvt.h -@@ -1003,7 +1003,7 @@ class ImageCacheImpl : public ImageCache { - newval = oldval + incr; - // Now try to atomically swap it, and repeat until we've - // done it with nobody else interfering. --# if USE_TBB -+# if USE_TBB_ATOMIC - } while (llstat->compare_and_swap (*llnewval,*lloldval) != *lloldval); - # else - } while (llstat->bool_compare_and_swap (*llnewval,*lloldval)); --- -1.8.1.6 - diff --git a/sources b/sources index 5d48429..bbe6017 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -976c53880da3d003566e72eb8ebb6ef5 oiio-Release-1.1.13.tar.gz +4ea4cec743ef84a8c07625fadc8444ca oiio-Release-1.2.0.tar.gz