Blob Blame History Raw
If primecount is built with GCC and -D_GLIBCXX_ASSERTIONS is in the build
flags, then some tests fail:

The following tests FAILED:
	  8 - S2_hard_xy (Child aborted)
	 10 - S2_xy (Child aborted)
	 11 - alpha (Child aborted)
	 12 - alpha_y (Child aborted)
	 13 - alpha_z (Child aborted)
	 35 - sieve1 (Child aborted)
	 36 - sieve2 (Child aborted)

Running these tests from the command line shows an assertion failure, e.g. for
S2_hard_xy:

S2_hard(4912, 16) = 0   OK
S2_hard(4913, 17) = 0/usr/include/c++/9/bits/stl_vector.h:1042: std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::operator[](std::vector<_Tp, _Alloc>::size_type) [with _Tp = unsigned char; _Alloc = std::allocator<unsigned char>; std::vector<_Tp, _Alloc>::reference = unsigned char&; std::vector<_Tp, _Alloc>::size_type = long unsigned int]: Assertion '__builtin_expect(__n < this->size(), true)' failed.
The problem is this code, on lines 214 and 518 of src/Sieve.cpp:

    byte_t* sieve_end = &sieve_[sieve_size];

Since sieve_size is the number of elements in the vector, this code asks for
the address of the element just beyond the end of the vector.  But GCC's
implementation of operator[] for std::vector looks like this:

      reference
      operator[](size_type __n) _GLIBCXX_NOEXCEPT
      {
        __glibcxx_requires_subscript(__n);
        return *(this->_M_impl._M_start + __n);
      }

The __glibcxx_requires_subscript line is where the assertion failure happens.
There actually is a pointer dereference on the following line, so even if
assertions are not enabled, this code dereferences a pointer that points beyond
the vector.

Upstream elected to solve the problem by converting the vector into a C array,
as follows.  See
https://github.com/kimwalisch/primecount/commit/1c441f242739d3189787474943cf18690281072d

diff --git a/include/Sieve.hpp b/include/Sieve.hpp
index da2102c9..eb6a3a88 100644
--- a/include/Sieve.hpp
+++ b/include/Sieve.hpp
@@ -24,6 +24,7 @@
 
 #include <stdint.h>
 #include <cassert>
+#include <memory>
 #include <vector>
 
 namespace primecount {
@@ -94,11 +95,12 @@ public:
 
 private:
   void reset_sieve(uint64_t low, uint64_t high);
-  void set_sieve_size(uint64_t segment_size);
   void add(uint64_t prime);
 
   uint64_t start_;
-  std::vector<byte_t> sieve_;
+  uint64_t sieve_size_;
+  byte_t* sieve_;
+  std::unique_ptr<byte_t[]> deleter_;
   std::vector<Wheel> wheel_;
 };
 
diff --git a/src/Sieve.cpp b/src/Sieve.cpp
index 5aa4f3a4..f0f0ad43 100644
--- a/src/Sieve.cpp
+++ b/src/Sieve.cpp
@@ -27,6 +27,7 @@
 #include <algorithm>
 #include <array>
 #include <cassert>
+#include <memory>
 #include <vector>
 
 using namespace std;
@@ -91,17 +92,25 @@ Sieve::Sieve(uint64_t start,
   assert(segment_size % 240 == 0);
 
   start_ = start;
-  set_sieve_size(segment_size);
+  segment_size = get_segment_size(segment_size);
+
+  // sieve_size = segment_size / 30 as each byte corresponds
+  // to 30 numbers i.e. the 8 bits correspond to the
+  // offsets = {1, 7, 11, 13, 17, 19, 23, 29}.
+  sieve_size_ = segment_size / 30;
+  sieve_ = new byte_t[sieve_size_];
+  deleter_.reset(sieve_);
+
   wheel_.reserve(wheel_size);
   wheel_.resize(4);
 }
 
-/// The segment size (a.k.a. sieve distance) is sieve
-/// size * 30 as each byte contains 30 numbers.
+/// The segment size is sieve_size * 30 as each
+/// byte corresponds to 30 numbers.
 ///
 uint64_t Sieve::segment_size() const
 {
-  return sieve_.size() * 30;
+  return sieve_size_ * 30;
 }
 
 /// segment_size must be a multiple of 240 as we
@@ -118,16 +127,6 @@ uint64_t Sieve::get_segment_size(uint64_t size)
   return size;
 }
 
-/// Sieve size = segment_size / 30 as
-/// each byte contains 30 numbers.
-///
-void Sieve::set_sieve_size(uint64_t segment_size)
-{
-  segment_size = get_segment_size(segment_size);
-  uint64_t size = segment_size / 30;
-  sieve_.resize(size);
-}
-
 /// Count 1 bits inside [start, stop]
 uint64_t Sieve::count(uint64_t start, uint64_t stop) const
 {
@@ -141,15 +140,15 @@ uint64_t Sieve::count(uint64_t start, uint64_t stop) const
   uint64_t stop_idx = stop / 240;
   uint64_t m1 = unset_smaller[start % 240];
   uint64_t m2 = unset_larger[stop % 240];
-  auto sieve = (uint64_t*) &sieve_[0];
+  auto sieve64 = (uint64_t*) sieve_;
 
   if (start_idx == stop_idx)
-    bit_count = popcnt64(sieve[start_idx] & (m1 & m2));
+    bit_count = popcnt64(sieve64[start_idx] & (m1 & m2));
   else
   {
-    bit_count = popcnt64(sieve[start_idx] & m1);
-    bit_count += popcnt(&sieve[start_idx + 1], stop_idx - (start_idx + 1));
-    bit_count += popcnt64(sieve[stop_idx] & m2);
+    bit_count = popcnt64(sieve64[start_idx] & m1);
+    bit_count += popcnt(&sieve64[start_idx + 1], stop_idx - (start_idx + 1));
+    bit_count += popcnt64(sieve64[stop_idx] & m2);
   }
 
   return bit_count;
@@ -157,15 +156,16 @@ uint64_t Sieve::count(uint64_t start, uint64_t stop) const
 
 void Sieve::reset_sieve(uint64_t low, uint64_t high)
 {
-  fill(sieve_.begin(), sieve_.end(), 0xff);
+  fill_n(sieve_, sieve_size_, (byte_t) 0xff);
   uint64_t size = high - low;
 
   if (size < segment_size())
   {
-    set_sieve_size(size);
-    auto sieve = (uint64_t*) &sieve_[0];
-    uint64_t back = size - 1;
-    sieve[back / 240] &= unset_larger[back % 240];
+    uint64_t last = size - 1;
+    size = get_segment_size(size);
+    sieve_size_ = size / 30;
+    auto sieve64 = (uint64_t*) sieve_;
+    sieve64[last / 240] &= unset_larger[last % 240];
   }
 }
 
@@ -202,296 +202,289 @@ void Sieve::cross_off(uint64_t prime, uint64_t i)
   if (i >= wheel_.size())
     add(prime);
 
-  uint32_t sieve_size = (uint32_t) sieve_.size();
   Wheel& wheel = wheel_[i];
+  // pointer to the byte with the 1st multiple
+  byte_t* s = sieve_ + wheel.multiple;
+  byte_t* sieve_end = sieve_ + sieve_size_;
+  prime /= 30;
 
-  if (wheel.multiple >= sieve_size)
-    wheel.multiple -= sieve_size;
-  else
+  switch (wheel.index)
   {
-    // pointer to the byte with the 1st multiple 
-    byte_t* s = &sieve_[wheel.multiple];
-    byte_t* sieve_end = &sieve_[sieve_size];
-    prime /= 30;
-
-    switch (wheel.index)
+    for (;;)
     {
-      for (;;)
+      case 0: if (s >= sieve_end) { wheel.index = 0; break; }
+      unset_bit<0>(s); s += prime * 6 + 0;
+      case 1: if (s >= sieve_end) { wheel.index = 1; break; }
+      unset_bit<1>(s); s += prime * 4 + 0;
+      case 2: if (s >= sieve_end) { wheel.index = 2; break; }
+      unset_bit<2>(s); s += prime * 2 + 0;
+      case 3: if (s >= sieve_end) { wheel.index = 3; break; }
+      unset_bit<3>(s); s += prime * 4 + 0;
+      case 4: if (s >= sieve_end) { wheel.index = 4; break; }
+      unset_bit<4>(s); s += prime * 2 + 0;
+      case 5: if (s >= sieve_end) { wheel.index = 5; break; }
+      unset_bit<5>(s); s += prime * 4 + 0;
+      case 6: if (s >= sieve_end) { wheel.index = 6; break; }
+      unset_bit<6>(s); s += prime * 6 + 0;
+      case 7: if (s >= sieve_end) { wheel.index = 7; break; }
+      unset_bit<7>(s); s += prime * 2 + 1;
+
+      while (s + prime * 28 < sieve_end)
       {
-        if (s >= sieve_end) { wheel.index = 0; break; }
-        case 0: unset_bit<0>(s); s += prime * 6 + 0;
-        if (s >= sieve_end) { wheel.index = 1; break; }
-        case 1: unset_bit<1>(s); s += prime * 4 + 0;
-        if (s >= sieve_end) { wheel.index = 2; break; }
-        case 2: unset_bit<2>(s); s += prime * 2 + 0;
-        if (s >= sieve_end) { wheel.index = 3; break; }
-        case 3: unset_bit<3>(s); s += prime * 4 + 0;
-        if (s >= sieve_end) { wheel.index = 4; break; }
-        case 4: unset_bit<4>(s); s += prime * 2 + 0;
-        if (s >= sieve_end) { wheel.index = 5; break; }
-        case 5: unset_bit<5>(s); s += prime * 4 + 0;
-        if (s >= sieve_end) { wheel.index = 6; break; }
-        case 6: unset_bit<6>(s); s += prime * 6 + 0;
-        if (s >= sieve_end) { wheel.index = 7; break; }
-        case 7: unset_bit<7>(s); s += prime * 2 + 1;
-
-        while (s + prime * 28 < sieve_end)
-        {
-          unset_bit<0>(s + prime *  0);
-          unset_bit<1>(s + prime *  6);
-          unset_bit<2>(s + prime * 10);
-          unset_bit<3>(s + prime * 12);
-          unset_bit<4>(s + prime * 16);
-          unset_bit<5>(s + prime * 18);
-          unset_bit<6>(s + prime * 22);
-          unset_bit<7>(s + prime * 28);
-          s += prime * 30 + 1;
-        }
+        unset_bit<0>(s + prime *  0);
+        unset_bit<1>(s + prime *  6);
+        unset_bit<2>(s + prime * 10);
+        unset_bit<3>(s + prime * 12);
+        unset_bit<4>(s + prime * 16);
+        unset_bit<5>(s + prime * 18);
+        unset_bit<6>(s + prime * 22);
+        unset_bit<7>(s + prime * 28);
+        s += prime * 30 + 1;
       }
-      break;
+    }
+    break;
 
-      for (;;)
+    for (;;)
+    {
+      case  8: if (s >= sieve_end) { wheel.index =  8; break; }
+      unset_bit<1>(s); s += prime * 6 + 1;
+      case  9: if (s >= sieve_end) { wheel.index =  9; break; }
+      unset_bit<5>(s); s += prime * 4 + 1;
+      case 10: if (s >= sieve_end) { wheel.index = 10; break; }
+      unset_bit<4>(s); s += prime * 2 + 1;
+      case 11: if (s >= sieve_end) { wheel.index = 11; break; }
+      unset_bit<0>(s); s += prime * 4 + 0;
+      case 12: if (s >= sieve_end) { wheel.index = 12; break; }
+      unset_bit<7>(s); s += prime * 2 + 1;
+      case 13: if (s >= sieve_end) { wheel.index = 13; break; }
+      unset_bit<3>(s); s += prime * 4 + 1;
+      case 14: if (s >= sieve_end) { wheel.index = 14; break; }
+      unset_bit<2>(s); s += prime * 6 + 1;
+      case 15: if (s >= sieve_end) { wheel.index = 15; break; }
+      unset_bit<6>(s); s += prime * 2 + 1;
+
+      while (s + prime * 28 + 6 < sieve_end)
       {
-        if (s >= sieve_end) { wheel.index =  8; break; }
-        case  8: unset_bit<1>(s); s += prime * 6 + 1;
-        if (s >= sieve_end) { wheel.index =  9; break; }
-        case  9: unset_bit<5>(s); s += prime * 4 + 1;
-        if (s >= sieve_end) { wheel.index = 10; break; }
-        case 10: unset_bit<4>(s); s += prime * 2 + 1;
-        if (s >= sieve_end) { wheel.index = 11; break; }
-        case 11: unset_bit<0>(s); s += prime * 4 + 0;
-        if (s >= sieve_end) { wheel.index = 12; break; }
-        case 12: unset_bit<7>(s); s += prime * 2 + 1;
-        if (s >= sieve_end) { wheel.index = 13; break; }
-        case 13: unset_bit<3>(s); s += prime * 4 + 1;
-        if (s >= sieve_end) { wheel.index = 14; break; }
-        case 14: unset_bit<2>(s); s += prime * 6 + 1;
-        if (s >= sieve_end) { wheel.index = 15; break; }
-        case 15: unset_bit<6>(s); s += prime * 2 + 1;
-
-        while (s + prime * 28 + 6 < sieve_end)
-        {
-          unset_bit<1>(s + prime *  0 + 0);
-          unset_bit<5>(s + prime *  6 + 1);
-          unset_bit<4>(s + prime * 10 + 2);
-          unset_bit<0>(s + prime * 12 + 3);
-          unset_bit<7>(s + prime * 16 + 3);
-          unset_bit<3>(s + prime * 18 + 4);
-          unset_bit<2>(s + prime * 22 + 5);
-          unset_bit<6>(s + prime * 28 + 6);
-          s += prime * 30 + 7;
-        }
+        unset_bit<1>(s + prime *  0 + 0);
+        unset_bit<5>(s + prime *  6 + 1);
+        unset_bit<4>(s + prime * 10 + 2);
+        unset_bit<0>(s + prime * 12 + 3);
+        unset_bit<7>(s + prime * 16 + 3);
+        unset_bit<3>(s + prime * 18 + 4);
+        unset_bit<2>(s + prime * 22 + 5);
+        unset_bit<6>(s + prime * 28 + 6);
+        s += prime * 30 + 7;
       }
-      break;
+    }
+    break;
 
-      for (;;)
+    for (;;)
+    {
+      case 16: if (s >= sieve_end) { wheel.index = 16; break; }
+      unset_bit<2>(s); s += prime * 6 + 2;
+      case 17: if (s >= sieve_end) { wheel.index = 17; break; }
+      unset_bit<4>(s); s += prime * 4 + 2;
+      case 18: if (s >= sieve_end) { wheel.index = 18; break; }
+      unset_bit<0>(s); s += prime * 2 + 0;
+      case 19: if (s >= sieve_end) { wheel.index = 19; break; }
+      unset_bit<6>(s); s += prime * 4 + 2;
+      case 20: if (s >= sieve_end) { wheel.index = 20; break; }
+      unset_bit<1>(s); s += prime * 2 + 0;
+      case 21: if (s >= sieve_end) { wheel.index = 21; break; }
+      unset_bit<7>(s); s += prime * 4 + 2;
+      case 22: if (s >= sieve_end) { wheel.index = 22; break; }
+      unset_bit<3>(s); s += prime * 6 + 2;
+      case 23: if (s >= sieve_end) { wheel.index = 23; break; }
+      unset_bit<5>(s); s += prime * 2 + 1;
+
+      while (s + prime * 28 + 10 < sieve_end)
       {
-        if (s >= sieve_end) { wheel.index = 16; break; }
-        case 16: unset_bit<2>(s); s += prime * 6 + 2;
-        if (s >= sieve_end) { wheel.index = 17; break; }
-        case 17: unset_bit<4>(s); s += prime * 4 + 2;
-        if (s >= sieve_end) { wheel.index = 18; break; }
-        case 18: unset_bit<0>(s); s += prime * 2 + 0;
-        if (s >= sieve_end) { wheel.index = 19; break; }
-        case 19: unset_bit<6>(s); s += prime * 4 + 2;
-        if (s >= sieve_end) { wheel.index = 20; break; }
-        case 20: unset_bit<1>(s); s += prime * 2 + 0;
-        if (s >= sieve_end) { wheel.index = 21; break; }
-        case 21: unset_bit<7>(s); s += prime * 4 + 2;
-        if (s >= sieve_end) { wheel.index = 22; break; }
-        case 22: unset_bit<3>(s); s += prime * 6 + 2;
-        if (s >= sieve_end) { wheel.index = 23; break; }
-        case 23: unset_bit<5>(s); s += prime * 2 + 1;
-
-        while (s + prime * 28 + 10 < sieve_end)
-        {
-          unset_bit<2>(s + prime *  0 +  0);
-          unset_bit<4>(s + prime *  6 +  2);
-          unset_bit<0>(s + prime * 10 +  4);
-          unset_bit<6>(s + prime * 12 +  4);
-          unset_bit<1>(s + prime * 16 +  6);
-          unset_bit<7>(s + prime * 18 +  6);
-          unset_bit<3>(s + prime * 22 +  8);
-          unset_bit<5>(s + prime * 28 + 10);
-          s += prime * 30 + 11;
-        }
+        unset_bit<2>(s + prime *  0 +  0);
+        unset_bit<4>(s + prime *  6 +  2);
+        unset_bit<0>(s + prime * 10 +  4);
+        unset_bit<6>(s + prime * 12 +  4);
+        unset_bit<1>(s + prime * 16 +  6);
+        unset_bit<7>(s + prime * 18 +  6);
+        unset_bit<3>(s + prime * 22 +  8);
+        unset_bit<5>(s + prime * 28 + 10);
+        s += prime * 30 + 11;
       }
-      break;
+    }
+    break;
 
-      for (;;)
+    for (;;)
+    {
+      case 24: if (s >= sieve_end) { wheel.index = 24; break; }
+      unset_bit<3>(s); s += prime * 6 + 3;
+      case 25: if (s >= sieve_end) { wheel.index = 25; break; }
+      unset_bit<0>(s); s += prime * 4 + 1;
+      case 26: if (s >= sieve_end) { wheel.index = 26; break; }
+      unset_bit<6>(s); s += prime * 2 + 1;
+      case 27: if (s >= sieve_end) { wheel.index = 27; break; }
+      unset_bit<5>(s); s += prime * 4 + 2;
+      case 28: if (s >= sieve_end) { wheel.index = 28; break; }
+      unset_bit<2>(s); s += prime * 2 + 1;
+      case 29: if (s >= sieve_end) { wheel.index = 29; break; }
+      unset_bit<1>(s); s += prime * 4 + 1;
+      case 30: if (s >= sieve_end) { wheel.index = 30; break; }
+      unset_bit<7>(s); s += prime * 6 + 3;
+      case 31: if (s >= sieve_end) { wheel.index = 31; break; }
+      unset_bit<4>(s); s += prime * 2 + 1;
+
+      while (s + prime * 28 + 12 < sieve_end)
       {
-        if (s >= sieve_end) { wheel.index = 24; break; }
-        case 24: unset_bit<3>(s); s += prime * 6 + 3;
-        if (s >= sieve_end) { wheel.index = 25; break; }
-        case 25: unset_bit<0>(s); s += prime * 4 + 1;
-        if (s >= sieve_end) { wheel.index = 26; break; }
-        case 26: unset_bit<6>(s); s += prime * 2 + 1;
-        if (s >= sieve_end) { wheel.index = 27; break; }
-        case 27: unset_bit<5>(s); s += prime * 4 + 2;
-        if (s >= sieve_end) { wheel.index = 28; break; }
-        case 28: unset_bit<2>(s); s += prime * 2 + 1;
-        if (s >= sieve_end) { wheel.index = 29; break; }
-        case 29: unset_bit<1>(s); s += prime * 4 + 1;
-        if (s >= sieve_end) { wheel.index = 30; break; }
-        case 30: unset_bit<7>(s); s += prime * 6 + 3;
-        if (s >= sieve_end) { wheel.index = 31; break; }
-        case 31: unset_bit<4>(s); s += prime * 2 + 1;
-
-        while (s + prime * 28 + 12 < sieve_end)
-        {
-          unset_bit<3>(s + prime *  0 +  0);
-          unset_bit<0>(s + prime *  6 +  3);
-          unset_bit<6>(s + prime * 10 +  4);
-          unset_bit<5>(s + prime * 12 +  5);
-          unset_bit<2>(s + prime * 16 +  7);
-          unset_bit<1>(s + prime * 18 +  8);
-          unset_bit<7>(s + prime * 22 +  9);
-          unset_bit<4>(s + prime * 28 + 12);
-          s += prime * 30 + 13;
-        }
+        unset_bit<3>(s + prime *  0 +  0);
+        unset_bit<0>(s + prime *  6 +  3);
+        unset_bit<6>(s + prime * 10 +  4);
+        unset_bit<5>(s + prime * 12 +  5);
+        unset_bit<2>(s + prime * 16 +  7);
+        unset_bit<1>(s + prime * 18 +  8);
+        unset_bit<7>(s + prime * 22 +  9);
+        unset_bit<4>(s + prime * 28 + 12);
+        s += prime * 30 + 13;
       }
-      break;
+    }
+    break;
 
-      for (;;)
+    for (;;)
+    {
+      case 32: if (s >= sieve_end) { wheel.index = 32; break; }
+      unset_bit<4>(s); s += prime * 6 + 3;
+      case 33: if (s >= sieve_end) { wheel.index = 33; break; }
+      unset_bit<7>(s); s += prime * 4 + 3;
+      case 34: if (s >= sieve_end) { wheel.index = 34; break; }
+      unset_bit<1>(s); s += prime * 2 + 1;
+      case 35: if (s >= sieve_end) { wheel.index = 35; break; }
+      unset_bit<2>(s); s += prime * 4 + 2;
+      case 36: if (s >= sieve_end) { wheel.index = 36; break; }
+      unset_bit<5>(s); s += prime * 2 + 1;
+      case 37: if (s >= sieve_end) { wheel.index = 37; break; }
+      unset_bit<6>(s); s += prime * 4 + 3;
+      case 38: if (s >= sieve_end) { wheel.index = 38; break; }
+      unset_bit<0>(s); s += prime * 6 + 3;
+      case 39: if (s >= sieve_end) { wheel.index = 39; break; }
+      unset_bit<3>(s); s += prime * 2 + 1;
+
+      while (s + prime * 28 + 16 < sieve_end)
       {
-        if (s >= sieve_end) { wheel.index = 32; break; }
-        case 32: unset_bit<4>(s); s += prime * 6 + 3;
-        if (s >= sieve_end) { wheel.index = 33; break; }
-        case 33: unset_bit<7>(s); s += prime * 4 + 3;
-        if (s >= sieve_end) { wheel.index = 34; break; }
-        case 34: unset_bit<1>(s); s += prime * 2 + 1;
-        if (s >= sieve_end) { wheel.index = 35; break; }
-        case 35: unset_bit<2>(s); s += prime * 4 + 2;
-        if (s >= sieve_end) { wheel.index = 36; break; }
-        case 36: unset_bit<5>(s); s += prime * 2 + 1;
-        if (s >= sieve_end) { wheel.index = 37; break; }
-        case 37: unset_bit<6>(s); s += prime * 4 + 3;
-        if (s >= sieve_end) { wheel.index = 38; break; }
-        case 38: unset_bit<0>(s); s += prime * 6 + 3;
-        if (s >= sieve_end) { wheel.index = 39; break; }
-        case 39: unset_bit<3>(s); s += prime * 2 + 1;
-
-        while (s + prime * 28 + 16 < sieve_end)
-        {
-          unset_bit<4>(s + prime *  0 +  0);
-          unset_bit<7>(s + prime *  6 +  3);
-          unset_bit<1>(s + prime * 10 +  6);
-          unset_bit<2>(s + prime * 12 +  7);
-          unset_bit<5>(s + prime * 16 +  9);
-          unset_bit<6>(s + prime * 18 + 10);
-          unset_bit<0>(s + prime * 22 + 13);
-          unset_bit<3>(s + prime * 28 + 16);
-          s += prime * 30 + 17;
-        }
+        unset_bit<4>(s + prime *  0 +  0);
+        unset_bit<7>(s + prime *  6 +  3);
+        unset_bit<1>(s + prime * 10 +  6);
+        unset_bit<2>(s + prime * 12 +  7);
+        unset_bit<5>(s + prime * 16 +  9);
+        unset_bit<6>(s + prime * 18 + 10);
+        unset_bit<0>(s + prime * 22 + 13);
+        unset_bit<3>(s + prime * 28 + 16);
+        s += prime * 30 + 17;
       }
-      break;
+    }
+    break;
 
-      for (;;)
+    for (;;)
+    {
+      case 40: if (s >= sieve_end) { wheel.index = 40; break; }
+      unset_bit<5>(s); s += prime * 6 + 4;
+      case 41: if (s >= sieve_end) { wheel.index = 41; break; }
+      unset_bit<3>(s); s += prime * 4 + 2;
+      case 42: if (s >= sieve_end) { wheel.index = 42; break; }
+      unset_bit<7>(s); s += prime * 2 + 2;
+      case 43: if (s >= sieve_end) { wheel.index = 43; break; }
+      unset_bit<1>(s); s += prime * 4 + 2;
+      case 44: if (s >= sieve_end) { wheel.index = 44; break; }
+      unset_bit<6>(s); s += prime * 2 + 2;
+      case 45: if (s >= sieve_end) { wheel.index = 45; break; }
+      unset_bit<0>(s); s += prime * 4 + 2;
+      case 46: if (s >= sieve_end) { wheel.index = 46; break; }
+      unset_bit<4>(s); s += prime * 6 + 4;
+      case 47: if (s >= sieve_end) { wheel.index = 47; break; }
+      unset_bit<2>(s); s += prime * 2 + 1;
+
+      while (s + prime * 28 + 18 < sieve_end)
       {
-        if (s >= sieve_end) { wheel.index = 40; break; }
-        case 40: unset_bit<5>(s); s += prime * 6 + 4;
-        if (s >= sieve_end) { wheel.index = 41; break; }
-        case 41: unset_bit<3>(s); s += prime * 4 + 2;
-        if (s >= sieve_end) { wheel.index = 42; break; }
-        case 42: unset_bit<7>(s); s += prime * 2 + 2;
-        if (s >= sieve_end) { wheel.index = 43; break; }
-        case 43: unset_bit<1>(s); s += prime * 4 + 2;
-        if (s >= sieve_end) { wheel.index = 44; break; }
-        case 44: unset_bit<6>(s); s += prime * 2 + 2;
-        if (s >= sieve_end) { wheel.index = 45; break; }
-        case 45: unset_bit<0>(s); s += prime * 4 + 2;
-        if (s >= sieve_end) { wheel.index = 46; break; }
-        case 46: unset_bit<4>(s); s += prime * 6 + 4;
-        if (s >= sieve_end) { wheel.index = 47; break; }
-        case 47: unset_bit<2>(s); s += prime * 2 + 1;
-
-        while (s + prime * 28 + 18 < sieve_end)
-        {
-          unset_bit<5>(s + prime *  0 +  0);
-          unset_bit<3>(s + prime *  6 +  4);
-          unset_bit<7>(s + prime * 10 +  6);
-          unset_bit<1>(s + prime * 12 +  8);
-          unset_bit<6>(s + prime * 16 + 10);
-          unset_bit<0>(s + prime * 18 + 12);
-          unset_bit<4>(s + prime * 22 + 14);
-          unset_bit<2>(s + prime * 28 + 18);
-          s += prime * 30 + 19;
-        }
+        unset_bit<5>(s + prime *  0 +  0);
+        unset_bit<3>(s + prime *  6 +  4);
+        unset_bit<7>(s + prime * 10 +  6);
+        unset_bit<1>(s + prime * 12 +  8);
+        unset_bit<6>(s + prime * 16 + 10);
+        unset_bit<0>(s + prime * 18 + 12);
+        unset_bit<4>(s + prime * 22 + 14);
+        unset_bit<2>(s + prime * 28 + 18);
+        s += prime * 30 + 19;
       }
-      break;
+    }
+    break;
 
-      for (;;)
+    for (;;)
+    {
+      case 48: if (s >= sieve_end) { wheel.index = 48; break; }
+      unset_bit<6>(s); s += prime * 6 + 5;
+      case 49: if (s >= sieve_end) { wheel.index = 49; break; }
+      unset_bit<2>(s); s += prime * 4 + 3;
+      case 50: if (s >= sieve_end) { wheel.index = 50; break; }
+      unset_bit<3>(s); s += prime * 2 + 1;
+      case 51: if (s >= sieve_end) { wheel.index = 51; break; }
+      unset_bit<7>(s); s += prime * 4 + 4;
+      case 52: if (s >= sieve_end) { wheel.index = 52; break; }
+      unset_bit<0>(s); s += prime * 2 + 1;
+      case 53: if (s >= sieve_end) { wheel.index = 53; break; }
+      unset_bit<4>(s); s += prime * 4 + 3;
+      case 54: if (s >= sieve_end) { wheel.index = 54; break; }
+      unset_bit<5>(s); s += prime * 6 + 5;
+      case 55: if (s >= sieve_end) { wheel.index = 55; break; }
+      unset_bit<1>(s); s += prime * 2 + 1;
+
+      while (s + prime * 28 + 22 < sieve_end)
       {
-        if (s >= sieve_end) { wheel.index = 48; break; }
-        case 48: unset_bit<6>(s); s += prime * 6 + 5;
-        if (s >= sieve_end) { wheel.index = 49; break; }
-        case 49: unset_bit<2>(s); s += prime * 4 + 3;
-        if (s >= sieve_end) { wheel.index = 50; break; }
-        case 50: unset_bit<3>(s); s += prime * 2 + 1;
-        if (s >= sieve_end) { wheel.index = 51; break; }
-        case 51: unset_bit<7>(s); s += prime * 4 + 4;
-        if (s >= sieve_end) { wheel.index = 52; break; }
-        case 52: unset_bit<0>(s); s += prime * 2 + 1;
-        if (s >= sieve_end) { wheel.index = 53; break; }
-        case 53: unset_bit<4>(s); s += prime * 4 + 3;
-        if (s >= sieve_end) { wheel.index = 54; break; }
-        case 54: unset_bit<5>(s); s += prime * 6 + 5;
-        if (s >= sieve_end) { wheel.index = 55; break; }
-        case 55: unset_bit<1>(s); s += prime * 2 + 1;
-
-        while (s + prime * 28 + 22 < sieve_end)
-        {
-          unset_bit<6>(s + prime *  0 +  0);
-          unset_bit<2>(s + prime *  6 +  5);
-          unset_bit<3>(s + prime * 10 +  8);
-          unset_bit<7>(s + prime * 12 +  9);
-          unset_bit<0>(s + prime * 16 + 13);
-          unset_bit<4>(s + prime * 18 + 14);
-          unset_bit<5>(s + prime * 22 + 17);
-          unset_bit<1>(s + prime * 28 + 22);
-          s += prime * 30 + 23;
-        }
+        unset_bit<6>(s + prime *  0 +  0);
+        unset_bit<2>(s + prime *  6 +  5);
+        unset_bit<3>(s + prime * 10 +  8);
+        unset_bit<7>(s + prime * 12 +  9);
+        unset_bit<0>(s + prime * 16 + 13);
+        unset_bit<4>(s + prime * 18 + 14);
+        unset_bit<5>(s + prime * 22 + 17);
+        unset_bit<1>(s + prime * 28 + 22);
+        s += prime * 30 + 23;
       }
-      break;
+    }
+    break;
 
-      for (;;)
+    for (;;)
+    {
+      case 56: if (s >= sieve_end) { wheel.index = 56; break; }
+      unset_bit<7>(s); s += prime * 6 + 6;
+      case 57: if (s >= sieve_end) { wheel.index = 57; break; }
+      unset_bit<6>(s); s += prime * 4 + 4;
+      case 58: if (s >= sieve_end) { wheel.index = 58; break; }
+      unset_bit<5>(s); s += prime * 2 + 2;
+      case 59: if (s >= sieve_end) { wheel.index = 59; break; }
+      unset_bit<4>(s); s += prime * 4 + 4;
+      case 60: if (s >= sieve_end) { wheel.index = 60; break; }
+      unset_bit<3>(s); s += prime * 2 + 2;
+      case 61: if (s >= sieve_end) { wheel.index = 61; break; }
+      unset_bit<2>(s); s += prime * 4 + 4;
+      case 62: if (s >= sieve_end) { wheel.index = 62; break; }
+      unset_bit<1>(s); s += prime * 6 + 6;
+      case 63: if (s >= sieve_end) { wheel.index = 63; break; }
+      unset_bit<0>(s); s += prime * 2 + 1;
+
+      while (s + prime * 28 + 28 < sieve_end)
       {
-        if (s >= sieve_end) { wheel.index = 56; break; }
-        case 56: unset_bit<7>(s); s += prime * 6 + 6;
-        if (s >= sieve_end) { wheel.index = 57; break; }
-        case 57: unset_bit<6>(s); s += prime * 4 + 4;
-        if (s >= sieve_end) { wheel.index = 58; break; }
-        case 58: unset_bit<5>(s); s += prime * 2 + 2;
-        if (s >= sieve_end) { wheel.index = 59; break; }
-        case 59: unset_bit<4>(s); s += prime * 4 + 4;
-        if (s >= sieve_end) { wheel.index = 60; break; }
-        case 60: unset_bit<3>(s); s += prime * 2 + 2;
-        if (s >= sieve_end) { wheel.index = 61; break; }
-        case 61: unset_bit<2>(s); s += prime * 4 + 4;
-        if (s >= sieve_end) { wheel.index = 62; break; }
-        case 62: unset_bit<1>(s); s += prime * 6 + 6;
-        if (s >= sieve_end) { wheel.index = 63; break; }
-        case 63: unset_bit<0>(s); s += prime * 2 + 1;
-
-        while (s + prime * 28 + 28 < sieve_end)
-        {
-          unset_bit<7>(s + prime *  0 +  0);
-          unset_bit<6>(s + prime *  6 +  6);
-          unset_bit<5>(s + prime * 10 + 10);
-          unset_bit<4>(s + prime * 12 + 12);
-          unset_bit<3>(s + prime * 16 + 16);
-          unset_bit<2>(s + prime * 18 + 18);
-          unset_bit<1>(s + prime * 22 + 22);
-          unset_bit<0>(s + prime * 28 + 28);
-          s += prime * 30 + 29;
-        }
+        unset_bit<7>(s + prime *  0 +  0);
+        unset_bit<6>(s + prime *  6 +  6);
+        unset_bit<5>(s + prime * 10 + 10);
+        unset_bit<4>(s + prime * 12 + 12);
+        unset_bit<3>(s + prime * 16 + 16);
+        unset_bit<2>(s + prime * 18 + 18);
+        unset_bit<1>(s + prime * 22 + 22);
+        unset_bit<0>(s + prime * 28 + 28);
+        s += prime * 30 + 29;
       }
-      break;
     }
-
-    // update for the next segment
-    wheel.multiple = (uint32_t) (s - sieve_end);
+    break;
   }
+
+  // update for the next segment
+  wheel.multiple = (uint32_t) (s - sieve_end);
 }
 
 /// Remove the i-th prime and the multiples of the i-th
@@ -506,361 +499,354 @@ uint64_t Sieve::cross_off_count(uint64_t prime, uint64_t i)
     add(prime);
 
   uint64_t cnt = 0;
-  uint32_t sieve_size = (uint32_t) sieve_.size();
   Wheel& wheel = wheel_[i];
+  // pointer to the byte with the 1st multiple
+  byte_t* s = sieve_ + wheel.multiple;
+  byte_t* sieve_end = sieve_ + sieve_size_;
+  prime /= 30;
 
-  if (wheel.multiple >= sieve_size)
-    wheel.multiple -= sieve_size;
-  else
+  switch (wheel.index)
   {
-    // pointer to the byte with the 1st multiple 
-    byte_t* s = &sieve_[wheel.multiple];
-    byte_t* sieve_end = &sieve_[sieve_size];
-    prime /= 30;
-
-    switch (wheel.index)
+    for (;;)
     {
-      for (;;)
+      case 0: if (s >= sieve_end) { wheel.index = 0; break; }
+      cnt += is_bit<0>(s); unset_bit<0>(s); s += prime * 6 + 0;
+      case 1: if (s >= sieve_end) { wheel.index = 1; break; }
+      cnt += is_bit<1>(s); unset_bit<1>(s); s += prime * 4 + 0;
+      case 2: if (s >= sieve_end) { wheel.index = 2; break; }
+      cnt += is_bit<2>(s); unset_bit<2>(s); s += prime * 2 + 0;
+      case 3: if (s >= sieve_end) { wheel.index = 3; break; }
+      cnt += is_bit<3>(s); unset_bit<3>(s); s += prime * 4 + 0;
+      case 4: if (s >= sieve_end) { wheel.index = 4; break; }
+      cnt += is_bit<4>(s); unset_bit<4>(s); s += prime * 2 + 0;
+      case 5: if (s >= sieve_end) { wheel.index = 5; break; }
+      cnt += is_bit<5>(s); unset_bit<5>(s); s += prime * 4 + 0;
+      case 6: if (s >= sieve_end) { wheel.index = 6; break; }
+      cnt += is_bit<6>(s); unset_bit<6>(s); s += prime * 6 + 0;
+      case 7: if (s >= sieve_end) { wheel.index = 7; break; }
+      cnt += is_bit<7>(s); unset_bit<7>(s); s += prime * 2 + 1;
+
+      while (s + prime * 28 < sieve_end)
       {
-        if (s >= sieve_end) { wheel.index = 0; break; }
-        case 0: cnt += is_bit<0>(s); unset_bit<0>(s); s += prime * 6 + 0;
-        if (s >= sieve_end) { wheel.index = 1; break; }
-        case 1: cnt += is_bit<1>(s); unset_bit<1>(s); s += prime * 4 + 0;
-        if (s >= sieve_end) { wheel.index = 2; break; }
-        case 2: cnt += is_bit<2>(s); unset_bit<2>(s); s += prime * 2 + 0;
-        if (s >= sieve_end) { wheel.index = 3; break; }
-        case 3: cnt += is_bit<3>(s); unset_bit<3>(s); s += prime * 4 + 0;
-        if (s >= sieve_end) { wheel.index = 4; break; }
-        case 4: cnt += is_bit<4>(s); unset_bit<4>(s); s += prime * 2 + 0;
-        if (s >= sieve_end) { wheel.index = 5; break; }
-        case 5: cnt += is_bit<5>(s); unset_bit<5>(s); s += prime * 4 + 0;
-        if (s >= sieve_end) { wheel.index = 6; break; }
-        case 6: cnt += is_bit<6>(s); unset_bit<6>(s); s += prime * 6 + 0;
-        if (s >= sieve_end) { wheel.index = 7; break; }
-        case 7: cnt += is_bit<7>(s); unset_bit<7>(s); s += prime * 2 + 1;
-
-        while (s + prime * 28 < sieve_end)
-        {
-          cnt += is_bit<0>(s + prime *  0);
-          unset_bit<0>(s + prime *  0);
-          cnt += is_bit<1>(s + prime *  6);
-          unset_bit<1>(s + prime *  6);
-          cnt += is_bit<2>(s + prime * 10);
-          unset_bit<2>(s + prime * 10);
-          cnt += is_bit<3>(s + prime * 12);
-          unset_bit<3>(s + prime * 12);
-          cnt += is_bit<4>(s + prime * 16);
-          unset_bit<4>(s + prime * 16);
-          cnt += is_bit<5>(s + prime * 18);
-          unset_bit<5>(s + prime * 18);
-          cnt += is_bit<6>(s + prime * 22);
-          unset_bit<6>(s + prime * 22);
-          cnt += is_bit<7>(s + prime * 28);
-          unset_bit<7>(s + prime * 28);
-          s += prime * 30 + 1;
-        }
+        cnt += is_bit<0>(s + prime *  0);
+        unset_bit<0>(s + prime *  0);
+        cnt += is_bit<1>(s + prime *  6);
+        unset_bit<1>(s + prime *  6);
+        cnt += is_bit<2>(s + prime * 10);
+        unset_bit<2>(s + prime * 10);
+        cnt += is_bit<3>(s + prime * 12);
+        unset_bit<3>(s + prime * 12);
+        cnt += is_bit<4>(s + prime * 16);
+        unset_bit<4>(s + prime * 16);
+        cnt += is_bit<5>(s + prime * 18);
+        unset_bit<5>(s + prime * 18);
+        cnt += is_bit<6>(s + prime * 22);
+        unset_bit<6>(s + prime * 22);
+        cnt += is_bit<7>(s + prime * 28);
+        unset_bit<7>(s + prime * 28);
+        s += prime * 30 + 1;
       }
-      break;
+    }
+    break;
 
-      for (;;)
+    for (;;)
+    {
+      case  8: if (s >= sieve_end) { wheel.index =  8; break; }
+      cnt += is_bit<1>(s); unset_bit<1>(s); s += prime * 6 + 1;
+      case  9: if (s >= sieve_end) { wheel.index =  9; break; }
+      cnt += is_bit<5>(s); unset_bit<5>(s); s += prime * 4 + 1;
+      case 10: if (s >= sieve_end) { wheel.index = 10; break; }
+      cnt += is_bit<4>(s); unset_bit<4>(s); s += prime * 2 + 1;
+      case 11: if (s >= sieve_end) { wheel.index = 11; break; }
+      cnt += is_bit<0>(s); unset_bit<0>(s); s += prime * 4 + 0;
+      case 12: if (s >= sieve_end) { wheel.index = 12; break; }
+      cnt += is_bit<7>(s); unset_bit<7>(s); s += prime * 2 + 1;
+      case 13: if (s >= sieve_end) { wheel.index = 13; break; }
+      cnt += is_bit<3>(s); unset_bit<3>(s); s += prime * 4 + 1;
+      case 14: if (s >= sieve_end) { wheel.index = 14; break; }
+      cnt += is_bit<2>(s); unset_bit<2>(s); s += prime * 6 + 1;
+      case 15: if (s >= sieve_end) { wheel.index = 15; break; }
+      cnt += is_bit<6>(s); unset_bit<6>(s); s += prime * 2 + 1;
+
+      while (s + prime * 28 + 6 < sieve_end)
       {
-        if (s >= sieve_end) { wheel.index =  8; break; }
-        case  8: cnt += is_bit<1>(s); unset_bit<1>(s); s += prime * 6 + 1;
-        if (s >= sieve_end) { wheel.index =  9; break; }
-        case  9: cnt += is_bit<5>(s); unset_bit<5>(s); s += prime * 4 + 1;
-        if (s >= sieve_end) { wheel.index = 10; break; }
-        case 10: cnt += is_bit<4>(s); unset_bit<4>(s); s += prime * 2 + 1;
-        if (s >= sieve_end) { wheel.index = 11; break; }
-        case 11: cnt += is_bit<0>(s); unset_bit<0>(s); s += prime * 4 + 0;
-        if (s >= sieve_end) { wheel.index = 12; break; }
-        case 12: cnt += is_bit<7>(s); unset_bit<7>(s); s += prime * 2 + 1;
-        if (s >= sieve_end) { wheel.index = 13; break; }
-        case 13: cnt += is_bit<3>(s); unset_bit<3>(s); s += prime * 4 + 1;
-        if (s >= sieve_end) { wheel.index = 14; break; }
-        case 14: cnt += is_bit<2>(s); unset_bit<2>(s); s += prime * 6 + 1;
-        if (s >= sieve_end) { wheel.index = 15; break; }
-        case 15: cnt += is_bit<6>(s); unset_bit<6>(s); s += prime * 2 + 1;
-
-        while (s + prime * 28 + 6 < sieve_end)
-        {
-          cnt += is_bit<1>(s + prime *  0 + 0);
-          unset_bit<1>(s + prime *  0 + 0);
-          cnt += is_bit<5>(s + prime *  6 + 1);
-          unset_bit<5>(s + prime *  6 + 1);
-          cnt += is_bit<4>(s + prime * 10 + 2);
-          unset_bit<4>(s + prime * 10 + 2);
-          cnt += is_bit<0>(s + prime * 12 + 3);
-          unset_bit<0>(s + prime * 12 + 3);
-          cnt += is_bit<7>(s + prime * 16 + 3);
-          unset_bit<7>(s + prime * 16 + 3);
-          cnt += is_bit<3>(s + prime * 18 + 4);
-          unset_bit<3>(s + prime * 18 + 4);
-          cnt += is_bit<2>(s + prime * 22 + 5);
-          unset_bit<2>(s + prime * 22 + 5);
-          cnt += is_bit<6>(s + prime * 28 + 6);
-          unset_bit<6>(s + prime * 28 + 6);
-          s += prime * 30 + 7;
-        }
+        cnt += is_bit<1>(s + prime *  0 + 0);
+        unset_bit<1>(s + prime *  0 + 0);
+        cnt += is_bit<5>(s + prime *  6 + 1);
+        unset_bit<5>(s + prime *  6 + 1);
+        cnt += is_bit<4>(s + prime * 10 + 2);
+        unset_bit<4>(s + prime * 10 + 2);
+        cnt += is_bit<0>(s + prime * 12 + 3);
+        unset_bit<0>(s + prime * 12 + 3);
+        cnt += is_bit<7>(s + prime * 16 + 3);
+        unset_bit<7>(s + prime * 16 + 3);
+        cnt += is_bit<3>(s + prime * 18 + 4);
+        unset_bit<3>(s + prime * 18 + 4);
+        cnt += is_bit<2>(s + prime * 22 + 5);
+        unset_bit<2>(s + prime * 22 + 5);
+        cnt += is_bit<6>(s + prime * 28 + 6);
+        unset_bit<6>(s + prime * 28 + 6);
+        s += prime * 30 + 7;
       }
-      break;
+    }
+    break;
 
-      for (;;)
+    for (;;)
+    {
+      case 16: if (s >= sieve_end) { wheel.index = 16; break; }
+      cnt += is_bit<2>(s); unset_bit<2>(s); s += prime * 6 + 2;
+      case 17: if (s >= sieve_end) { wheel.index = 17; break; }
+      cnt += is_bit<4>(s); unset_bit<4>(s); s += prime * 4 + 2;
+      case 18: if (s >= sieve_end) { wheel.index = 18; break; }
+      cnt += is_bit<0>(s); unset_bit<0>(s); s += prime * 2 + 0;
+      case 19: if (s >= sieve_end) { wheel.index = 19; break; }
+      cnt += is_bit<6>(s); unset_bit<6>(s); s += prime * 4 + 2;
+      case 20: if (s >= sieve_end) { wheel.index = 20; break; }
+      cnt += is_bit<1>(s); unset_bit<1>(s); s += prime * 2 + 0;
+      case 21: if (s >= sieve_end) { wheel.index = 21; break; }
+      cnt += is_bit<7>(s); unset_bit<7>(s); s += prime * 4 + 2;
+      case 22: if (s >= sieve_end) { wheel.index = 22; break; }
+      cnt += is_bit<3>(s); unset_bit<3>(s); s += prime * 6 + 2;
+      case 23: if (s >= sieve_end) { wheel.index = 23; break; }
+      cnt += is_bit<5>(s); unset_bit<5>(s); s += prime * 2 + 1;
+
+      while (s + prime * 28 + 10 < sieve_end)
       {
-        if (s >= sieve_end) { wheel.index = 16; break; }
-        case 16: cnt += is_bit<2>(s); unset_bit<2>(s); s += prime * 6 + 2;
-        if (s >= sieve_end) { wheel.index = 17; break; }
-        case 17: cnt += is_bit<4>(s); unset_bit<4>(s); s += prime * 4 + 2;
-        if (s >= sieve_end) { wheel.index = 18; break; }
-        case 18: cnt += is_bit<0>(s); unset_bit<0>(s); s += prime * 2 + 0;
-        if (s >= sieve_end) { wheel.index = 19; break; }
-        case 19: cnt += is_bit<6>(s); unset_bit<6>(s); s += prime * 4 + 2;
-        if (s >= sieve_end) { wheel.index = 20; break; }
-        case 20: cnt += is_bit<1>(s); unset_bit<1>(s); s += prime * 2 + 0;
-        if (s >= sieve_end) { wheel.index = 21; break; }
-        case 21: cnt += is_bit<7>(s); unset_bit<7>(s); s += prime * 4 + 2;
-        if (s >= sieve_end) { wheel.index = 22; break; }
-        case 22: cnt += is_bit<3>(s); unset_bit<3>(s); s += prime * 6 + 2;
-        if (s >= sieve_end) { wheel.index = 23; break; }
-        case 23: cnt += is_bit<5>(s); unset_bit<5>(s); s += prime * 2 + 1;
-
-        while (s + prime * 28 + 10 < sieve_end)
-        {
-          cnt += is_bit<2>(s + prime *  0 +  0);
-          unset_bit<2>(s + prime *  0 +  0);
-          cnt += is_bit<4>(s + prime *  6 +  2);
-          unset_bit<4>(s + prime *  6 +  2);
-          cnt += is_bit<0>(s + prime * 10 +  4);
-          unset_bit<0>(s + prime * 10 +  4);
-          cnt += is_bit<6>(s + prime * 12 +  4);
-          unset_bit<6>(s + prime * 12 +  4);
-          cnt += is_bit<1>(s + prime * 16 +  6);
-          unset_bit<1>(s + prime * 16 +  6);
-          cnt += is_bit<7>(s + prime * 18 +  6);
-          unset_bit<7>(s + prime * 18 +  6);
-          cnt += is_bit<3>(s + prime * 22 +  8);
-          unset_bit<3>(s + prime * 22 +  8);
-          cnt += is_bit<5>(s + prime * 28 + 10);
-          unset_bit<5>(s + prime * 28 + 10);
-          s += prime * 30 + 11;
-        }
+        cnt += is_bit<2>(s + prime *  0 +  0);
+        unset_bit<2>(s + prime *  0 +  0);
+        cnt += is_bit<4>(s + prime *  6 +  2);
+        unset_bit<4>(s + prime *  6 +  2);
+        cnt += is_bit<0>(s + prime * 10 +  4);
+        unset_bit<0>(s + prime * 10 +  4);
+        cnt += is_bit<6>(s + prime * 12 +  4);
+        unset_bit<6>(s + prime * 12 +  4);
+        cnt += is_bit<1>(s + prime * 16 +  6);
+        unset_bit<1>(s + prime * 16 +  6);
+        cnt += is_bit<7>(s + prime * 18 +  6);
+        unset_bit<7>(s + prime * 18 +  6);
+        cnt += is_bit<3>(s + prime * 22 +  8);
+        unset_bit<3>(s + prime * 22 +  8);
+        cnt += is_bit<5>(s + prime * 28 + 10);
+        unset_bit<5>(s + prime * 28 + 10);
+        s += prime * 30 + 11;
       }
-      break;
+    }
+    break;
 
-      for (;;)
+    for (;;)
+    {
+      case 24: if (s >= sieve_end) { wheel.index = 24; break; }
+      cnt += is_bit<3>(s); unset_bit<3>(s); s += prime * 6 + 3;
+      case 25: if (s >= sieve_end) { wheel.index = 25; break; }
+      cnt += is_bit<0>(s); unset_bit<0>(s); s += prime * 4 + 1;
+      case 26: if (s >= sieve_end) { wheel.index = 26; break; }
+      cnt += is_bit<6>(s); unset_bit<6>(s); s += prime * 2 + 1;
+      case 27: if (s >= sieve_end) { wheel.index = 27; break; }
+      cnt += is_bit<5>(s); unset_bit<5>(s); s += prime * 4 + 2;
+      case 28: if (s >= sieve_end) { wheel.index = 28; break; }
+      cnt += is_bit<2>(s); unset_bit<2>(s); s += prime * 2 + 1;
+      case 29: if (s >= sieve_end) { wheel.index = 29; break; }
+      cnt += is_bit<1>(s); unset_bit<1>(s); s += prime * 4 + 1;
+      case 30: if (s >= sieve_end) { wheel.index = 30; break; }
+      cnt += is_bit<7>(s); unset_bit<7>(s); s += prime * 6 + 3;
+      case 31: if (s >= sieve_end) { wheel.index = 31; break; }
+      cnt += is_bit<4>(s); unset_bit<4>(s); s += prime * 2 + 1;
+
+      while (s + prime * 28 + 12 < sieve_end)
       {
-        if (s >= sieve_end) { wheel.index = 24; break; }
-        case 24: cnt += is_bit<3>(s); unset_bit<3>(s); s += prime * 6 + 3;
-        if (s >= sieve_end) { wheel.index = 25; break; }
-        case 25: cnt += is_bit<0>(s); unset_bit<0>(s); s += prime * 4 + 1;
-        if (s >= sieve_end) { wheel.index = 26; break; }
-        case 26: cnt += is_bit<6>(s); unset_bit<6>(s); s += prime * 2 + 1;
-        if (s >= sieve_end) { wheel.index = 27; break; }
-        case 27: cnt += is_bit<5>(s); unset_bit<5>(s); s += prime * 4 + 2;
-        if (s >= sieve_end) { wheel.index = 28; break; }
-        case 28: cnt += is_bit<2>(s); unset_bit<2>(s); s += prime * 2 + 1;
-        if (s >= sieve_end) { wheel.index = 29; break; }
-        case 29: cnt += is_bit<1>(s); unset_bit<1>(s); s += prime * 4 + 1;
-        if (s >= sieve_end) { wheel.index = 30; break; }
-        case 30: cnt += is_bit<7>(s); unset_bit<7>(s); s += prime * 6 + 3;
-        if (s >= sieve_end) { wheel.index = 31; break; }
-        case 31: cnt += is_bit<4>(s); unset_bit<4>(s); s += prime * 2 + 1;
-
-        while (s + prime * 28 + 12 < sieve_end)
-        {
-          cnt += is_bit<3>(s + prime *  0 +  0);
-          unset_bit<3>(s + prime *  0 +  0);
-          cnt += is_bit<0>(s + prime *  6 +  3);
-          unset_bit<0>(s + prime *  6 +  3);
-          cnt += is_bit<6>(s + prime * 10 +  4);
-          unset_bit<6>(s + prime * 10 +  4);
-          cnt += is_bit<5>(s + prime * 12 +  5);
-          unset_bit<5>(s + prime * 12 +  5);
-          cnt += is_bit<2>(s + prime * 16 +  7);
-          unset_bit<2>(s + prime * 16 +  7);
-          cnt += is_bit<1>(s + prime * 18 +  8);
-          unset_bit<1>(s + prime * 18 +  8);
-          cnt += is_bit<7>(s + prime * 22 +  9);
-          unset_bit<7>(s + prime * 22 +  9);
-          cnt += is_bit<4>(s + prime * 28 + 12);
-          unset_bit<4>(s + prime * 28 + 12);
-          s += prime * 30 + 13;
-        }
+        cnt += is_bit<3>(s + prime *  0 +  0);
+        unset_bit<3>(s + prime *  0 +  0);
+        cnt += is_bit<0>(s + prime *  6 +  3);
+        unset_bit<0>(s + prime *  6 +  3);
+        cnt += is_bit<6>(s + prime * 10 +  4);
+        unset_bit<6>(s + prime * 10 +  4);
+        cnt += is_bit<5>(s + prime * 12 +  5);
+        unset_bit<5>(s + prime * 12 +  5);
+        cnt += is_bit<2>(s + prime * 16 +  7);
+        unset_bit<2>(s + prime * 16 +  7);
+        cnt += is_bit<1>(s + prime * 18 +  8);
+        unset_bit<1>(s + prime * 18 +  8);
+        cnt += is_bit<7>(s + prime * 22 +  9);
+        unset_bit<7>(s + prime * 22 +  9);
+        cnt += is_bit<4>(s + prime * 28 + 12);
+        unset_bit<4>(s + prime * 28 + 12);
+        s += prime * 30 + 13;
       }
-      break;
+    }
+    break;
 
-      for (;;)
+    for (;;)
+    {
+      case 32: if (s >= sieve_end) { wheel.index = 32; break; }
+      cnt += is_bit<4>(s); unset_bit<4>(s); s += prime * 6 + 3;
+      case 33: if (s >= sieve_end) { wheel.index = 33; break; }
+      cnt += is_bit<7>(s); unset_bit<7>(s); s += prime * 4 + 3;
+      case 34: if (s >= sieve_end) { wheel.index = 34; break; }
+      cnt += is_bit<1>(s); unset_bit<1>(s); s += prime * 2 + 1;
+      case 35: if (s >= sieve_end) { wheel.index = 35; break; }
+      cnt += is_bit<2>(s); unset_bit<2>(s); s += prime * 4 + 2;
+      case 36: if (s >= sieve_end) { wheel.index = 36; break; }
+      cnt += is_bit<5>(s); unset_bit<5>(s); s += prime * 2 + 1;
+      case 37: if (s >= sieve_end) { wheel.index = 37; break; }
+      cnt += is_bit<6>(s); unset_bit<6>(s); s += prime * 4 + 3;
+      case 38: if (s >= sieve_end) { wheel.index = 38; break; }
+      cnt += is_bit<0>(s); unset_bit<0>(s); s += prime * 6 + 3;
+      case 39: if (s >= sieve_end) { wheel.index = 39; break; }
+      cnt += is_bit<3>(s); unset_bit<3>(s); s += prime * 2 + 1;
+
+      while (s + prime * 28 + 16 < sieve_end)
       {
-        if (s >= sieve_end) { wheel.index = 32; break; }
-        case 32: cnt += is_bit<4>(s); unset_bit<4>(s); s += prime * 6 + 3;
-        if (s >= sieve_end) { wheel.index = 33; break; }
-        case 33: cnt += is_bit<7>(s); unset_bit<7>(s); s += prime * 4 + 3;
-        if (s >= sieve_end) { wheel.index = 34; break; }
-        case 34: cnt += is_bit<1>(s); unset_bit<1>(s); s += prime * 2 + 1;
-        if (s >= sieve_end) { wheel.index = 35; break; }
-        case 35: cnt += is_bit<2>(s); unset_bit<2>(s); s += prime * 4 + 2;
-        if (s >= sieve_end) { wheel.index = 36; break; }
-        case 36: cnt += is_bit<5>(s); unset_bit<5>(s); s += prime * 2 + 1;
-        if (s >= sieve_end) { wheel.index = 37; break; }
-        case 37: cnt += is_bit<6>(s); unset_bit<6>(s); s += prime * 4 + 3;
-        if (s >= sieve_end) { wheel.index = 38; break; }
-        case 38: cnt += is_bit<0>(s); unset_bit<0>(s); s += prime * 6 + 3;
-        if (s >= sieve_end) { wheel.index = 39; break; }
-        case 39: cnt += is_bit<3>(s); unset_bit<3>(s); s += prime * 2 + 1;
-
-        while (s + prime * 28 + 16 < sieve_end)
-        {
-          cnt += is_bit<4>(s + prime *  0 +  0);
-          unset_bit<4>(s + prime *  0 +  0);
-          cnt += is_bit<7>(s + prime *  6 +  3);
-          unset_bit<7>(s + prime *  6 +  3);
-          cnt += is_bit<1>(s + prime * 10 +  6);
-          unset_bit<1>(s + prime * 10 +  6);
-          cnt += is_bit<2>(s + prime * 12 +  7);
-          unset_bit<2>(s + prime * 12 +  7);
-          cnt += is_bit<5>(s + prime * 16 +  9);
-          unset_bit<5>(s + prime * 16 +  9);
-          cnt += is_bit<6>(s + prime * 18 + 10);
-          unset_bit<6>(s + prime * 18 + 10);
-          cnt += is_bit<0>(s + prime * 22 + 13);
-          unset_bit<0>(s + prime * 22 + 13);
-          cnt += is_bit<3>(s + prime * 28 + 16);
-          unset_bit<3>(s + prime * 28 + 16);
-          s += prime * 30 + 17;
-        }
+        cnt += is_bit<4>(s + prime *  0 +  0);
+        unset_bit<4>(s + prime *  0 +  0);
+        cnt += is_bit<7>(s + prime *  6 +  3);
+        unset_bit<7>(s + prime *  6 +  3);
+        cnt += is_bit<1>(s + prime * 10 +  6);
+        unset_bit<1>(s + prime * 10 +  6);
+        cnt += is_bit<2>(s + prime * 12 +  7);
+        unset_bit<2>(s + prime * 12 +  7);
+        cnt += is_bit<5>(s + prime * 16 +  9);
+        unset_bit<5>(s + prime * 16 +  9);
+        cnt += is_bit<6>(s + prime * 18 + 10);
+        unset_bit<6>(s + prime * 18 + 10);
+        cnt += is_bit<0>(s + prime * 22 + 13);
+        unset_bit<0>(s + prime * 22 + 13);
+        cnt += is_bit<3>(s + prime * 28 + 16);
+        unset_bit<3>(s + prime * 28 + 16);
+        s += prime * 30 + 17;
       }
-      break;
+    }
+    break;
 
-      for (;;)
+    for (;;)
+    {
+      case 40: if (s >= sieve_end) { wheel.index = 40; break; }
+      cnt += is_bit<5>(s); unset_bit<5>(s); s += prime * 6 + 4;
+      case 41: if (s >= sieve_end) { wheel.index = 41; break; }
+      cnt += is_bit<3>(s); unset_bit<3>(s); s += prime * 4 + 2;
+      case 42: if (s >= sieve_end) { wheel.index = 42; break; }
+      cnt += is_bit<7>(s); unset_bit<7>(s); s += prime * 2 + 2;
+      case 43: if (s >= sieve_end) { wheel.index = 43; break; }
+      cnt += is_bit<1>(s); unset_bit<1>(s); s += prime * 4 + 2;
+      case 44: if (s >= sieve_end) { wheel.index = 44; break; }
+      cnt += is_bit<6>(s); unset_bit<6>(s); s += prime * 2 + 2;
+      case 45: if (s >= sieve_end) { wheel.index = 45; break; }
+      cnt += is_bit<0>(s); unset_bit<0>(s); s += prime * 4 + 2;
+      case 46: if (s >= sieve_end) { wheel.index = 46; break; }
+      cnt += is_bit<4>(s); unset_bit<4>(s); s += prime * 6 + 4;
+      case 47: if (s >= sieve_end) { wheel.index = 47; break; }
+      cnt += is_bit<2>(s); unset_bit<2>(s); s += prime * 2 + 1;
+
+      while (s + prime * 28 + 18 < sieve_end)
       {
-        if (s >= sieve_end) { wheel.index = 40; break; }
-        case 40: cnt += is_bit<5>(s); unset_bit<5>(s); s += prime * 6 + 4;
-        if (s >= sieve_end) { wheel.index = 41; break; }
-        case 41: cnt += is_bit<3>(s); unset_bit<3>(s); s += prime * 4 + 2;
-        if (s >= sieve_end) { wheel.index = 42; break; }
-        case 42: cnt += is_bit<7>(s); unset_bit<7>(s); s += prime * 2 + 2;
-        if (s >= sieve_end) { wheel.index = 43; break; }
-        case 43: cnt += is_bit<1>(s); unset_bit<1>(s); s += prime * 4 + 2;
-        if (s >= sieve_end) { wheel.index = 44; break; }
-        case 44: cnt += is_bit<6>(s); unset_bit<6>(s); s += prime * 2 + 2;
-        if (s >= sieve_end) { wheel.index = 45; break; }
-        case 45: cnt += is_bit<0>(s); unset_bit<0>(s); s += prime * 4 + 2;
-        if (s >= sieve_end) { wheel.index = 46; break; }
-        case 46: cnt += is_bit<4>(s); unset_bit<4>(s); s += prime * 6 + 4;
-        if (s >= sieve_end) { wheel.index = 47; break; }
-        case 47: cnt += is_bit<2>(s); unset_bit<2>(s); s += prime * 2 + 1;
-
-        while (s + prime * 28 + 18 < sieve_end)
-        {
-          cnt += is_bit<5>(s + prime *  0 +  0);
-          unset_bit<5>(s + prime *  0 +  0);
-          cnt += is_bit<3>(s + prime *  6 +  4);
-          unset_bit<3>(s + prime *  6 +  4);
-          cnt += is_bit<7>(s + prime * 10 +  6);
-          unset_bit<7>(s + prime * 10 +  6);
-          cnt += is_bit<1>(s + prime * 12 +  8);
-          unset_bit<1>(s + prime * 12 +  8);
-          cnt += is_bit<6>(s + prime * 16 + 10);
-          unset_bit<6>(s + prime * 16 + 10);
-          cnt += is_bit<0>(s + prime * 18 + 12);
-          unset_bit<0>(s + prime * 18 + 12);
-          cnt += is_bit<4>(s + prime * 22 + 14);
-          unset_bit<4>(s + prime * 22 + 14);
-          cnt += is_bit<2>(s + prime * 28 + 18);
-          unset_bit<2>(s + prime * 28 + 18);
-          s += prime * 30 + 19;
-        }
+        cnt += is_bit<5>(s + prime *  0 +  0);
+        unset_bit<5>(s + prime *  0 +  0);
+        cnt += is_bit<3>(s + prime *  6 +  4);
+        unset_bit<3>(s + prime *  6 +  4);
+        cnt += is_bit<7>(s + prime * 10 +  6);
+        unset_bit<7>(s + prime * 10 +  6);
+        cnt += is_bit<1>(s + prime * 12 +  8);
+        unset_bit<1>(s + prime * 12 +  8);
+        cnt += is_bit<6>(s + prime * 16 + 10);
+        unset_bit<6>(s + prime * 16 + 10);
+        cnt += is_bit<0>(s + prime * 18 + 12);
+        unset_bit<0>(s + prime * 18 + 12);
+        cnt += is_bit<4>(s + prime * 22 + 14);
+        unset_bit<4>(s + prime * 22 + 14);
+        cnt += is_bit<2>(s + prime * 28 + 18);
+        unset_bit<2>(s + prime * 28 + 18);
+        s += prime * 30 + 19;
       }
-      break;
+    }
+    break;
 
-      for (;;)
+    for (;;)
+    {
+      case 48: if (s >= sieve_end) { wheel.index = 48; break; }
+      cnt += is_bit<6>(s); unset_bit<6>(s); s += prime * 6 + 5;
+      case 49: if (s >= sieve_end) { wheel.index = 49; break; }
+      cnt += is_bit<2>(s); unset_bit<2>(s); s += prime * 4 + 3;
+      case 50: if (s >= sieve_end) { wheel.index = 50; break; }
+      cnt += is_bit<3>(s); unset_bit<3>(s); s += prime * 2 + 1;
+      case 51: if (s >= sieve_end) { wheel.index = 51; break; }
+      cnt += is_bit<7>(s); unset_bit<7>(s); s += prime * 4 + 4;
+      case 52: if (s >= sieve_end) { wheel.index = 52; break; }
+      cnt += is_bit<0>(s); unset_bit<0>(s); s += prime * 2 + 1;
+      case 53: if (s >= sieve_end) { wheel.index = 53; break; }
+      cnt += is_bit<4>(s); unset_bit<4>(s); s += prime * 4 + 3;
+      case 54: if (s >= sieve_end) { wheel.index = 54; break; }
+      cnt += is_bit<5>(s); unset_bit<5>(s); s += prime * 6 + 5;
+      case 55: if (s >= sieve_end) { wheel.index = 55; break; }
+      cnt += is_bit<1>(s); unset_bit<1>(s); s += prime * 2 + 1;
+
+      while (s + prime * 28 + 22 < sieve_end)
       {
-        if (s >= sieve_end) { wheel.index = 48; break; }
-        case 48: cnt += is_bit<6>(s); unset_bit<6>(s); s += prime * 6 + 5;
-        if (s >= sieve_end) { wheel.index = 49; break; }
-        case 49: cnt += is_bit<2>(s); unset_bit<2>(s); s += prime * 4 + 3;
-        if (s >= sieve_end) { wheel.index = 50; break; }
-        case 50: cnt += is_bit<3>(s); unset_bit<3>(s); s += prime * 2 + 1;
-        if (s >= sieve_end) { wheel.index = 51; break; }
-        case 51: cnt += is_bit<7>(s); unset_bit<7>(s); s += prime * 4 + 4;
-        if (s >= sieve_end) { wheel.index = 52; break; }
-        case 52: cnt += is_bit<0>(s); unset_bit<0>(s); s += prime * 2 + 1;
-        if (s >= sieve_end) { wheel.index = 53; break; }
-        case 53: cnt += is_bit<4>(s); unset_bit<4>(s); s += prime * 4 + 3;
-        if (s >= sieve_end) { wheel.index = 54; break; }
-        case 54: cnt += is_bit<5>(s); unset_bit<5>(s); s += prime * 6 + 5;
-        if (s >= sieve_end) { wheel.index = 55; break; }
-        case 55: cnt += is_bit<1>(s); unset_bit<1>(s); s += prime * 2 + 1;
-
-        while (s + prime * 28 + 22 < sieve_end)
-        {
-          cnt += is_bit<6>(s + prime *  0 +  0);
-          unset_bit<6>(s + prime *  0 +  0);
-          cnt += is_bit<2>(s + prime *  6 +  5);
-          unset_bit<2>(s + prime *  6 +  5);
-          cnt += is_bit<3>(s + prime * 10 +  8);
-          unset_bit<3>(s + prime * 10 +  8);
-          cnt += is_bit<7>(s + prime * 12 +  9);
-          unset_bit<7>(s + prime * 12 +  9);
-          cnt += is_bit<0>(s + prime * 16 + 13);
-          unset_bit<0>(s + prime * 16 + 13);
-          cnt += is_bit<4>(s + prime * 18 + 14);
-          unset_bit<4>(s + prime * 18 + 14);
-          cnt += is_bit<5>(s + prime * 22 + 17);
-          unset_bit<5>(s + prime * 22 + 17);
-          cnt += is_bit<1>(s + prime * 28 + 22);
-          unset_bit<1>(s + prime * 28 + 22);
-          s += prime * 30 + 23;
-        }
+        cnt += is_bit<6>(s + prime *  0 +  0);
+        unset_bit<6>(s + prime *  0 +  0);
+        cnt += is_bit<2>(s + prime *  6 +  5);
+        unset_bit<2>(s + prime *  6 +  5);
+        cnt += is_bit<3>(s + prime * 10 +  8);
+        unset_bit<3>(s + prime * 10 +  8);
+        cnt += is_bit<7>(s + prime * 12 +  9);
+        unset_bit<7>(s + prime * 12 +  9);
+        cnt += is_bit<0>(s + prime * 16 + 13);
+        unset_bit<0>(s + prime * 16 + 13);
+        cnt += is_bit<4>(s + prime * 18 + 14);
+        unset_bit<4>(s + prime * 18 + 14);
+        cnt += is_bit<5>(s + prime * 22 + 17);
+        unset_bit<5>(s + prime * 22 + 17);
+        cnt += is_bit<1>(s + prime * 28 + 22);
+        unset_bit<1>(s + prime * 28 + 22);
+        s += prime * 30 + 23;
       }
-      break;
+    }
+    break;
 
-      for (;;)
+    for (;;)
+    {
+      case 56: if (s >= sieve_end) { wheel.index = 56; break; }
+      cnt += is_bit<7>(s); unset_bit<7>(s); s += prime * 6 + 6;
+      case 57: if (s >= sieve_end) { wheel.index = 57; break; }
+      cnt += is_bit<6>(s); unset_bit<6>(s); s += prime * 4 + 4;
+      case 58: if (s >= sieve_end) { wheel.index = 58; break; }
+      cnt += is_bit<5>(s); unset_bit<5>(s); s += prime * 2 + 2;
+      case 59: if (s >= sieve_end) { wheel.index = 59; break; }
+      cnt += is_bit<4>(s); unset_bit<4>(s); s += prime * 4 + 4;
+      case 60: if (s >= sieve_end) { wheel.index = 60; break; }
+      cnt += is_bit<3>(s); unset_bit<3>(s); s += prime * 2 + 2;
+      case 61: if (s >= sieve_end) { wheel.index = 61; break; }
+      cnt += is_bit<2>(s); unset_bit<2>(s); s += prime * 4 + 4;
+      case 62: if (s >= sieve_end) { wheel.index = 62; break; }
+      cnt += is_bit<1>(s); unset_bit<1>(s); s += prime * 6 + 6;
+      case 63: if (s >= sieve_end) { wheel.index = 63; break; }
+      cnt += is_bit<0>(s); unset_bit<0>(s); s += prime * 2 + 1;
+
+      while (s + prime * 28 + 28 < sieve_end)
       {
-        if (s >= sieve_end) { wheel.index = 56; break; }
-        case 56: cnt += is_bit<7>(s); unset_bit<7>(s); s += prime * 6 + 6;
-        if (s >= sieve_end) { wheel.index = 57; break; }
-        case 57: cnt += is_bit<6>(s); unset_bit<6>(s); s += prime * 4 + 4;
-        if (s >= sieve_end) { wheel.index = 58; break; }
-        case 58: cnt += is_bit<5>(s); unset_bit<5>(s); s += prime * 2 + 2;
-        if (s >= sieve_end) { wheel.index = 59; break; }
-        case 59: cnt += is_bit<4>(s); unset_bit<4>(s); s += prime * 4 + 4;
-        if (s >= sieve_end) { wheel.index = 60; break; }
-        case 60: cnt += is_bit<3>(s); unset_bit<3>(s); s += prime * 2 + 2;
-        if (s >= sieve_end) { wheel.index = 61; break; }
-        case 61: cnt += is_bit<2>(s); unset_bit<2>(s); s += prime * 4 + 4;
-        if (s >= sieve_end) { wheel.index = 62; break; }
-        case 62: cnt += is_bit<1>(s); unset_bit<1>(s); s += prime * 6 + 6;
-        if (s >= sieve_end) { wheel.index = 63; break; }
-        case 63: cnt += is_bit<0>(s); unset_bit<0>(s); s += prime * 2 + 1;
-
-        while (s + prime * 28 + 28 < sieve_end)
-        {
-          cnt += is_bit<7>(s + prime *  0 +  0);
-          unset_bit<7>(s + prime *  0 +  0);
-          cnt += is_bit<6>(s + prime *  6 +  6);
-          unset_bit<6>(s + prime *  6 +  6);
-          cnt += is_bit<5>(s + prime * 10 + 10);
-          unset_bit<5>(s + prime * 10 + 10);
-          cnt += is_bit<4>(s + prime * 12 + 12);
-          unset_bit<4>(s + prime * 12 + 12);
-          cnt += is_bit<3>(s + prime * 16 + 16);
-          unset_bit<3>(s + prime * 16 + 16);
-          cnt += is_bit<2>(s + prime * 18 + 18);
-          unset_bit<2>(s + prime * 18 + 18);
-          cnt += is_bit<1>(s + prime * 22 + 22);
-          unset_bit<1>(s + prime * 22 + 22);
-          cnt += is_bit<0>(s + prime * 28 + 28);
-          unset_bit<0>(s + prime * 28 + 28);
-          s += prime * 30 + 29;
-        }
+        cnt += is_bit<7>(s + prime *  0 +  0);
+        unset_bit<7>(s + prime *  0 +  0);
+        cnt += is_bit<6>(s + prime *  6 +  6);
+        unset_bit<6>(s + prime *  6 +  6);
+        cnt += is_bit<5>(s + prime * 10 + 10);
+        unset_bit<5>(s + prime * 10 + 10);
+        cnt += is_bit<4>(s + prime * 12 + 12);
+        unset_bit<4>(s + prime * 12 + 12);
+        cnt += is_bit<3>(s + prime * 16 + 16);
+        unset_bit<3>(s + prime * 16 + 16);
+        cnt += is_bit<2>(s + prime * 18 + 18);
+        unset_bit<2>(s + prime * 18 + 18);
+        cnt += is_bit<1>(s + prime * 22 + 22);
+        unset_bit<1>(s + prime * 22 + 22);
+        cnt += is_bit<0>(s + prime * 28 + 28);
+        unset_bit<0>(s + prime * 28 + 28);
+        s += prime * 30 + 29;
       }
-      break;
     }
-
-    // update for the next segment
-    wheel.multiple = (uint32_t) (s - sieve_end);
+    break;
   }
 
+  // update for the next segment
+  wheel.multiple = (uint32_t) (s - sieve_end);
+
   return cnt;
 }