# HG changeset patch
# User snazarki
# Date 1471254682 -10800
# Node ID d82a138c4129ba1a47f95abfeac91d79cd7e030d
# Parent  36fd104e90c53c5ee5f8962b288cb49193918fed
8163469: aarch32: add support for ARMv6K CPU
Reviewed-by: duke
Contributed-by: andrey.petushkov@gmail.com

diff -r 36fd104e90c5 -r d82a138c4129 src/cpu/aarch32/vm/assembler_aarch32.cpp
--- openjdk/hotspot/src/cpu/aarch32/vm/assembler_aarch32.cpp	Fri Aug 12 18:10:58 2016 +0300
+++ openjdk/hotspot/src/cpu/aarch32/vm/assembler_aarch32.cpp	Mon Aug 15 12:51:22 2016 +0300
@@ -61,12 +61,16 @@
   //   printf("\n");
   // }
 
+#ifdef ASSERT
   Assembler _masm(cb);
   address entry = __ pc();
 
   // Smoke test for assembler
+  // We are checking code generation, not whether the code can run on the actual target,
+  // so temporarily override the detected CPU features to allow emission of all instructions.
+  const ProcessorFeatures detected_features = VM_Version::features();
+  VM_Version::features(FT_ALL);
 
-#ifdef ASSERT
 // BEGIN  Generated code -- do not edit
 // Generated by aarch32-asmtest.py
     Label back, forth, near, near_post, near_flt, near_post_flt;
@@ -1203,6 +1207,10 @@
     0xe120017a,
   };
 // END  Generated code -- do not edit
+
+  // reset the detected cpu feature set
+  VM_Version::features(detected_features);
+
   {
     bool ok = true;
     unsigned int *insns1 = (unsigned int *)entry;
@@ -1408,7 +1416,6 @@
   }
 }
 
-
 bool Address::offset_ok_for_immed(long offset, InsnDataType type) {
   const int o = offset < 0 ? -offset : offset;
   switch (type) {
diff -r 36fd104e90c5 -r d82a138c4129 src/cpu/aarch32/vm/assembler_aarch32.hpp
--- openjdk/hotspot/src/cpu/aarch32/vm/assembler_aarch32.hpp	Fri Aug 12 18:10:58 2016 +0300
+++ openjdk/hotspot/src/cpu/aarch32/vm/assembler_aarch32.hpp	Mon Aug 15 12:51:22 2016 +0300
@@ -28,6 +28,7 @@
 #define CPU_AARCH32_VM_ASSEMBLER_AARCH32_HPP
 
 #include "asm/register.hpp"
+#include "vm_version_aarch32.hpp"
 
 // Definitions of various symbolic names for machine registers
 
@@ -619,6 +620,8 @@
 
   enum { instruction_size = 4 };
 
+  static const uint32_t nop_insn = 0xe1a00000;
+
   Address adjust(Register base, int offset, bool preIncrement) {
     if (preIncrement)
       return Address(base, offset, Address::pre);
@@ -1416,6 +1419,7 @@
 #undef INSN
 
 void bfi(Register Rd, Register Rn, int lsb, int width, Condition cond = C_DFLT) {
+  assert(VM_Version::features() & (FT_ARMV6T2 | FT_ARMV7), "unsupported on the cpu");
   int msb = lsb + width - 1;
   assert(lsb >= 0 && lsb < 32, "lsb out of range");
   assert(msb < 32 && msb >= lsb, "width out of range");
@@ -1425,6 +1429,7 @@
 }
 
 void bfc(Register Rd, int lsb, int width, Condition cond = C_DFLT) {
+  assert(VM_Version::features() & (FT_ARMV6T2 | FT_ARMV7), "unsupported on the cpu");
   int msb = lsb + width - 1;
   assert(lsb >= 0 && lsb < 32, "lsb out of range");
   assert(msb < 32 && msb >= lsb, "width out of range");
diff -r 36fd104e90c5 -r d82a138c4129 src/cpu/aarch32/vm/interp_masm_aarch32.cpp
--- openjdk/hotspot/src/cpu/aarch32/vm/interp_masm_aarch32.cpp	Fri Aug 12 18:10:58 2016 +0300
+++ openjdk/hotspot/src/cpu/aarch32/vm/interp_masm_aarch32.cpp	Mon Aug 15 12:51:22 2016 +0300
@@ -61,12 +61,12 @@
   andr(result, result, 0x1, Assembler::EQ);
 
   cmp(rscratch1, T_BYTE);
-  sbfx(result, result, 0, 8, Assembler::EQ);
+  sxtb(result, result, Assembler::ror(), Assembler::EQ);
 
   cmp(rscratch1, T_CHAR);
-  ubfx(result, result, 0, 16, Assembler::EQ);  // truncate upper 16 bits
+  uxth(result, result, Assembler::ror(), Assembler::EQ);  // truncate upper 16 bits
 
-  sbfx(result, result, 0, 16, Assembler::NE);  // sign-extend short
+  sxth(result, result, Assembler::ror(), Assembler::NE);  // sign-extend short
 
   // Nothing to do for T_INT
   bind(done);
diff -r 36fd104e90c5 -r d82a138c4129 src/cpu/aarch32/vm/macroAssembler_aarch32.cpp
--- openjdk/hotspot/src/cpu/aarch32/vm/macroAssembler_aarch32.cpp	Fri Aug 12 18:10:58 2016 +0300
+++ openjdk/hotspot/src/cpu/aarch32/vm/macroAssembler_aarch32.cpp	Mon Aug 15 12:51:22 2016 +0300
@@ -171,11 +171,10 @@
       assert(nativeInstruction_at(&insn_buf[2])->is_orr(), "wrong insns in patch");
       assert(nativeInstruction_at(&insn_buf[3])->is_orr(), "wrong insns in patch");
       u_int32_t addr;
-      // TODO Check that the rotations are in the expected order.
-      addr  = Instruction_aarch32::extract(insn_buf[0], 7, 0) << 0;
-      addr |= Instruction_aarch32::extract(insn_buf[1], 7, 0) << 8;
-      addr |= Instruction_aarch32::extract(insn_buf[2], 7, 0) << 16;
-      addr |= Instruction_aarch32::extract(insn_buf[3], 7, 0) << 24;
+      addr  = Assembler::decode_imm12(Instruction_aarch32::extract(insn_buf[0], 11, 0));
+      addr |= Assembler::decode_imm12(Instruction_aarch32::extract(insn_buf[1], 11, 0));
+      addr |= Assembler::decode_imm12(Instruction_aarch32::extract(insn_buf[2], 11, 0));
+      addr |= Assembler::decode_imm12(Instruction_aarch32::extract(insn_buf[3], 11, 0));
       return address(addr);
     } else {
       ShouldNotReachHere();
@@ -627,9 +626,12 @@
 
   // The trampoline is built so that the destination address is a raw 4-byte value,
   // so it can be patched atomically.
-  add(lr, r15_pc, 4); // pc is this addr + 8
+  add(lr, r15_pc, NativeCall::instruction_size - 2 * NativeInstruction::arm_insn_sz);
   ldr(r15_pc, Address(r15_pc, 4)); // Address does correction for offset from pc base
   emit_int32((uintptr_t) entry.target());
+  // possibly pad the call to the NativeCall size to make patching happy
+  for (int i = NativeCall::instruction_size; i > 3 * NativeInstruction::arm_insn_sz; i -= NativeInstruction::arm_insn_sz)
+    nop();
 }
 
 void MacroAssembler::ic_call(address entry) {
@@ -3226,3 +3228,22 @@
   BIND(L_exit);
     inv(crc, crc);
 }
+
+void MacroAssembler::bfc_impl(Register Rd, int lsb, int width, Condition cond) {
+  if (width > 15 && lsb == 0) {
+    lsr(Rd, Rd, width);
+    lsl(Rd, Rd, width);
+  } else if (width > 15 && lsb + width == 32) {
+    lsl(Rd, Rd, 32 - lsb);
+    lsr(Rd, Rd, 32 - lsb);
+  } else {
+    const int lsb1 = (lsb & 1);
+    int w1 = width <= 8 - lsb1 ? width : 8 - lsb1;
+    while (width) {
+      bic(Rd, Rd, ((1 << w1) - 1) << lsb);
+      width -= w1;
+      lsb += w1;
+      w1 = width > 8 ? 8 : width;
+    }
+  }
+}
diff -r 36fd104e90c5 -r d82a138c4129 src/cpu/aarch32/vm/macroAssembler_aarch32.hpp
--- openjdk/hotspot/src/cpu/aarch32/vm/macroAssembler_aarch32.hpp	Fri Aug 12 18:10:58 2016 +0300
+++ openjdk/hotspot/src/cpu/aarch32/vm/macroAssembler_aarch32.hpp	Mon Aug 15 12:51:22 2016 +0300
@@ -799,10 +799,19 @@
   using Assembler::strd;
   int strd(Register Rt, Register Rt2, const Address& adr, Condition cond = C_DFLT);
 
+private:
+  void bfc_impl(Register rd, int lsb, int width, Condition cond);
+public:
+  void bfc(Register Rd, int lsb, int width, Condition cond = C_DFLT) {
+    if (VM_Version::features() & (FT_ARMV6T2 | FT_ARMV7))
+      Assembler::bfc(Rd, lsb, width, cond);
+    else
+      bfc_impl(Rd, lsb, width, cond);
+  }
+
   void align_stack() {
-    // sp &= ~StackAlignmentInBytes, assuming it's the power of 2
     if (StackAlignmentInBytes > 4)
-      bfc(sp, 0, __builtin_ctz(StackAlignmentInBytes));
+      bic(sp, sp, StackAlignmentInBytes-1);
   }
 
 #ifdef ASSERT
diff -r 36fd104e90c5 -r d82a138c4129 src/cpu/aarch32/vm/nativeInst_aarch32.cpp
--- openjdk/hotspot/src/cpu/aarch32/vm/nativeInst_aarch32.cpp	Fri Aug 12 18:10:58 2016 +0300
+++ openjdk/hotspot/src/cpu/aarch32/vm/nativeInst_aarch32.cpp	Mon Aug 15 12:51:22 2016 +0300
@@ -39,7 +39,8 @@
 #endif
 
 // LIRAssembler fills patching site with nops up to NativeCall::instruction_size
-static const int patching_copy_buff_len = NativeCall::instruction_size;
+int NativeCall::instruction_size = 5 * arm_insn_sz;
+#define patching_copy_buff_len (NativeCall::instruction_size)
 
 NativeInstruction* NativeInstruction::from(address addr) {
   return (NativeInstruction*) addr;
@@ -47,6 +48,10 @@
 
 //-------------------------------------------------------------------
 
+void NativeCall::init() {
+  instruction_size = (VM_Version::features() & (FT_ARMV6T2 | FT_ARMV7) ? 3 : 5) * arm_insn_sz;
+}
+
 void NativeCall::verify() {
   if (!is_call()) {
     fatal("not a call");
@@ -166,8 +171,8 @@
 }
 
 bool NativeTrampolineCall::is_at(address addr) {
-  return as_uint(addr    ) == 0xe28fe004    // add     lr, pc, #4
-      && as_uint(addr + 4) == 0xe51ff004;   // ldr     pc, [pc, -4]
+  return (as_uint(addr    ) & ~0xffu) == 0xe28fe000  // add     lr, pc, #disp
+       && as_uint(addr + 4)          == 0xe51ff004; // ldr     pc, [pc, -4]
 }
 
 NativeTrampolineCall* NativeTrampolineCall::from(address addr) {
@@ -266,11 +271,18 @@
   return (Instruction_aarch32::extract(insn, 27, 16) & 0b111001011111) == 0b010000011111;
 }
 
-bool NativeMovConstReg::is_at(address addr) {
-  return NativeMovConstReg::is_movw_movt_at(addr) ||
-    NativeMovConstReg::is_ldr_literal_at(addr);
+bool NativeMovConstReg::is_mov_n_three_orr_at(address addr) {
+  return (Instruction_aarch32::extract(as_uint(addr), 27, 16) & 0b111111101111) == 0b001110100000 &&
+          Instruction_aarch32::extract(as_uint(addr+arm_insn_sz), 27, 20) == 0b00111000 &&
+          Instruction_aarch32::extract(as_uint(addr+2*arm_insn_sz), 27, 20) == 0b00111000 &&
+          Instruction_aarch32::extract(as_uint(addr+3*arm_insn_sz), 27, 21) == 0b0011100;
 }
 
+bool NativeMovConstReg::is_at(address addr) {
+  return is_ldr_literal_at(addr) ||
+          is_movw_movt_at(addr) ||
+          is_mov_n_three_orr_at(addr);
+}
 
 //-------------------------------------------------------------------
 // TODO review
diff -r 36fd104e90c5 -r d82a138c4129 src/cpu/aarch32/vm/nativeInst_aarch32.hpp
--- openjdk/hotspot/src/cpu/aarch32/vm/nativeInst_aarch32.hpp	Fri Aug 12 18:10:58 2016 +0300
+++ openjdk/hotspot/src/cpu/aarch32/vm/nativeInst_aarch32.hpp	Mon Aug 15 12:51:22 2016 +0300
@@ -158,11 +158,13 @@
  protected:
   static bool is_movw_movt_at(address instr);
   static bool is_ldr_literal_at(address instr);
+  static bool is_mov_n_three_orr_at(address instr);
  public:
   enum {
     movw_movt_pair_sz = 2 * arm_insn_sz,
+    mov_n_three_orr_sz = 4 * arm_insn_sz,
     ldr_sz = arm_insn_sz,
-    max_instruction_size = movw_movt_pair_sz,
+    max_instruction_size = mov_n_three_orr_sz,
     min_instruction_size = ldr_sz,
   };
 
@@ -171,6 +173,8 @@
       return addr() + movw_movt_pair_sz;
     } else if (is_ldr_literal_at(addr())) {
       return addr() + ldr_sz;
+    } else if (is_mov_n_three_orr_at(addr())) {
+      return addr() + mov_n_three_orr_sz;
     }
 
     // Unknown instruction in NativeMovConstReg
@@ -207,23 +211,6 @@
   return NativeMovConstReg::from(address);
 }
 
-inline NativeMovConstReg* nativeMovConstReg_before(address addr) {
-  address mov_addr = NULL;
-  if (NativeMovConstReg::is_movw_movt_at(addr - NativeMovConstReg::movw_movt_pair_sz)) {
-    mov_addr = addr - NativeMovConstReg::movw_movt_pair_sz;
-  } else if (NativeMovConstReg::is_ldr_literal_at(addr - NativeMovConstReg::ldr_sz)) {
-    mov_addr = addr - NativeMovConstReg::ldr_sz;
-  } else {
-    ShouldNotReachHere();
-  }
-
-  NativeMovConstReg* test = (NativeMovConstReg*) mov_addr;
-#ifdef ASSERT
-  test->verify();
-#endif
-  return test;
-}
-
 class NativeTrampolineCall: public NativeBranchType {
  public:
   enum {
@@ -236,10 +223,7 @@
   static bool is_at(address address);
   static NativeTrampolineCall* from(address address);
 
-  address next_instruction_address() const  {
-    assert(is_at(addr()), "not call");
-    return addr() + instruction_size;
-  }
+  address next_instruction_address() const;
 };
 
 class NativeRegCall: public NativeBranchType {
@@ -263,17 +247,22 @@
   //  NativeTrampolineCall
  public:
   enum {
-    instruction_size = 3 * arm_insn_sz
+    max_instruction_size = 5 * arm_insn_sz
   };
+
+  static int instruction_size;
 #ifdef ASSERT
-  StaticAssert<(int) NativeTrampolineCall::instruction_size <= (int) instruction_size> dummy1;
+  StaticAssert<(int) NativeTrampolineCall::instruction_size <= (int) max_instruction_size> dummy1;
   StaticAssert<NativeMovConstReg::movw_movt_pair_sz
-      + NativeRegCall::instruction_size <= (int) instruction_size> dummy2;
+      + NativeRegCall::instruction_size <= (int) max_instruction_size> dummy2;
+  StaticAssert<NativeMovConstReg::mov_n_three_orr_sz
+      + NativeRegCall::instruction_size <= (int) max_instruction_size> dummy3;
 #endif
 
   address destination() const;
   void set_destination(address dest);
 
+  static void init();
   void  verify_alignment()                       { ; }
   void  verify();
   void  print();
@@ -306,6 +295,11 @@
   static bool is_call_before(address return_address);
 };
 
+inline address NativeTrampolineCall::next_instruction_address() const {
+  assert(is_at(addr()), "not call");
+  return addr() + NativeCall::instruction_size;
+}
+
 inline NativeCall* nativeCall_at(address address) {
   return NativeCall::from(address);
 }
diff -r 36fd104e90c5 -r d82a138c4129 src/cpu/aarch32/vm/sharedRuntime_aarch32.cpp
--- openjdk/hotspot/src/cpu/aarch32/vm/sharedRuntime_aarch32.cpp	Fri Aug 12 18:10:58 2016 +0300
+++ openjdk/hotspot/src/cpu/aarch32/vm/sharedRuntime_aarch32.cpp	Mon Aug 15 12:51:22 2016 +0300
@@ -99,7 +99,7 @@
       r10_off, rmethod_off = r10_off,
       r11_off,
       r12_off,
-          reg_save_pad, // align area to 8-bytes to simplify stack alignment to 8
+      reg_save_pad, // align area to 8-bytes to simplify stack alignment to 8
       rfp_off,
       return_off,
       reg_save_size,
diff -r 36fd104e90c5 -r d82a138c4129 src/cpu/aarch32/vm/stubGenerator_aarch32.cpp
--- openjdk/hotspot/src/cpu/aarch32/vm/stubGenerator_aarch32.cpp	Fri Aug 12 18:10:58 2016 +0300
+++ openjdk/hotspot/src/cpu/aarch32/vm/stubGenerator_aarch32.cpp	Mon Aug 15 12:51:22 2016 +0300
@@ -1322,6 +1322,8 @@
       StubRoutines::_crc_table_adr = (address)StubRoutines::aarch32::_crc_table;
       StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
     }
+
+    NativeCall::init();
   }
 
   void generate_all() {
diff -r 36fd104e90c5 -r d82a138c4129 src/cpu/aarch32/vm/vm_version_aarch32.cpp
--- openjdk/hotspot/src/cpu/aarch32/vm/vm_version_aarch32.cpp	Fri Aug 12 18:10:58 2016 +0300
+++ openjdk/hotspot/src/cpu/aarch32/vm/vm_version_aarch32.cpp	Mon Aug 15 12:51:22 2016 +0300
@@ -34,103 +34,20 @@
 #ifdef TARGET_OS_FAMILY_linux
 # include "os_linux.inline.hpp"
 #endif
-
-#ifndef AT_HWCAP
-#define AT_HWCAP        16              /* Machine-dependent hints about
-                                           processor capabilities.  */
-#endif
-
-#ifndef AT_HWCAP2
-#define AT_HWCAP2       26              /* More machine-dependent hints about
-                                           processor capabilities.  */
-#endif
-
-#ifndef HWCAP2_PMULL
-#define HWCAP2_PMULL    (1 << 1)
-#endif
-
-#ifndef HWCAP2_AES
-#define HWCAP2_AES      (1 << 0)
-#endif
-
-#ifndef HWCAP2_SHA1
-#define HWCAP2_SHA1     (1 << 2)
-#endif
-
-#ifndef HWCAP2_SHA2
-#define HWCAP2_SHA2     (1 << 3)
-#endif
-
-#ifndef HWCAP2_CRC32
-#define HWCAP2_CRC32    (1 << 4)
-#endif
-
-#ifndef HWCAP_NEON
-#define HWCAP_NEON      (1 << 12)
-#endif
-
-#ifndef HWCAP_VFPv3
-#define HWCAP_VFPv3     (1 << 13)
-#endif
-
-#ifndef HWCAP_VFPv3D16
-#define HWCAP_VFPv3D16  (1 << 14)       /* also set for VFPv4-D16 */
-#endif
-
-#ifndef HWCAP_TLS
-#define HWCAP_TLS       (1 << 15)
-#endif
-
-#ifndef HWCAP_VFPv4
-#define HWCAP_VFPv4     (1 << 16)
-#endif
-
-#ifndef HWCAP_IDIVA
-#define HWCAP_IDIVA     (1 << 17)
-#endif
-
-#ifndef HWCAP_VFPD32
-#define HWCAP_VFPD32    (1 << 19)       /* set if VFP has 32 regs (not 16) */
-#endif
+#include "compiler/disassembler.hpp"
 
 enum ProcessorFeatures VM_Version::_features = FT_NONE;
 const char* VM_Version::_cpu_features = "";
 
 static BufferBlob* stub_blob;
 static const int stub_size = 550;
+volatile bool VM_Version::_is_determine_features_test_running = false;
 
 extern "C" {
   typedef void (*getPsrInfo_stub_t)(void*);
 }
 static getPsrInfo_stub_t getPsrInfo_stub = NULL;
 
-typedef unsigned long (*pgetauxval)(unsigned long type);
-
-class VM_Version_StubGenerator: public StubCodeGenerator {
- public:
-
-  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
-
-  address generate_getPsrInfo() {
-    StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
-#   define __ _masm->
-    address start = __ pc();
-
-    // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info);
-
-    address entry = __ pc();
-
-    // TODO : redefine fields in CpuidInfo and generate
-    // code to fill them in
-
-    __ b(lr);
-
-#   undef __
-
-    return start;
-  }
-};
-
 
 bool VM_Version::identify_procline(const char *tag, char **line) {
   char *i = *line;
@@ -165,87 +82,59 @@
 
   enum ProcessorFeatures f = FT_NONE;
 
-  // try the recommended way, by using glibc API.
-  // however since this API is only available in recent
-  // versions of glibc we got to invoke it indirectly for
-  // not to create compile and run-time dependency
-  pgetauxval getauxval_ptr = (pgetauxval) os::dll_lookup((void*) 0, "getauxval");
-  if (getauxval_ptr) {
-    unsigned long auxv2 = (*getauxval_ptr)(AT_HWCAP2);
-    unsigned long auxv = (*getauxval_ptr)(AT_HWCAP);
-    if (FLAG_IS_DEFAULT(UseCRC32)) {
-      UseCRC32 = (auxv2 & HWCAP2_CRC32) != 0;
-    }
-    if (auxv2 & HWCAP2_AES) {
-      UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
-      UseAESIntrinsics =
-              UseAESIntrinsics || (UseAES && FLAG_IS_DEFAULT(UseAESIntrinsics));
-      if (UseAESIntrinsics && !UseAES) {
-        warning("UseAESIntrinsics enabled, but UseAES not, enabling");
-        UseAES = true;
-      }
-    } else {
-      if (UseAES) {
-        warning("UseAES specified, but not supported on this CPU");
-      }
-      if (UseAESIntrinsics) {
-        warning("UseAESIntrinsics specified, but not supported on this CPU");
-      }
-    }
-    if (auxv & HWCAP_NEON)
-      f = (ProcessorFeatures) (f | FT_AdvSIMD);
-    if (auxv & HWCAP_IDIVA)
-      f = (ProcessorFeatures) (f | FT_HW_DIVIDE);
-    if (auxv & HWCAP_VFPv3)
-      f = (ProcessorFeatures) (f | FT_VFPV3 | FT_VFPV2);
-    if (auxv2 & HWCAP2_CRC32)
-      f = (ProcessorFeatures) (f | FT_CRC32);
+  // Allocate space for the code.
+  const int code_size = 10 * Assembler::instruction_size;
+  ResourceMark rm;
+  CodeBuffer cb("detect_cpu_features", code_size, 0);
+  MacroAssembler* a = new MacroAssembler(&cb);
+  jlong test_area;
+
+  // Enable all features so that the test code below can be generated.
+  _features = FT_ALL;
+  // Emit code.
+  uint32_t *const code = (uint32_t *)a->pc();
+  void (*test)(address addr, uintptr_t offset)=(void(*)(address addr, uintptr_t nonzero))(void *)code;
+
+  a->udiv(r3, r2, r1);     // FT_HW_DIVIDE
+  a->bfc(r1, 1, 1);        // FT_ARMV6T2
+  a->vneg_f64(d0, d0);     // FT_VFPV2
+  a->vmov_f64(d0, 1.);     // FT_VFPV3
+  a->dmb(Assembler::ISH);  // FT_ARMV7
+  a->ldrexd(r2, r0);       // FT_ARMV6K
+  a->vmov_f64(d0, 0.0);    // FT_AdvSIMD
+  a->crc32b(r3, r2, r1);   // FT_CRC32
+  a->b(lr);
+
+  uint32_t *const code_end = (uint32_t *)a->pc();
+  a->flush();
+  _features = FT_NONE;
+
+  // Print the detection code.
+  if (PrintAssembly) {
+    ttyLocker ttyl;
+    tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " before execution:", p2i(code));
+    Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
   }
+  // Execute the code. Illegal instructions are replaced with NOPs by the SIGILL handler.
+  VM_Version::_is_determine_features_test_running = true;
+  (*test)((address)&test_area, 1);
+  VM_Version::_is_determine_features_test_running = false;
+
+  uint32_t *insn = code;
+  if (*insn++ != Assembler::nop_insn) f = (ProcessorFeatures) (f | FT_HW_DIVIDE);
+  if (*insn++ != Assembler::nop_insn) f = (ProcessorFeatures) (f | FT_ARMV6T2);
+  if (*insn++ != Assembler::nop_insn) f = (ProcessorFeatures) (f | FT_VFPV2);
+  if (*insn++ != Assembler::nop_insn) f = (ProcessorFeatures) (f | FT_VFPV3);
+  if (*insn++ != Assembler::nop_insn) f = (ProcessorFeatures) (f | FT_ARMV7);
+  if (*insn++ != Assembler::nop_insn) f = (ProcessorFeatures) (f | FT_ARMV6K);
+  if (*insn++ != Assembler::nop_insn) f = (ProcessorFeatures) (f | FT_AdvSIMD);
+  if (*insn++ != Assembler::nop_insn) f = (ProcessorFeatures) (f | FT_CRC32);
 
   int ncores = 0, cpu, variant, model, revision;
   char buf[2048], *i;
   if (FILE * fp = fopen("/proc/cpuinfo", "r")) {
     while ((i = fgets(buf, 2048, fp))) {
-      if (identify_procline("Features", &i)) {
-        i = strtok(i, " \n");
-        while (i) {
-          if (!strcmp("idiva", i)) {
-            f = (ProcessorFeatures) (f | FT_HW_DIVIDE);
-          } else if (!strcmp("vfpv3", i) || !strcmp("vfpv4", i)) {
-            // Assuming that vfpv4 implements all of vfpv3
-            // and that they both implement all of v2.
-            f = (ProcessorFeatures) (f | FT_VFPV3 | FT_VFPV2);
-          } else if (!strcmp("vfp", i)) {
-            // Assuming that VFPv2 is identified by plain vfp
-            f = (ProcessorFeatures) (f | FT_VFPV2);
-          } else if (!strcmp("neon", i)) {
-            f = (ProcessorFeatures) (f | FT_AdvSIMD);
-          }
-          i = strtok(NULL, " \n");
-        }
-      } else if (identify_procline("Processor", &i)) {
-        i = strtok(i, " \n");
-        while (i) {
-          // if the info is read correctly do
-          if (!strcmp("ARMv7", i)) {
-            f = (ProcessorFeatures) (f | FT_ARMV7);
-          } else if (!strcmp("ARMv6-compatible", i)) {
-            //TODO sort out the ARMv6 identification code
-          }
-          i = strtok(NULL, " \n");
-        }
-      } else if (identify_procline("model name", &i)) {
-        i = strtok(i, " \n");
-        while (i) {
-          // if the info is read correctly do
-          if (!strcmp("ARMv7", i) || !strcmp("AArch64", i)) {
-            f = (ProcessorFeatures) (f | FT_ARMV7);
-          } else if (!strcmp("ARMv6-compatible", i)) {
-            //TODO sort out the ARMv6 identification code
-          }
-          i = strtok(NULL, " \n");
-        }
-      } else if (identify_procline("processor", &i)) {
+      if (identify_procline("processor", &i)) {
         ncores++;
       } else if (identify_procline("CPU implementer", &i)) {
         cpu = strtol(i, NULL, 0);
@@ -326,11 +215,6 @@
     vm_exit_during_initialization("Unable to allocate getPsrInfo_stub");
   }
 
-  CodeBuffer c(stub_blob);
-  VM_Version_StubGenerator g(&c);
-  getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t,
-                                   g.generate_getPsrInfo());
-
   get_processor_features();
 
   //FIXME: turning off CriticalJNINatives flag while it is not implemented
diff -r 36fd104e90c5 -r d82a138c4129 src/cpu/aarch32/vm/vm_version_aarch32.hpp
--- openjdk/hotspot/src/cpu/aarch32/vm/vm_version_aarch32.hpp	Fri Aug 12 18:10:58 2016 +0300
+++ openjdk/hotspot/src/cpu/aarch32/vm/vm_version_aarch32.hpp	Mon Aug 15 12:51:22 2016 +0300
@@ -41,6 +41,7 @@
   FT_SINGLE_CORE = 64,
   FT_AdvSIMD = 128,
   FT_CRC32 = 256,
+  FT_ALL = 0xffff
 };
 
 class VM_Version : public Abstract_VM_Version {
@@ -67,6 +68,7 @@
  private:
   static enum ProcessorFeatures _features;
   static const char* _cpu_features;
+  static volatile bool _is_determine_features_test_running;
 
   static void get_processor_features();
   static bool identify_procline(const char *tag, char **line);
@@ -75,7 +77,12 @@
   static enum ProcessorFeatures features() {
     return _features;
   }
+  static void features(ProcessorFeatures f) {
+    _features = f;
+  }
   static const char* cpu_features() { return _cpu_features; }
+
+  static bool is_determine_features_test_running() { return _is_determine_features_test_running; }
 };
 
 #endif // CPU_AARCH32_VM_VM_VERSION_AARCH32_HPP
diff -r 36fd104e90c5 -r d82a138c4129 src/os_cpu/linux_aarch32/vm/atomic_linux_aarch32.inline.hpp
--- openjdk/hotspot/src/os_cpu/linux_aarch32/vm/atomic_linux_aarch32.inline.hpp	Fri Aug 12 18:10:58 2016 +0300
+++ openjdk/hotspot/src/os_cpu/linux_aarch32/vm/atomic_linux_aarch32.inline.hpp	Mon Aug 15 12:51:22 2016 +0300
@@ -33,9 +33,15 @@
 
 // Implementation of class atomic
 
+#if defined(__ARM_ARCH) && __ARM_ARCH >= 7
+#define FULL_MEM_BARRIER  __asm__ __volatile__ ("dmb ish"   : : : "memory")
+#define READ_MEM_BARRIER  __asm__ __volatile__ ("dmb ish"   : : : "memory")
+#define WRITE_MEM_BARRIER __asm__ __volatile__ ("dmb ishst" : : : "memory")
+#else
 #define FULL_MEM_BARRIER  __sync_synchronize()
 #define READ_MEM_BARRIER  __atomic_thread_fence(__ATOMIC_ACQUIRE);
 #define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
+#endif
 
 inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
 inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
diff -r 36fd104e90c5 -r d82a138c4129 src/os_cpu/linux_aarch32/vm/os_linux_aarch32.cpp
--- openjdk/hotspot/src/os_cpu/linux_aarch32/vm/os_linux_aarch32.cpp	Fri Aug 12 18:10:58 2016 +0300
+++ openjdk/hotspot/src/os_cpu/linux_aarch32/vm/os_linux_aarch32.cpp	Mon Aug 15 12:51:22 2016 +0300
@@ -375,6 +375,11 @@
           // Determination of interpreter/vtable stub/compiled code null exception
           stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
       }
+    } else if (sig == SIGILL && VM_Version::is_determine_features_test_running()) {
+        // SIGILL must be caused by VM_Version::get_processor_features().
+        *(int *)pc = Assembler::nop_insn; // patch the offending instruction to a NOP to record that it raised SIGILL,
+                        // flushing of icache is not necessary.
+        stub = pc + 4;  // continue with next instruction.
     } else if (thread->thread_state() == _thread_in_vm &&
                sig == SIGBUS && /* info->si_code == BUS_OBJERR && */
                thread->doing_unsafe_access()) {