From 880169bc878cbf9c5f858793e628b995cdbf2709 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Dec 10 2019 05:51:40 +0000 Subject: Use moonjit 2.1.2 as base tarball The LuaJIT project has not been doing releases for a few years, so use releases from the v2.1 branch of moonjit as the base tarball. The version number follows moonjit's numbering. This is a risk if upstream LuaJIT suddenly starts doing releases, but it is not serious enough since the moonjit v2.1 branch remains in sync. We can mitigate the version number inconsistency by slowing down moonjit version number bumps. I have also dropped changes that are unlikely to make it into LuaJIT. I will propose a separate package for moonjit v2.2 and beyond since that will have a very different trajectory from LuaJIT. --- diff --git a/.gitignore b/.gitignore index 52f1a8f..6f269d0 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ /LuaJIT-2.0.4.tar.gz /LuaJIT-2.1.0-beta2.tar.gz /LuaJIT-2.1.0-beta3.tar.gz +/2.1.2.tar.gz diff --git a/0001-Fix-LJ_MAX_JSLOTS-assertion-in-rec_check_slots.patch b/0001-Fix-LJ_MAX_JSLOTS-assertion-in-rec_check_slots.patch deleted file mode 100644 index 16aca3b..0000000 --- a/0001-Fix-LJ_MAX_JSLOTS-assertion-in-rec_check_slots.patch +++ /dev/null @@ -1,31 +0,0 @@ -commit 31afda31814ec02f82ffb0519bee496c87eeaa89 -Merge: 8271c64 1c89933 -Author: Mike Pall -Date: Tue May 9 21:01:23 2017 +0200 - - Merge branch 'master' into v2.1 - -commit 1c89933f129dde76944336c6bfd05297b8d67730 -Author: Mike Pall -Date: Tue May 9 20:59:37 2017 +0200 - - Fix LJ_MAX_JSLOTS assertion in rec_check_slots(). - - Thanks to Yichun Zhang. - -diff --git a/src/lj_record.c b/src/lj_record.c -index 9d0469c..c2d0274 100644 ---- a/src/lj_record.c -+++ b/src/lj_record.c -@@ -87,9 +87,9 @@ static void rec_check_slots(jit_State *J) - BCReg s, nslots = J->baseslot + J->maxslot; - int32_t depth = 0; - cTValue *base = J->L->base - J->baseslot; -- lua_assert(J->baseslot >= 1+LJ_FR2 && J->baseslot < LJ_MAX_JSLOTS); -+ lua_assert(J->baseslot >= 1+LJ_FR2); - lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME)); -- lua_assert(nslots < LJ_MAX_JSLOTS); -+ lua_assert(nslots <= LJ_MAX_JSLOTS); - for (s = 0; s < nslots; s++) { - TRef tr = J->slot[s]; - if (tr) { diff --git a/0002-Add-missing-LJ_MAX_JSLOTS-check.patch b/0002-Add-missing-LJ_MAX_JSLOTS-check.patch deleted file mode 100644 index 70ccfd5..0000000 --- a/0002-Add-missing-LJ_MAX_JSLOTS-check.patch +++ /dev/null @@ -1,40 +0,0 @@ -commit 6259c0b909a8c00fabe3c7e6bd81150ee08cbf9f -Merge: 31afda3 630ff31 -Author: Mike Pall -Date: Wed May 17 17:38:53 2017 +0200 - - Merge branch 'master' into v2.1 - -commit 630ff3196a06353c6a7ccd1e9ac3958f4a8ca13c -Author: Mike Pall -Date: Wed May 17 17:37:35 2017 +0200 - - Add missing LJ_MAX_JSLOTS check. - - Thanks to Yichun Zhang. - -From 630ff3196a06353c6a7ccd1e9ac3958f4a8ca13c Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Wed, 17 May 2017 17:37:35 +0200 -Subject: [PATCH 02/72] Add missing LJ_MAX_JSLOTS check. - -Thanks to Yichun Zhang. ---- - src/lj_record.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/lj_record.c b/src/lj_record.c -index cecacd2..bc4e8a6 100644 ---- a/src/lj_record.c -+++ b/src/lj_record.c -@@ -633,6 +633,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs) - J->framedepth++; - J->base += func+1+LJ_FR2; - J->baseslot += func+1+LJ_FR2; -+ if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) -+ lj_trace_err(J, LJ_TRERR_STACKOV); - } - - /* Record tail call. */ --- -2.20.1 diff --git a/0003-MIPS-Use-precise-search-for-exit-jump-patching.patch b/0003-MIPS-Use-precise-search-for-exit-jump-patching.patch deleted file mode 100644 index 9d8300f..0000000 --- a/0003-MIPS-Use-precise-search-for-exit-jump-patching.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 7381b620358c2561e8690149f1d25828fdad6675 Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Wed, 7 Jun 2017 19:16:22 +0200 -Subject: [PATCH 03/72] MIPS: Use precise search for exit jump patching. - -Contributed by Djordje Kovacevic and Stefan Pejic. ---- - src/lj_asm_mips.h | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h -index 03270cc..d0a1ca5 100644 ---- a/src/lj_asm_mips.h -+++ b/src/lj_asm_mips.h -@@ -1933,7 +1933,11 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) - MCode tjump = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); - for (p++; p < pe; p++) { - if (*p == exitload) { /* Look for load of exit number. */ -- if (((p[-1] ^ (px-p)) & 0xffffu) == 0) { /* Look for exitstub branch. */ -+ /* Look for exitstub branch. Yes, this covers all used branch variants. */ -+ if (((p[-1] ^ (px-p)) & 0xffffu) == 0 && -+ ((p[-1] & 0xf0000000u) == MIPSI_BEQ || -+ (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ || -+ (p[-1] & 0xffe00000u) == MIPSI_BC1F)) { - ptrdiff_t delta = target - p; - if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */ - patchbranch: --- -2.20.1 - diff --git a/0004-MIPS-Fix-handling-of-spare-long-range-jump-slots.patch b/0004-MIPS-Fix-handling-of-spare-long-range-jump-slots.patch deleted file mode 100644 index 4da6b4d..0000000 --- a/0004-MIPS-Fix-handling-of-spare-long-range-jump-slots.patch +++ /dev/null @@ -1,77 +0,0 @@ -From c7c3c4da432ddb543d4b0a9abbb245f11b26afd0 Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Wed, 7 Jun 2017 19:36:46 +0200 -Subject: [PATCH 04/72] MIPS: Fix handling of spare long-range jump slots. - -Contributed by Djordje Kovacevic and Stefan Pejic. ---- - src/lj_asm_mips.h | 9 +++++---- - src/lj_jit.h | 6 ++++++ - src/lj_mcode.c | 6 ------ - 3 files changed, 11 insertions(+), 10 deletions(-) - -diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h -index d0a1ca5..7631190 100644 ---- a/src/lj_asm_mips.h -+++ b/src/lj_asm_mips.h -@@ -65,10 +65,9 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) - static void asm_sparejump_setup(ASMState *as) - { - MCode *mxp = as->mcbot; -- /* Assumes sizeof(MCLink) == 8. */ -- if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == 8) { -+ if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == sizeof(MCLink)) { - lua_assert(MIPSI_NOP == 0); -- memset(mxp+2, 0, MIPS_SPAREJUMP*8); -+ memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode)); - mxp += MIPS_SPAREJUMP*2; - lua_assert(mxp < as->mctop); - lj_mcode_sync(as->mcbot, mxp); -@@ -1947,7 +1946,9 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) - if (!cstart) cstart = p-1; - } else { /* Branch out of range. Use spare jump slot in mcarea. */ - int i; -- for (i = 2; i < 2+MIPS_SPAREJUMP*2; i += 2) { -+ for (i = (int)(sizeof(MCLink)/sizeof(MCode)); -+ i < (int)(sizeof(MCLink)/sizeof(MCode)+MIPS_SPAREJUMP*2); -+ i += 2) { - if (mcarea[i] == tjump) { - delta = mcarea+i - p; - goto patchbranch; -diff --git a/src/lj_jit.h b/src/lj_jit.h -index a2e8fd9..3f38d28 100644 ---- a/src/lj_jit.h -+++ b/src/lj_jit.h -@@ -155,6 +155,12 @@ typedef uint8_t MCode; - typedef uint32_t MCode; - #endif - -+/* Linked list of MCode areas. */ -+typedef struct MCLink { -+ MCode *next; /* Next area. */ -+ size_t size; /* Size of current area. */ -+} MCLink; -+ - /* Stack snapshot header. */ - typedef struct SnapShot { - uint16_t mapofs; /* Offset into snapshot map. */ -diff --git a/src/lj_mcode.c b/src/lj_mcode.c -index f0a1f69..5ea89f6 100644 ---- a/src/lj_mcode.c -+++ b/src/lj_mcode.c -@@ -272,12 +272,6 @@ static void *mcode_alloc(jit_State *J, size_t sz) - - /* -- MCode area management ----------------------------------------------- */ - --/* Linked list of MCode areas. */ --typedef struct MCLink { -- MCode *next; /* Next area. */ -- size_t size; /* Size of current area. */ --} MCLink; -- - /* Allocate a new MCode area. */ - static void mcode_allocarea(jit_State *J) - { --- -2.20.1 - diff --git a/0005-MIPS64-Add-soft-float-support-to-JIT-compiler-backen.patch b/0005-MIPS64-Add-soft-float-support-to-JIT-compiler-backen.patch deleted file mode 100644 index dda4ae2..0000000 --- a/0005-MIPS64-Add-soft-float-support-to-JIT-compiler-backen.patch +++ /dev/null @@ -1,982 +0,0 @@ -From a057a07ab702e225e21848d4f918886c5b0ac06b Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Wed, 7 Jun 2017 23:56:54 +0200 -Subject: [PATCH 05/72] MIPS64: Add soft-float support to JIT compiler backend. - -Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. -Sponsored by Cisco Systems, Inc. ---- - src/lj_arch.h | 4 +- - src/lj_asm.c | 8 +- - src/lj_asm_mips.h | 217 +++++++++++++++++++++++++++++++++++++-------- - src/lj_crecord.c | 4 +- - src/lj_emit_mips.h | 2 + - src/lj_ffrecord.c | 2 +- - src/lj_ircall.h | 43 ++++++--- - src/lj_iropt.h | 2 +- - src/lj_jit.h | 4 +- - src/lj_obj.h | 3 + - src/lj_opt_split.c | 2 +- - src/lj_snap.c | 21 +++-- - src/vm_mips64.dasc | 49 ++++++++++ - 13 files changed, 286 insertions(+), 75 deletions(-) - -diff --git a/src/lj_arch.h b/src/lj_arch.h -index c8d7138..b770564 100644 ---- a/src/lj_arch.h -+++ b/src/lj_arch.h -@@ -337,9 +337,6 @@ - #define LJ_ARCH_BITS 32 - #define LJ_TARGET_MIPS32 1 - #else --#if LJ_ABI_SOFTFP || !LJ_ARCH_HASFPU --#define LJ_ARCH_NOJIT 1 /* NYI */ --#endif - #define LJ_ARCH_BITS 64 - #define LJ_TARGET_MIPS64 1 - #define LJ_TARGET_GC64 1 -@@ -512,6 +509,7 @@ - #define LJ_ABI_SOFTFP 0 - #endif - #define LJ_SOFTFP (!LJ_ARCH_HASFPU) -+#define LJ_SOFTFP32 (LJ_SOFTFP && LJ_32) - - #if LJ_ARCH_ENDIAN == LUAJIT_BE - #define LJ_LE 0 -diff --git a/src/lj_asm.c b/src/lj_asm.c -index c2cf5a9..bed2268 100644 ---- a/src/lj_asm.c -+++ b/src/lj_asm.c -@@ -338,7 +338,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref) - ra_modified(as, r); - ir->r = RID_INIT; /* Do not keep any hint. */ - RA_DBGX((as, "remat $i $r", ir, r)); --#if !LJ_SOFTFP -+#if !LJ_SOFTFP32 - if (ir->o == IR_KNUM) { - emit_loadk64(as, r, ir); - } else -@@ -1305,7 +1305,7 @@ static void asm_call(ASMState *as, IRIns *ir) - asm_gencall(as, ci, args); - } - --#if !LJ_SOFTFP -+#if !LJ_SOFTFP32 - static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref) - { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; -@@ -1652,10 +1652,10 @@ static void asm_ir(ASMState *as, IRIns *ir) - case IR_MUL: asm_mul(as, ir); break; - case IR_MOD: asm_mod(as, ir); break; - case IR_NEG: asm_neg(as, ir); break; --#if LJ_SOFTFP -+#if LJ_SOFTFP32 - case IR_DIV: case IR_POW: case IR_ABS: - case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: -- lua_assert(0); /* Unused for LJ_SOFTFP. */ -+ lua_assert(0); /* Unused for LJ_SOFTFP32. */ - break; - #else - case IR_DIV: asm_div(as, ir); break; -diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h -index 05af3d0..1406a87 100644 ---- a/src/lj_asm_mips.h -+++ b/src/lj_asm_mips.h -@@ -290,7 +290,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) - { - ra_leftov(as, gpr, ref); - gpr++; --#if LJ_64 -+#if LJ_64 && !LJ_SOFTFP - fpr++; - #endif - } -@@ -301,7 +301,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) - emit_spstore(as, ir, r, ofs); - ofs += irt_isnum(ir->t) ? 8 : 4; - #else -- emit_spstore(as, ir, r, ofs + ((LJ_BE && (LJ_SOFTFP || r < RID_MAX_GPR) && !irt_is64(ir->t)) ? 4 : 0)); -+ emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isfp(ir->t) && !irt_is64(ir->t)) ? 4 : 0)); - ofs += 8; - #endif - } -@@ -312,7 +312,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) - #endif - if (gpr <= REGARG_LASTGPR) { - gpr++; --#if LJ_64 -+#if LJ_64 && !LJ_SOFTFP - fpr++; - #endif - } else { -@@ -461,12 +461,36 @@ static void asm_tobit(ASMState *as, IRIns *ir) - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fgh(as, MIPSI_ADD_D, tmp, left, right); - } -+#elif LJ_64 /* && LJ_SOFTFP */ -+static void asm_tointg(ASMState *as, IRIns *ir, Reg r) -+{ -+ /* The modified regs must match with the *.dasc implementation. */ -+ RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)| -+ RID2RSET(RID_R1)|RID2RSET(RID_R12); -+ if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); -+ ra_evictset(as, drop); -+ /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */ -+ ra_destreg(as, ir, RID_RET); -+ asm_guard(as, MIPSI_BNE, RID_RET+1, RID_ZERO); -+ emit_call(as, (void *)lj_ir_callinfo[IRCALL_lj_vm_tointg].func, 0); -+ if (r == RID_NONE) -+ ra_leftov(as, REGARG_FIRSTGPR, ir->op1); -+ else if (r != REGARG_FIRSTGPR) -+ emit_move(as, REGARG_FIRSTGPR, r); -+} -+ -+static void asm_tobit(ASMState *as, IRIns *ir) -+{ -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ emit_dta(as, MIPSI_SLL, dest, dest, 0); -+ asm_callid(as, ir, IRCALL_lj_vm_tobit); -+} - #endif - - static void asm_conv(ASMState *as, IRIns *ir) - { - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); --#if !LJ_SOFTFP -+#if !LJ_SOFTFP32 - int stfp = (st == IRT_NUM || st == IRT_FLOAT); - #endif - #if LJ_64 -@@ -477,12 +501,13 @@ static void asm_conv(ASMState *as, IRIns *ir) - lua_assert(!(irt_isint64(ir->t) || - (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ - #endif --#if LJ_32 && LJ_SOFTFP -+#if LJ_SOFTFP32 - /* FP conversions are handled by SPLIT. */ - lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); - /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ - #else - lua_assert(irt_type(ir->t) != st); -+#if !LJ_SOFTFP - if (irt_isfp(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - if (stfp) { /* FP to FP conversion. */ -@@ -608,6 +633,42 @@ static void asm_conv(ASMState *as, IRIns *ir) - } - } - } else -+#else -+ if (irt_isfp(ir->t)) { -+#if LJ_64 && LJ_HASFFI -+ if (stfp) { /* FP to FP conversion. */ -+ asm_callid(as, ir, irt_isnum(ir->t) ? IRCALL_softfp_f2d : -+ IRCALL_softfp_d2f); -+ } else { /* Integer to FP conversion. */ -+ IRCallID cid = ((IRT_IS64 >> st) & 1) ? -+ (irt_isnum(ir->t) ? -+ (st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d) : -+ (st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f)) : -+ (irt_isnum(ir->t) ? -+ (st == IRT_INT ? IRCALL_softfp_i2d : IRCALL_softfp_ui2d) : -+ (st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f)); -+ asm_callid(as, ir, cid); -+ } -+#else -+ asm_callid(as, ir, IRCALL_softfp_i2d); -+#endif -+ } else if (stfp) { /* FP to integer conversion. */ -+ if (irt_isguard(ir->t)) { -+ /* Checked conversions are only supported from number to int. */ -+ lua_assert(irt_isint(ir->t) && st == IRT_NUM); -+ asm_tointg(as, ir, RID_NONE); -+ } else { -+ IRCallID cid = irt_is64(ir->t) ? -+ ((st == IRT_NUM) ? -+ (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) : -+ (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) : -+ ((st == IRT_NUM) ? -+ (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : -+ (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)); -+ asm_callid(as, ir, cid); -+ } -+ } else -+#endif - #endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); -@@ -665,7 +726,7 @@ static void asm_strto(ASMState *as, IRIns *ir) - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; - IRRef args[2]; - int32_t ofs = 0; --#if LJ_SOFTFP -+#if LJ_SOFTFP32 - ra_evictset(as, RSET_SCRATCH); - if (ra_used(ir)) { - if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && -@@ -806,7 +867,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - MCLabel l_end, l_loop, l_next; - - rset_clear(allow, tab); --#if LJ_32 && LJ_SOFTFP -+#if LJ_SOFTFP32 - if (!isk) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); -@@ -826,7 +887,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - } - } - #else -- if (irt_isnum(kt)) { -+ if (!LJ_SOFTFP && irt_isnum(kt)) { - key = ra_alloc1(as, refkey, RSET_FPR); - tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); - } else if (!irt_ispri(kt)) { -@@ -882,6 +943,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15); - emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum); - emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); -+ } else if (LJ_SOFTFP && irt_isnum(kt)) { -+ emit_branch(as, MIPSI_BEQ, tmp1, key, l_end); -+ emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); - } else if (irt_isaddr(kt)) { - Reg refk = tmp2; - if (isk) { -@@ -960,7 +1024,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); - if (irt_isnum(kt)) { - emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); -- emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0); -+ emit_dta(as, MIPSI_DSRA32, tmp1, LJ_SOFTFP ? key : tmp1, 0); - emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? key : tmp1, 0); - #if !LJ_SOFTFP - emit_tg(as, MIPSI_DMFC1, tmp1, key); -@@ -1123,7 +1187,7 @@ static MIPSIns asm_fxloadins(IRIns *ir) - case IRT_U8: return MIPSI_LBU; - case IRT_I16: return MIPSI_LH; - case IRT_U16: return MIPSI_LHU; -- case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_LDC1; -+ case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_LDC1; - case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1; - default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW; - } -@@ -1134,7 +1198,7 @@ static MIPSIns asm_fxstoreins(IRIns *ir) - switch (irt_type(ir->t)) { - case IRT_I8: case IRT_U8: return MIPSI_SB; - case IRT_I16: case IRT_U16: return MIPSI_SH; -- case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_SDC1; -+ case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_SDC1; - case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1; - default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW; - } -@@ -1199,7 +1263,7 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) - - static void asm_ahuvload(ASMState *as, IRIns *ir) - { -- int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); -+ int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP); - Reg dest = RID_NONE, type = RID_TMP, idx; - RegSet allow = RSET_GPR; - int32_t ofs = 0; -@@ -1212,7 +1276,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) - } - } - if (ra_used(ir)) { -- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || -+ lua_assert((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); - dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); - rset_clear(allow, dest); -@@ -1261,10 +1325,10 @@ static void asm_ahustore(ASMState *as, IRIns *ir) - int32_t ofs = 0; - if (ir->r == RID_SINK) - return; -- if (!LJ_SOFTFP && irt_isnum(ir->t)) { -- src = ra_alloc1(as, ir->op2, RSET_FPR); -+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { -+ src = ra_alloc1(as, ir->op2, LJ_SOFTFP ? RSET_GPR : RSET_FPR); - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); -- emit_hsi(as, MIPSI_SDC1, src, idx, ofs); -+ emit_hsi(as, LJ_SOFTFP ? MIPSI_SD : MIPSI_SDC1, src, idx, ofs); - } else { - #if LJ_32 - if (!irt_ispri(ir->t)) { -@@ -1312,7 +1376,7 @@ static void asm_sload(ASMState *as, IRIns *ir) - IRType1 t = ir->t; - #if LJ_32 - int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); -- int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); -+ int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP); - if (hiop) - t.irt = IRT_NUM; - #else -@@ -1320,7 +1384,7 @@ static void asm_sload(ASMState *as, IRIns *ir) - #endif - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); --#if LJ_32 && LJ_SOFTFP -+#if LJ_SOFTFP32 - lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ - if (hiop && ra_used(ir+1)) { - type = ra_dest(as, ir+1, allow); -@@ -1328,29 +1392,44 @@ static void asm_sload(ASMState *as, IRIns *ir) - } - #else - if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { -- dest = ra_scratch(as, RSET_FPR); -+ dest = ra_scratch(as, LJ_SOFTFP ? allow : RSET_FPR); - asm_tointg(as, ir, dest); - t.irt = IRT_NUM; /* Continue with a regular number type check. */ - } else - #endif - if (ra_used(ir)) { -- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || -+ lua_assert((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); - dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); - rset_clear(allow, dest); - base = ra_alloc1(as, REF_BASE, allow); - rset_clear(allow, base); -- if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { -+ if (!LJ_SOFTFP32 && (ir->op2 & IRSLOAD_CONVERT)) { - if (irt_isint(t)) { -- Reg tmp = ra_scratch(as, RSET_FPR); -+ Reg tmp = ra_scratch(as, LJ_SOFTFP ? RSET_GPR : RSET_FPR); -+#if LJ_SOFTFP -+ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); -+ ra_destreg(as, ir, RID_RET); -+ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_d2i].func, 0); -+ if (tmp != REGARG_FIRSTGPR) -+ emit_move(as, REGARG_FIRSTGPR, tmp); -+#else - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp); -+#endif - dest = tmp; - t.irt = IRT_NUM; /* Check for original type. */ - } else { - Reg tmp = ra_scratch(as, RSET_GPR); -+#if LJ_SOFTFP -+ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); -+ ra_destreg(as, ir, RID_RET); -+ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_i2d].func, 0); -+ emit_dta(as, MIPSI_SLL, REGARG_FIRSTGPR, tmp, 0); -+#else - emit_fg(as, MIPSI_CVT_D_W, dest, dest); - emit_tg(as, MIPSI_MTC1, tmp, dest); -+#endif - dest = tmp; - t.irt = IRT_INT; /* Check for original type. */ - } -@@ -1399,7 +1478,7 @@ dotypecheck: - if (irt_isnum(t)) { - asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM); -- if (ra_hasreg(dest)) -+ if (!LJ_SOFTFP && ra_hasreg(dest)) - emit_hsi(as, MIPSI_LDC1, dest, base, ofs); - } else { - asm_guard(as, MIPSI_BNE, RID_TMP, -@@ -1409,7 +1488,7 @@ dotypecheck: - } - emit_tsi(as, MIPSI_LD, type, base, ofs); - } else if (ra_hasreg(dest)) { -- if (irt_isnum(t)) -+ if (!LJ_SOFTFP && irt_isnum(t)) - emit_hsi(as, MIPSI_LDC1, dest, base, ofs); - else - emit_tsi(as, irt_isint(t) ? MIPSI_LW : MIPSI_LD, dest, base, -@@ -1548,26 +1627,40 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi) - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); - emit_fg(as, mi, dest, left); - } -+#endif - -+#if !LJ_SOFTFP32 - static void asm_fpmath(ASMState *as, IRIns *ir) - { - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - return; -+#if !LJ_SOFTFP - if (ir->op2 <= IRFPM_TRUNC) - asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); - else if (ir->op2 == IRFPM_SQRT) - asm_fpunary(as, ir, MIPSI_SQRT_D); - else -+#endif - asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); - } - #endif - -+#if !LJ_SOFTFP -+#define asm_fpadd(as, ir) asm_fparith(as, ir, MIPSI_ADD_D) -+#define asm_fpsub(as, ir) asm_fparith(as, ir, MIPSI_SUB_D) -+#define asm_fpmul(as, ir) asm_fparith(as, ir, MIPSI_MUL_D) -+#elif LJ_64 /* && LJ_SOFTFP */ -+#define asm_fpadd(as, ir) asm_callid(as, ir, IRCALL_softfp_add) -+#define asm_fpsub(as, ir) asm_callid(as, ir, IRCALL_softfp_sub) -+#define asm_fpmul(as, ir) asm_callid(as, ir, IRCALL_softfp_mul) -+#endif -+ - static void asm_add(ASMState *as, IRIns *ir) - { - IRType1 t = ir->t; --#if !LJ_SOFTFP -+#if !LJ_SOFTFP32 - if (irt_isnum(t)) { -- asm_fparith(as, ir, MIPSI_ADD_D); -+ asm_fpadd(as, ir); - } else - #endif - { -@@ -1589,9 +1682,9 @@ static void asm_add(ASMState *as, IRIns *ir) - - static void asm_sub(ASMState *as, IRIns *ir) - { --#if !LJ_SOFTFP -+#if !LJ_SOFTFP32 - if (irt_isnum(ir->t)) { -- asm_fparith(as, ir, MIPSI_SUB_D); -+ asm_fpsub(as, ir); - } else - #endif - { -@@ -1605,9 +1698,9 @@ static void asm_sub(ASMState *as, IRIns *ir) - - static void asm_mul(ASMState *as, IRIns *ir) - { --#if !LJ_SOFTFP -+#if !LJ_SOFTFP32 - if (irt_isnum(ir->t)) { -- asm_fparith(as, ir, MIPSI_MUL_D); -+ asm_fpmul(as, ir); - } else - #endif - { -@@ -1634,7 +1727,7 @@ static void asm_mod(ASMState *as, IRIns *ir) - asm_callid(as, ir, IRCALL_lj_vm_modi); - } - --#if !LJ_SOFTFP -+#if !LJ_SOFTFP32 - static void asm_pow(ASMState *as, IRIns *ir) - { - #if LJ_64 && LJ_HASFFI -@@ -1654,7 +1747,11 @@ static void asm_div(ASMState *as, IRIns *ir) - IRCALL_lj_carith_divu64); - else - #endif -+#if !LJ_SOFTFP - asm_fparith(as, ir, MIPSI_DIV_D); -+#else -+ asm_callid(as, ir, IRCALL_softfp_div); -+#endif - } - #endif - -@@ -1664,6 +1761,13 @@ static void asm_neg(ASMState *as, IRIns *ir) - if (irt_isnum(ir->t)) { - asm_fpunary(as, ir, MIPSI_NEG_D); - } else -+#elif LJ_64 /* && LJ_SOFTFP */ -+ if (irt_isnum(ir->t)) { -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); -+ emit_dst(as, MIPSI_XOR, dest, left, -+ ra_allock(as, 0x8000000000000000ll, rset_exclude(RSET_GPR, dest))); -+ } else - #endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); -@@ -1673,7 +1777,17 @@ static void asm_neg(ASMState *as, IRIns *ir) - } - } - -+#if !LJ_SOFTFP - #define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D) -+#elif LJ_64 /* && LJ_SOFTFP */ -+static void asm_abs(ASMState *as, IRIns *ir) -+{ -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); -+ emit_tsml(as, MIPSI_DEXTM, dest, left, 30, 0); -+} -+#endif -+ - #define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) - #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) - -@@ -1918,15 +2032,21 @@ static void asm_bror(ASMState *as, IRIns *ir) - } - } - --#if LJ_32 && LJ_SOFTFP -+#if LJ_SOFTFP - static void asm_sfpmin_max(ASMState *as, IRIns *ir) - { - CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax]; -+#if LJ_64 -+ IRRef args[2]; -+ args[0] = ir->op1; -+ args[1] = ir->op2; -+#else - IRRef args[4]; - args[0^LJ_BE] = ir->op1; - args[1^LJ_BE] = (ir+1)->op1; - args[2^LJ_BE] = ir->op2; - args[3^LJ_BE] = (ir+1)->op2; -+#endif - asm_setupresult(as, ir, &ci); - emit_call(as, (void *)ci.func, 0); - ci.func = NULL; -@@ -1936,7 +2056,10 @@ static void asm_sfpmin_max(ASMState *as, IRIns *ir) - - static void asm_min_max(ASMState *as, IRIns *ir, int ismax) - { -- if (!LJ_SOFTFP && irt_isnum(ir->t)) { -+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { -+#if LJ_SOFTFP -+ asm_sfpmin_max(as, ir); -+#else - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; -@@ -1947,6 +2070,7 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) - if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); - } - emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left); -+#endif - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc2(as, ir, RSET_GPR); -@@ -1967,18 +2091,24 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) - - /* -- Comparisons --------------------------------------------------------- */ - --#if LJ_32 && LJ_SOFTFP -+#if LJ_SOFTFP - /* SFP comparisons. */ - static void asm_sfpcomp(ASMState *as, IRIns *ir) - { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; - RegSet drop = RSET_SCRATCH; - Reg r; -+#if LJ_64 -+ IRRef args[2]; -+ args[0] = ir->op1; -+ args[1] = ir->op2; -+#else - IRRef args[4]; - args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1; - args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 3 : 2] = (ir+1)->op2; -+#endif - -- for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { -+ for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+(LJ_64?1:3); r++) { - if (!rset_test(as->freeset, r) && - regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) - rset_clear(drop, r); -@@ -2032,11 +2162,15 @@ static void asm_comp(ASMState *as, IRIns *ir) - { - /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ - IROp op = ir->o; -- if (!LJ_SOFTFP && irt_isnum(ir->t)) { -+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { -+#if LJ_SOFTFP -+ asm_sfpcomp(as, ir); -+#else - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); - emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right); -+#endif - } else { - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - if (op == IR_ABC) op = IR_UGT; -@@ -2068,9 +2202,13 @@ static void asm_equal(ASMState *as, IRIns *ir) - Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ? - RSET_FPR : RSET_GPR); - right = (left >> 8); left &= 255; -- if (!LJ_SOFTFP && irt_isnum(ir->t)) { -+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { -+#if LJ_SOFTFP -+ asm_sfpcomp(as, ir); -+#else - asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); - emit_fgh(as, MIPSI_C_EQ_D, 0, left, right); -+#endif - } else { - asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right); - } -@@ -2263,7 +2401,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) - if ((sn & SNAP_NORESTORE)) - continue; - if (irt_isnum(ir->t)) { --#if LJ_SOFTFP -+#if LJ_SOFTFP32 - Reg tmp; - RegSet allow = rset_exclude(RSET_GPR, RID_BASE); - lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ -@@ -2272,6 +2410,9 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) - if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); - tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); - emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); -+#elif LJ_SOFTFP /* && LJ_64 */ -+ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); -+ emit_tsi(as, MIPSI_SD, src, RID_BASE, ofs); - #else - Reg src = ra_alloc1(as, ref, RSET_FPR); - emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs); -diff --git a/src/lj_crecord.c b/src/lj_crecord.c -index e32ae23..fd59e28 100644 ---- a/src/lj_crecord.c -+++ b/src/lj_crecord.c -@@ -212,7 +212,7 @@ static void crec_copy_emit(jit_State *J, CRecMemList *ml, MSize mlp, - ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0); - ml[i].trofs = trofs; - i++; -- rwin += (LJ_SOFTFP && ml[i].tp == IRT_NUM) ? 2 : 1; -+ rwin += (LJ_SOFTFP32 && ml[i].tp == IRT_NUM) ? 2 : 1; - if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. */ - rwin = 0; - for ( ; j < i; j++) { -@@ -1130,7 +1130,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, - else - tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT); - } -- } else if (LJ_SOFTFP && ctype_isfp(d->info) && d->size > 4) { -+ } else if (LJ_SOFTFP32 && ctype_isfp(d->info) && d->size > 4) { - lj_needsplit(J); - } - #if LJ_TARGET_X86 -diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h -index 8a9ee24..bb6593a 100644 ---- a/src/lj_emit_mips.h -+++ b/src/lj_emit_mips.h -@@ -12,6 +12,8 @@ static intptr_t get_k64val(IRIns *ir) - return (intptr_t)ir_kgc(ir); - } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { - return (intptr_t)ir_kptr(ir); -+ } else if (LJ_SOFTFP && ir->o == IR_KNUM) { -+ return (intptr_t)ir_knum(ir)->u64; - } else { - lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); - return ir->i; /* Sign-extended. */ -diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c -index dfdee2d..849d7a2 100644 ---- a/src/lj_ffrecord.c -+++ b/src/lj_ffrecord.c -@@ -1012,7 +1012,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) - handle_num: - tra = lj_ir_tonum(J, tra); - tr = lj_ir_call(J, id, tr, trsf, tra); -- if (LJ_SOFTFP) lj_needsplit(J); -+ if (LJ_SOFTFP32) lj_needsplit(J); - break; - case STRFMT_STR: - if (!tref_isstr(tra)) { -diff --git a/src/lj_ircall.h b/src/lj_ircall.h -index 973c36e..7312006 100644 ---- a/src/lj_ircall.h -+++ b/src/lj_ircall.h -@@ -51,7 +51,7 @@ typedef struct CCallInfo { - #define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3) - #define CCI_XA (1u << CCI_XARGS_SHIFT) - --#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) -+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) - #define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci))) - #else - #define CCI_XNARGS(ci) CCI_NARGS((ci)) -@@ -78,13 +78,19 @@ typedef struct CCallInfo { - #define IRCALLCOND_SOFTFP_FFI(x) NULL - #endif - --#if LJ_SOFTFP && LJ_TARGET_MIPS32 -+#if LJ_SOFTFP && LJ_TARGET_MIPS - #define IRCALLCOND_SOFTFP_MIPS(x) x - #else - #define IRCALLCOND_SOFTFP_MIPS(x) NULL - #endif - --#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS32) -+#if LJ_SOFTFP && LJ_TARGET_MIPS64 -+#define IRCALLCOND_SOFTFP_MIPS64(x) x -+#else -+#define IRCALLCOND_SOFTFP_MIPS64(x) NULL -+#endif -+ -+#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS) - - #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) - #define IRCALLCOND_FP64_FFI(x) x -@@ -112,6 +118,14 @@ typedef struct CCallInfo { - #define XA2_FP 0 - #endif - -+#if LJ_SOFTFP32 -+#define XA_FP32 CCI_XA -+#define XA2_FP32 (CCI_XA+CCI_XA) -+#else -+#define XA_FP32 0 -+#define XA2_FP32 0 -+#endif -+ - #if LJ_32 - #define XA_64 CCI_XA - #define XA2_64 (CCI_XA+CCI_XA) -@@ -181,20 +195,21 @@ typedef struct CCallInfo { - _(ANY, pow, 2, N, NUM, XA2_FP) \ - _(ANY, atan2, 2, N, NUM, XA2_FP) \ - _(ANY, ldexp, 2, N, NUM, XA_FP) \ -- _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \ -- _(SOFTFP, softfp_add, 4, N, NUM, 0) \ -- _(SOFTFP, softfp_sub, 4, N, NUM, 0) \ -- _(SOFTFP, softfp_mul, 4, N, NUM, 0) \ -- _(SOFTFP, softfp_div, 4, N, NUM, 0) \ -- _(SOFTFP, softfp_cmp, 4, N, NIL, 0) \ -+ _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \ -+ _(SOFTFP, softfp_add, 2, N, NUM, XA2_FP32) \ -+ _(SOFTFP, softfp_sub, 2, N, NUM, XA2_FP32) \ -+ _(SOFTFP, softfp_mul, 2, N, NUM, XA2_FP32) \ -+ _(SOFTFP, softfp_div, 2, N, NUM, XA2_FP32) \ -+ _(SOFTFP, softfp_cmp, 2, N, NIL, XA2_FP32) \ - _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \ -- _(SOFTFP, softfp_d2i, 2, N, INT, 0) \ -- _(SOFTFP_MIPS, lj_vm_sfmin, 4, N, NUM, 0) \ -- _(SOFTFP_MIPS, lj_vm_sfmax, 4, N, NUM, 0) \ -+ _(SOFTFP, softfp_d2i, 1, N, INT, XA_FP32) \ -+ _(SOFTFP_MIPS, lj_vm_sfmin, 2, N, NUM, XA2_FP32) \ -+ _(SOFTFP_MIPS, lj_vm_sfmax, 2, N, NUM, XA2_FP32) \ -+ _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \ - _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ - _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ -- _(SOFTFP_FFI, softfp_d2ui, 2, N, INT, 0) \ -- _(SOFTFP_FFI, softfp_d2f, 2, N, FLOAT, 0) \ -+ _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \ -+ _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \ - _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ - _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ - _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ -diff --git a/src/lj_iropt.h b/src/lj_iropt.h -index 73aef0e..a59ba3f 100644 ---- a/src/lj_iropt.h -+++ b/src/lj_iropt.h -@@ -150,7 +150,7 @@ LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase); - /* Optimization passes. */ - LJ_FUNC void lj_opt_dce(jit_State *J); - LJ_FUNC int lj_opt_loop(jit_State *J); --#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) -+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) - LJ_FUNC void lj_opt_split(jit_State *J); - #else - #define lj_opt_split(J) UNUSED(J) -diff --git a/src/lj_jit.h b/src/lj_jit.h -index 2fa8efc..f37e792 100644 ---- a/src/lj_jit.h -+++ b/src/lj_jit.h -@@ -374,7 +374,7 @@ enum { - ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) - - /* Set/reset flag to activate the SPLIT pass for the current trace. */ --#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) -+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) - #define lj_needsplit(J) (J->needsplit = 1) - #define lj_resetsplit(J) (J->needsplit = 0) - #else -@@ -437,7 +437,7 @@ typedef struct jit_State { - MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ - - PostProc postproc; /* Required post-processing after execution. */ --#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) -+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) - uint8_t needsplit; /* Need SPLIT pass. */ - #endif - uint8_t retryrec; /* Retry recording. */ -diff --git a/src/lj_obj.h b/src/lj_obj.h -index 52372c3..c7e4742 100644 ---- a/src/lj_obj.h -+++ b/src/lj_obj.h -@@ -924,6 +924,9 @@ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2) - - #if LJ_SOFTFP - LJ_ASMF int32_t lj_vm_tobit(double x); -+#if LJ_TARGET_MIPS64 -+LJ_ASMF int32_t lj_vm_tointg(double x); -+#endif - #endif - - static LJ_AINLINE int32_t lj_num2bit(lua_Number n) -diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c -index fc93520..79ac3cc 100644 ---- a/src/lj_opt_split.c -+++ b/src/lj_opt_split.c -@@ -8,7 +8,7 @@ - - #include "lj_obj.h" - --#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) -+#if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) - - #include "lj_err.h" - #include "lj_buf.h" -diff --git a/src/lj_snap.c b/src/lj_snap.c -index bb063c2..44fa379 100644 ---- a/src/lj_snap.c -+++ b/src/lj_snap.c -@@ -93,7 +93,7 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) - (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) - sn |= SNAP_NORESTORE; - } -- if (LJ_SOFTFP && irt_isnum(ir->t)) -+ if (LJ_SOFTFP32 && irt_isnum(ir->t)) - sn |= SNAP_SOFTFPNUM; - map[n++] = sn; - } -@@ -374,7 +374,7 @@ IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir) - break; - } - } -- } else if (LJ_SOFTFP && ir->o == IR_HIOP) { -+ } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) { - ref++; - } else if (ir->o == IR_PVAL) { - ref = ir->op1 + REF_BIAS; -@@ -486,7 +486,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T) - } else { - IRType t = irt_type(ir->t); - uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; -- if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; -+ if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; - if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); - tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); - } -@@ -520,7 +520,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T) - if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { - if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) - snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); -- else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && -+ else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) && - irs+1 < irlast && (irs+1)->o == IR_HIOP) - snap_pref(J, T, map, nent, seen, (irs+1)->op2); - } -@@ -579,10 +579,10 @@ void lj_snap_replay(jit_State *J, GCtrace *T) - lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); - val = snap_pref(J, T, map, nent, seen, irc->op1); - val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); -- } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && -+ } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) && - irs+1 < irlast && (irs+1)->o == IR_HIOP) { - IRType t = IRT_I64; -- if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP) -+ if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP) - t = IRT_NUM; - lj_needsplit(J); - if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { -@@ -635,7 +635,7 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, - int32_t *sps = &ex->spill[regsp_spill(rs)]; - if (irt_isinteger(t)) { - setintV(o, *sps); --#if !LJ_SOFTFP -+#if !LJ_SOFTFP32 - } else if (irt_isnum(t)) { - o->u64 = *(uint64_t *)sps; - #endif -@@ -660,6 +660,9 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, - #if !LJ_SOFTFP - } else if (irt_isnum(t)) { - setnumV(o, ex->fpr[r-RID_MIN_FPR]); -+#elif LJ_64 /* && LJ_SOFTFP */ -+ } else if (irt_isnum(t)) { -+ o->u64 = ex->gpr[r-RID_MIN_GPR]; - #endif - #if LJ_64 && !LJ_GC64 - } else if (irt_is64(t)) { -@@ -813,7 +816,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, - val = lj_tab_set(J->L, t, &tmp); - /* NOBARRIER: The table is new (marked white). */ - snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val); -- if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { -+ if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { - snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp); - val->u32.hi = tmp.u32.lo; - } -@@ -874,7 +877,7 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) - continue; - } - snap_restoreval(J, T, ex, snapno, rfilt, ref, o); -- if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { -+ if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { - TValue tmp; - snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); - o->u32.hi = tmp.u32.lo; -diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc -index c06270a..75b38de 100644 ---- a/src/vm_mips64.dasc -+++ b/src/vm_mips64.dasc -@@ -1980,6 +1980,38 @@ static void build_subroutines(BuildCtx *ctx) - |1: - | jr ra - |. move CRET1, r0 -+ | -+ |// FP number to int conversion with a check for soft-float. -+ |// Modifies CARG1, CRET1, CRET2, TMP0, AT. -+ |->vm_tointg: -+ |.if JIT -+ | dsll CRET2, CARG1, 1 -+ | beqz CRET2, >2 -+ |. li TMP0, 1076 -+ | dsrl AT, CRET2, 53 -+ | dsubu TMP0, TMP0, AT -+ | sltiu AT, TMP0, 54 -+ | beqz AT, >1 -+ |. dextm CRET2, CRET2, 0, 20 -+ | dinsu CRET2, AT, 21, 21 -+ | slt AT, CARG1, r0 -+ | dsrlv CRET1, CRET2, TMP0 -+ | dsubu CARG1, r0, CRET1 -+ | movn CRET1, CARG1, AT -+ | li CARG1, 64 -+ | subu TMP0, CARG1, TMP0 -+ | dsllv CRET2, CRET2, TMP0 // Integer check. -+ | sextw AT, CRET1 -+ | xor AT, CRET1, AT // Range check. -+ | jr ra -+ |. movz CRET2, AT, CRET2 -+ |1: -+ | jr ra -+ |. li CRET2, 1 -+ |2: -+ | jr ra -+ |. move CRET1, r0 -+ |.endif - |.endif - | - |.macro .ffunc_bit, name -@@ -2665,6 +2697,23 @@ static void build_subroutines(BuildCtx *ctx) - |. li CRET1, 0 - |.endif - | -+ |.macro sfmin_max, name, intins -+ |->vm_sf .. name: -+ |.if JIT and not FPU -+ | move TMP2, ra -+ | bal ->vm_sfcmpolt -+ |. nop -+ | move ra, TMP2 -+ | move TMP0, CRET1 -+ | move CRET1, CARG1 -+ | jr ra -+ |. intins CRET1, CARG2, TMP0 -+ |.endif -+ |.endmacro -+ | -+ | sfmin_max min, movz -+ | sfmin_max max, movn -+ | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- --- -2.20.1 - diff --git a/0006-FreeBSD-x64-Avoid-changing-resource-limits-if-not-ne.patch b/0006-FreeBSD-x64-Avoid-changing-resource-limits-if-not-ne.patch deleted file mode 100644 index 133018d..0000000 --- a/0006-FreeBSD-x64-Avoid-changing-resource-limits-if-not-ne.patch +++ /dev/null @@ -1,26 +0,0 @@ -From b0ecc6dd65a0b40e1868f20719c4f7c4880dc32d Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Thu, 8 Jun 2017 00:15:15 +0200 -Subject: [PATCH 06/72] FreeBSD/x64: Avoid changing resource limits, if not - needed. - ---- - src/lj_alloc.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/lj_alloc.c b/src/lj_alloc.c -index 95d15d0..9fc761c 100644 ---- a/src/lj_alloc.c -+++ b/src/lj_alloc.c -@@ -343,7 +343,7 @@ static void *CALL_MMAP(size_t size) - } - #endif - --#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 -+#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 - - #include - --- -2.20.1 - diff --git a/0007-Remove-unused-define.patch b/0007-Remove-unused-define.patch deleted file mode 100644 index c4729e1..0000000 --- a/0007-Remove-unused-define.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 6a71e71c1430e5a8f794a52cb2da66e2693db796 Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Sun, 11 Jun 2017 10:02:08 +0200 -Subject: [PATCH 07/72] Remove unused define. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Suggested by 罗泽轩. ---- - src/lj_def.h | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/src/lj_def.h b/src/lj_def.h -index 2d8fff6..e67bb24 100644 ---- a/src/lj_def.h -+++ b/src/lj_def.h -@@ -80,7 +80,6 @@ typedef unsigned int uintptr_t; - #define LJ_MIN_SBUF 32 /* Min. string buffer length. */ - #define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */ - #define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */ --#define LJ_MIN_K64SZ 16 /* Min. size for chained K64Array. */ - - /* JIT compiler limits. */ - #define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */ --- -2.20.1 - diff --git a/0008-Modify-fix-for-warning-from-ar.patch b/0008-Modify-fix-for-warning-from-ar.patch deleted file mode 100644 index 4d9b0e4..0000000 --- a/0008-Modify-fix-for-warning-from-ar.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 82151a4514e6538086f3f5e01cb8d4b22287b14f Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Mon, 12 Jun 2017 09:24:00 +0200 -Subject: [PATCH 08/72] Modify fix for warning from 'ar'. - ---- - src/Makefile | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/src/Makefile b/src/Makefile -index f7f81a4..24e8c0e 100644 ---- a/src/Makefile -+++ b/src/Makefile -@@ -208,7 +208,7 @@ TARGET_CC= $(STATIC_CC) - TARGET_STCC= $(STATIC_CC) - TARGET_DYNCC= $(DYNAMIC_CC) - TARGET_LD= $(CROSS)$(CC) --TARGET_AR= $(CROSS)ar rcus 2>/dev/null -+TARGET_AR= $(CROSS)ar rcus - TARGET_STRIP= $(CROSS)strip - - TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib) -@@ -293,6 +293,7 @@ ifeq (Windows,$(TARGET_SYS)) - TARGET_XSHLDFLAGS= -shared - TARGET_DYNXLDOPTS= - else -+ TARGET_AR+= 2>/dev/null - ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1)) - TARGET_XCFLAGS+= -fno-stack-protector - endif --- -2.20.1 - diff --git a/0009-x64-LJ_GC64-Fix-emit_rma.patch b/0009-x64-LJ_GC64-Fix-emit_rma.patch deleted file mode 100644 index ff59f09..0000000 --- a/0009-x64-LJ_GC64-Fix-emit_rma.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 7e662e4f87134f1e84f7bea80933e033c5bf53a3 Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Wed, 26 Jul 2017 09:52:53 +0200 -Subject: [PATCH 09/72] x64/LJ_GC64: Fix emit_rma(). - ---- - src/lj_emit_x86.h | 24 +++++++++++++++++++++--- - 1 file changed, 21 insertions(+), 3 deletions(-) - -diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h -index 5207f9d..5b139bd 100644 ---- a/src/lj_emit_x86.h -+++ b/src/lj_emit_x86.h -@@ -343,9 +343,27 @@ static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) - emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr)); - } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) { - emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr)); -- } else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) { -- emit_rmro(as, xo, rr, rr, 0); -- emit_loadu64(as, rr, (uintptr_t)addr); -+ } else if (!checki32((intptr_t)addr)) { -+ Reg ra = (rr & 15); -+ if (xo != XO_MOV) { -+ /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */ -+ uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch; -+ uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0; -+ ra = RID_DISPATCH; -+ if (checku32(dispaddr)) { -+ emit_loadi(as, ra, (int32_t)dispaddr); -+ } else { /* Full-size 64 bit load. */ -+ MCode *p = as->mcp; -+ *(uint64_t *)(p-8) = dispaddr; -+ p[-9] = (MCode)(XI_MOVri+(ra&7)); -+ p[-10] = 0x48 + ((ra>>3)&1); -+ p -= 10; -+ as->mcp = p; -+ } -+ if (xo == XO_GROUP3b) emit_i8(as, i8); -+ } -+ emit_rmro(as, xo, rr, ra, 0); -+ emit_loadu64(as, ra, (uintptr_t)addr); - } else - #endif - { --- -2.20.1 - diff --git a/0010-PPC-Add-soft-float-support-to-interpreter.patch b/0010-PPC-Add-soft-float-support-to-interpreter.patch deleted file mode 100644 index 52d3638..0000000 --- a/0010-PPC-Add-soft-float-support-to-interpreter.patch +++ /dev/null @@ -1,2761 +0,0 @@ -From fd37da0d586c331b0008fbfd653a9659344fe76f Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Wed, 26 Jul 2017 09:52:19 +0200 -Subject: [PATCH 10/72] PPC: Add soft-float support to interpreter. - -Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. -Sponsored by Cisco Systems, Inc. ---- - src/host/buildvm_asm.c | 2 +- - src/lj_arch.h | 29 +- - src/lj_ccall.c | 38 +- - src/lj_ccall.h | 4 +- - src/lj_ccallback.c | 30 +- - src/lj_frame.h | 2 +- - src/lj_ircall.h | 2 +- - src/vm_ppc.dasc | 1249 +++++++++++++++++++++++++++++++++------- - 8 files changed, 1101 insertions(+), 255 deletions(-) - -diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c -index ffd1490..43595b3 100644 ---- a/src/host/buildvm_asm.c -+++ b/src/host/buildvm_asm.c -@@ -338,7 +338,7 @@ void emit_asm(BuildCtx *ctx) - #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA) - fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n"); - #endif --#if LJ_TARGET_PPC && !LJ_TARGET_PS3 -+#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP - /* Hard-float ABI. */ - fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n"); - #endif -diff --git a/src/lj_arch.h b/src/lj_arch.h -index b770564..0145a7c 100644 ---- a/src/lj_arch.h -+++ b/src/lj_arch.h -@@ -254,6 +254,29 @@ - #else - #define LJ_ARCH_BITS 32 - #define LJ_ARCH_NAME "ppc" -+ -+#if !defined(LJ_ARCH_HASFPU) -+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) -+#define LJ_ARCH_HASFPU 0 -+#else -+#define LJ_ARCH_HASFPU 1 -+#endif -+#endif -+ -+#if !defined(LJ_ABI_SOFTFP) -+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) -+#define LJ_ABI_SOFTFP 1 -+#else -+#define LJ_ABI_SOFTFP 0 -+#endif -+#endif -+#endif -+ -+#if LJ_ABI_SOFTFP -+#define LJ_ARCH_NOJIT 1 /* NYI */ -+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -+#else -+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE - #endif - - #define LJ_TARGET_PPC 1 -@@ -262,7 +285,6 @@ - #define LJ_TARGET_MASKSHIFT 0 - #define LJ_TARGET_MASKROT 1 - #define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */ --#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE - - #if LJ_TARGET_CONSOLE - #define LJ_ARCH_PPC32ON64 1 -@@ -415,16 +437,13 @@ - #error "No support for ILP32 model on ARM64" - #endif - #elif LJ_TARGET_PPC --#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) --#error "No support for PowerPC CPUs without double-precision FPU" --#endif - #if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE - #error "No support for little-endian PPC32" - #endif - #if LJ_ARCH_PPC64 - #error "No support for PowerPC 64 bit mode (yet)" - #endif --#ifdef __NO_FPRS__ -+#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT) - #error "No support for PPC/e500 anymore (use LuaJIT 2.0)" - #endif - #elif LJ_TARGET_MIPS32 -diff --git a/src/lj_ccall.c b/src/lj_ccall.c -index 5c252e5..799be48 100644 ---- a/src/lj_ccall.c -+++ b/src/lj_ccall.c -@@ -387,6 +387,24 @@ - #define CCALL_HANDLE_COMPLEXARG \ - /* Pass complex by value in 2 or 4 GPRs. */ - -+#define CCALL_HANDLE_GPR \ -+ /* Try to pass argument in GPRs. */ \ -+ if (n > 1) { \ -+ lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \ -+ if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \ -+ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ -+ else if (ngpr + n > maxgpr) \ -+ ngpr = maxgpr; /* Prevent reordering. */ \ -+ } \ -+ if (ngpr + n <= maxgpr) { \ -+ dp = &cc->gpr[ngpr]; \ -+ ngpr += n; \ -+ goto done; \ -+ } \ -+ -+#if LJ_ABI_SOFTFP -+#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR -+#else - #define CCALL_HANDLE_REGARG \ - if (isfp) { /* Try to pass argument in FPRs. */ \ - if (nfpr + 1 <= CCALL_NARG_FPR) { \ -@@ -395,24 +413,16 @@ - d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ - goto done; \ - } \ -- } else { /* Try to pass argument in GPRs. */ \ -- if (n > 1) { \ -- lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \ -- if (ctype_isinteger(d->info)) \ -- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ -- else if (ngpr + n > maxgpr) \ -- ngpr = maxgpr; /* Prevent reordering. */ \ -- } \ -- if (ngpr + n <= maxgpr) { \ -- dp = &cc->gpr[ngpr]; \ -- ngpr += n; \ -- goto done; \ -- } \ -+ } else { \ -+ CCALL_HANDLE_GPR \ - } -+#endif - -+#if !LJ_ABI_SOFTFP - #define CCALL_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ -+#endif - - #elif LJ_TARGET_MIPS32 - /* -- MIPS o32 calling conventions ---------------------------------------- */ -@@ -1080,7 +1090,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, - } - if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ - --#if LJ_TARGET_X64 || LJ_TARGET_PPC -+#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) - cc->nfpr = nfpr; /* Required for vararg functions. */ - #endif - cc->nsp = nsp; -diff --git a/src/lj_ccall.h b/src/lj_ccall.h -index 59f6648..6efa48c 100644 ---- a/src/lj_ccall.h -+++ b/src/lj_ccall.h -@@ -86,9 +86,9 @@ typedef union FPRArg { - #elif LJ_TARGET_PPC - - #define CCALL_NARG_GPR 8 --#define CCALL_NARG_FPR 8 -+#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8) - #define CCALL_NRET_GPR 4 /* For complex double. */ --#define CCALL_NRET_FPR 1 -+#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1) - #define CCALL_SPS_EXTRA 4 - #define CCALL_SPS_FREE 0 - -diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c -index 846827b..03494a7 100644 ---- a/src/lj_ccallback.c -+++ b/src/lj_ccallback.c -@@ -419,6 +419,23 @@ void lj_ccallback_mcode_free(CTState *cts) - - #elif LJ_TARGET_PPC - -+#define CALLBACK_HANDLE_GPR \ -+ if (n > 1) { \ -+ lua_assert(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \ -+ ctype_isinteger(cta->info)) && n == 2); /* int64_t. */ \ -+ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ -+ } \ -+ if (ngpr + n <= maxgpr) { \ -+ sp = &cts->cb.gpr[ngpr]; \ -+ ngpr += n; \ -+ goto done; \ -+ } -+ -+#if LJ_ABI_SOFTFP -+#define CALLBACK_HANDLE_REGARG \ -+ CALLBACK_HANDLE_GPR \ -+ UNUSED(isfp); -+#else - #define CALLBACK_HANDLE_REGARG \ - if (isfp) { \ - if (nfpr + 1 <= CCALL_NARG_FPR) { \ -@@ -427,20 +444,15 @@ void lj_ccallback_mcode_free(CTState *cts) - goto done; \ - } \ - } else { /* Try to pass argument in GPRs. */ \ -- if (n > 1) { \ -- lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \ -- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ -- } \ -- if (ngpr + n <= maxgpr) { \ -- sp = &cts->cb.gpr[ngpr]; \ -- ngpr += n; \ -- goto done; \ -- } \ -+ CALLBACK_HANDLE_GPR \ - } -+#endif - -+#if !LJ_ABI_SOFTFP - #define CALLBACK_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */ -+#endif - - #elif LJ_TARGET_MIPS32 - -diff --git a/src/lj_frame.h b/src/lj_frame.h -index 19c49a4..04cb5a3 100644 ---- a/src/lj_frame.h -+++ b/src/lj_frame.h -@@ -226,7 +226,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ - #define CFRAME_OFS_L 36 - #define CFRAME_OFS_PC 32 - #define CFRAME_OFS_MULTRES 28 --#define CFRAME_SIZE 272 -+#define CFRAME_SIZE (LJ_ARCH_HASFPU ? 272 : 128) - #define CFRAME_SHIFT_MULTRES 3 - #endif - #elif LJ_TARGET_MIPS32 -diff --git a/src/lj_ircall.h b/src/lj_ircall.h -index 7312006..9b3883b 100644 ---- a/src/lj_ircall.h -+++ b/src/lj_ircall.h -@@ -287,7 +287,7 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; - #define fp64_f2l __aeabi_f2lz - #define fp64_f2ul __aeabi_f2ulz - #endif --#elif LJ_TARGET_MIPS -+#elif LJ_TARGET_MIPS || LJ_TARGET_PPC - #define softfp_add __adddf3 - #define softfp_sub __subdf3 - #define softfp_mul __muldf3 -diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc -index b4260eb..0839668 100644 ---- a/src/vm_ppc.dasc -+++ b/src/vm_ppc.dasc -@@ -103,6 +103,18 @@ - |// Fixed register assignments for the interpreter. - |// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) - | -+|.macro .FPU, a, b -+|.if FPU -+| a, b -+|.endif -+|.endmacro -+| -+|.macro .FPU, a, b, c -+|.if FPU -+| a, b, c -+|.endif -+|.endmacro -+| - |// The following must be C callee-save (but BASE is often refetched). - |.define BASE, r14 // Base of current Lua stack frame. - |.define KBASE, r15 // Constants of current Lua function. -@@ -116,8 +128,10 @@ - |.define TISNUM, r22 - |.define TISNIL, r23 - |.define ZERO, r24 -+|.if FPU - |.define TOBIT, f30 // 2^52 + 2^51. - |.define TONUM, f31 // 2^52 + 2^51 + 2^31. -+|.endif - | - |// The following temporaries are not saved across C calls, except for RA. - |.define RA, r20 // Callee-save. -@@ -133,6 +147,7 @@ - | - |// Saved temporaries. - |.define SAVE0, r21 -+|.define SAVE1, r25 - | - |// Calling conventions. - |.define CARG1, r3 -@@ -141,8 +156,10 @@ - |.define CARG4, r6 // Overlaps TMP3. - |.define CARG5, r7 // Overlaps INS. - | -+|.if FPU - |.define FARG1, f1 - |.define FARG2, f2 -+|.endif - | - |.define CRET1, r3 - |.define CRET2, r4 -@@ -213,10 +230,16 @@ - |.endif - |.else - | -+|.if FPU - |.define SAVE_LR, 276(sp) - |.define CFRAME_SPACE, 272 // Delta for sp. - |// Back chain for sp: 272(sp) <-- sp entering interpreter - |.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. -+|.else -+|.define SAVE_LR, 132(sp) -+|.define CFRAME_SPACE, 128 // Delta for sp. -+|// Back chain for sp: 128(sp) <-- sp entering interpreter -+|.endif - |.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. - |.define SAVE_CR, 52(sp) // 32 bit CR save. - |.define SAVE_ERRF, 48(sp) // 32 bit C frame info. -@@ -226,16 +249,25 @@ - |.define SAVE_PC, 32(sp) - |.define SAVE_MULTRES, 28(sp) - |.define UNUSED1, 24(sp) -+|.if FPU - |.define TMPD_LO, 20(sp) - |.define TMPD_HI, 16(sp) - |.define TONUM_LO, 12(sp) - |.define TONUM_HI, 8(sp) -+|.else -+|.define SFSAVE_4, 20(sp) -+|.define SFSAVE_3, 16(sp) -+|.define SFSAVE_2, 12(sp) -+|.define SFSAVE_1, 8(sp) -+|.endif - |// Next frame lr: 4(sp) - |// Back chain for sp: 0(sp) <-- sp while in interpreter - | -+|.if FPU - |.define TMPD_BLO, 23(sp) - |.define TMPD, TMPD_HI - |.define TONUM_D, TONUM_HI -+|.endif - | - |.endif - | -@@ -245,7 +277,7 @@ - |.else - | stw r..reg, SAVE_GPR_+(reg-14)*4(sp) - |.endif --| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) -+| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) - |.endmacro - |.macro rest_, reg - |.if GPR64 -@@ -253,7 +285,7 @@ - |.else - | lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) - |.endif --| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) -+| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) - |.endmacro - | - |.macro saveregs -@@ -323,6 +355,7 @@ - |// Trap for not-yet-implemented parts. - |.macro NYI; tw 4, sp, sp; .endmacro - | -+|.if FPU - |// int/FP conversions. - |.macro tonum_i, freg, reg - | xoris reg, reg, 0x8000 -@@ -346,6 +379,7 @@ - |.macro toint, reg, freg - | toint reg, freg, freg - |.endmacro -+|.endif - | - |//----------------------------------------------------------------------- - | -@@ -533,9 +567,19 @@ static void build_subroutines(BuildCtx *ctx) - | beq >2 - |1: - | addic. TMP1, TMP1, -8 -+ |.if FPU - | lfd f0, 0(RA) -+ |.else -+ | lwz CARG1, 0(RA) -+ | lwz CARG2, 4(RA) -+ |.endif - | addi RA, RA, 8 -+ |.if FPU - | stfd f0, 0(BASE) -+ |.else -+ | stw CARG1, 0(BASE) -+ | stw CARG2, 4(BASE) -+ |.endif - | addi BASE, BASE, 8 - | bney <1 - | -@@ -613,23 +657,23 @@ static void build_subroutines(BuildCtx *ctx) - | .toc ld TOCREG, SAVE_TOC - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp BASE, L->base -- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). -+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | lwz DISPATCH, L->glref // Setup pointer to dispatch table. - | li ZERO, 0 -- | stw TMP3, TMPD -+ | .FPU stw TMP3, TMPD - | li TMP1, LJ_TFALSE -- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | li TISNIL, LJ_TNIL - | li_vmstate INTERP -- | lfs TOBIT, TMPD -+ | .FPU lfs TOBIT, TMPD - | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. - | la RA, -8(BASE) // Results start at BASE-8. -- | stw TMP3, TMPD -+ | .FPU stw TMP3, TMPD - | addi DISPATCH, DISPATCH, GG_G2DISP - | stw TMP1, 0(RA) // Prepend false to error message. - | li RD, 16 // 2 results: false + error message. - | st_vmstate -- | lfs TONUM, TMPD -+ | .FPU lfs TONUM, TMPD - | b ->vm_returnc - | - |//----------------------------------------------------------------------- -@@ -690,22 +734,22 @@ static void build_subroutines(BuildCtx *ctx) - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp TMP1, L->top - | lwz PC, FRAME_PC(BASE) -- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). -+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | stb CARG3, L->status -- | stw TMP3, TMPD -- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -- | lfs TOBIT, TMPD -+ | .FPU stw TMP3, TMPD -+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -+ | .FPU lfs TOBIT, TMPD - | sub RD, TMP1, BASE -- | stw TMP3, TMPD -- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) -+ | .FPU stw TMP3, TMPD -+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | addi RD, RD, 8 -- | stw TMP0, TONUM_HI -+ | .FPU stw TMP0, TONUM_HI - | li_vmstate INTERP - | li ZERO, 0 - | st_vmstate - | andix. TMP0, PC, FRAME_TYPE - | mr MULTRES, RD -- | lfs TONUM, TMPD -+ | .FPU lfs TONUM, TMPD - | li TISNIL, LJ_TNIL - | beq ->BC_RET_Z - | b ->vm_return -@@ -739,19 +783,19 @@ static void build_subroutines(BuildCtx *ctx) - | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp TMP1, L->top -- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). -+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | add PC, PC, BASE -- | stw TMP3, TMPD -+ | .FPU stw TMP3, TMPD - | li ZERO, 0 -- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -- | lfs TOBIT, TMPD -+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -+ | .FPU lfs TOBIT, TMPD - | sub PC, PC, TMP2 // PC = frame delta + frame type -- | stw TMP3, TMPD -- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) -+ | .FPU stw TMP3, TMPD -+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | sub NARGS8:RC, TMP1, BASE -- | stw TMP0, TONUM_HI -+ | .FPU stw TMP0, TONUM_HI - | li_vmstate INTERP -- | lfs TONUM, TMPD -+ | .FPU lfs TONUM, TMPD - | li TISNIL, LJ_TNIL - | st_vmstate - | -@@ -839,15 +883,30 @@ static void build_subroutines(BuildCtx *ctx) - | lwz INS, -4(PC) - | subi CARG2, RB, 16 - | decode_RB8 SAVE0, INS -+ |.if FPU - | lfd f0, 0(RA) -+ |.else -+ | lwz TMP2, 0(RA) -+ | lwz TMP3, 4(RA) -+ |.endif - | add TMP1, BASE, SAVE0 - | stp BASE, L->base - | cmplw TMP1, CARG2 - | sub CARG3, CARG2, TMP1 - | decode_RA8 RA, INS -+ |.if FPU - | stfd f0, 0(CARG2) -+ |.else -+ | stw TMP2, 0(CARG2) -+ | stw TMP3, 4(CARG2) -+ |.endif - | bney ->BC_CAT_Z -+ |.if FPU - | stfdx f0, BASE, RA -+ |.else -+ | stwux TMP2, RA, BASE -+ | stw TMP3, 4(RA) -+ |.endif - | b ->cont_nop - | - |//-- Table indexing metamethods ----------------------------------------- -@@ -900,9 +959,19 @@ static void build_subroutines(BuildCtx *ctx) - | // Returns TValue * (finished) or NULL (metamethod). - | cmplwi CRET1, 0 - | beq >3 -+ |.if FPU - | lfd f0, 0(CRET1) -+ |.else -+ | lwz TMP0, 0(CRET1) -+ | lwz TMP1, 4(CRET1) -+ |.endif - | ins_next1 -+ |.if FPU - | stfdx f0, BASE, RA -+ |.else -+ | stwux TMP0, RA, BASE -+ | stw TMP1, 4(RA) -+ |.endif - | ins_next2 - | - |3: // Call __index metamethod. -@@ -920,7 +989,12 @@ static void build_subroutines(BuildCtx *ctx) - | // Returns cTValue * or NULL. - | cmplwi CRET1, 0 - | beq >1 -+ |.if FPU - | lfd f14, 0(CRET1) -+ |.else -+ | lwz SAVE0, 0(CRET1) -+ | lwz SAVE1, 4(CRET1) -+ |.endif - | b ->BC_TGETR_Z - |1: - | stwx TISNIL, BASE, RA -@@ -975,11 +1049,21 @@ static void build_subroutines(BuildCtx *ctx) - | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | cmplwi CRET1, 0 -+ |.if FPU - | lfdx f0, BASE, RA -+ |.else -+ | lwzux TMP2, RA, BASE -+ | lwz TMP3, 4(RA) -+ |.endif - | beq >3 - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | ins_next1 -+ |.if FPU - | stfd f0, 0(CRET1) -+ |.else -+ | stw TMP2, 0(CRET1) -+ | stw TMP3, 4(CRET1) -+ |.endif - | ins_next2 - | - |3: // Call __newindex metamethod. -@@ -990,7 +1074,12 @@ static void build_subroutines(BuildCtx *ctx) - | add PC, TMP1, BASE - | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | li NARGS8:RC, 24 // 3 args for func(t, k, v) -+ |.if FPU - | stfd f0, 16(BASE) // Copy value to third argument. -+ |.else -+ | stw TMP2, 16(BASE) -+ | stw TMP3, 20(BASE) -+ |.endif - | b ->vm_call_dispatch_f - | - |->vmeta_tsetr: -@@ -998,7 +1087,12 @@ static void build_subroutines(BuildCtx *ctx) - | stw PC, SAVE_PC - | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - | // Returns TValue *. -+ |.if FPU - | stfd f14, 0(CRET1) -+ |.else -+ | stw SAVE0, 0(CRET1) -+ | stw SAVE1, 4(CRET1) -+ |.endif - | b ->cont_nop - | - |//-- Comparison metamethods --------------------------------------------- -@@ -1037,9 +1131,19 @@ static void build_subroutines(BuildCtx *ctx) - | - |->cont_ra: // RA = resultptr - | lwz INS, -4(PC) -+ |.if FPU - | lfd f0, 0(RA) -+ |.else -+ | lwz CARG1, 0(RA) -+ | lwz CARG2, 4(RA) -+ |.endif - | decode_RA8 TMP1, INS -+ |.if FPU - | stfdx f0, BASE, TMP1 -+ |.else -+ | stwux CARG1, TMP1, BASE -+ | stw CARG2, 4(TMP1) -+ |.endif - | b ->cont_nop - | - |->cont_condt: // RA = resultptr -@@ -1245,22 +1349,32 @@ static void build_subroutines(BuildCtx *ctx) - |.macro .ffunc_n, name - |->ff_ .. name: - | cmplwi NARGS8:RC, 8 -- | lwz CARG3, 0(BASE) -+ | lwz CARG1, 0(BASE) -+ |.if FPU - | lfd FARG1, 0(BASE) -+ |.else -+ | lwz CARG2, 4(BASE) -+ |.endif - | blt ->fff_fallback -- | checknum CARG3; bge ->fff_fallback -+ | checknum CARG1; bge ->fff_fallback - |.endmacro - | - |.macro .ffunc_nn, name - |->ff_ .. name: - | cmplwi NARGS8:RC, 16 -- | lwz CARG3, 0(BASE) -+ | lwz CARG1, 0(BASE) -+ |.if FPU - | lfd FARG1, 0(BASE) -- | lwz CARG4, 8(BASE) -+ | lwz CARG3, 8(BASE) - | lfd FARG2, 8(BASE) -+ |.else -+ | lwz CARG2, 4(BASE) -+ | lwz CARG3, 8(BASE) -+ | lwz CARG4, 12(BASE) -+ |.endif - | blt ->fff_fallback -+ | checknum CARG1; bge ->fff_fallback - | checknum CARG3; bge ->fff_fallback -- | checknum CARG4; bge ->fff_fallback - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. -@@ -1281,14 +1395,21 @@ static void build_subroutines(BuildCtx *ctx) - | bge cr1, ->fff_fallback - | stw CARG3, 0(RA) - | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. -+ | addi TMP1, BASE, 8 -+ | add TMP2, RA, NARGS8:RC - | stw CARG1, 4(RA) - | beq ->fff_res // Done if exactly 1 argument. -- | li TMP1, 8 -- | subi RC, RC, 8 - |1: -- | cmplw TMP1, RC -- | lfdx f0, BASE, TMP1 -- | stfdx f0, RA, TMP1 -+ | cmplw TMP1, TMP2 -+ |.if FPU -+ | lfd f0, 0(TMP1) -+ | stfd f0, 0(TMP1) -+ |.else -+ | lwz CARG1, 0(TMP1) -+ | lwz CARG2, 4(TMP1) -+ | stw CARG1, -8(TMP1) -+ | stw CARG2, -4(TMP1) -+ |.endif - | addi TMP1, TMP1, 8 - | bney <1 - | b ->fff_res -@@ -1303,8 +1424,14 @@ static void build_subroutines(BuildCtx *ctx) - | orc TMP1, TMP2, TMP0 - | addi TMP1, TMP1, ~LJ_TISNUM+1 - | slwi TMP1, TMP1, 3 -+ |.if FPU - | la TMP2, CFUNC:RB->upvalue - | lfdx FARG1, TMP2, TMP1 -+ |.else -+ | add TMP1, CFUNC:RB, TMP1 -+ | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi -+ | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo -+ |.endif - | b ->fff_resn - | - |//-- Base library: getters and setters --------------------------------- -@@ -1382,7 +1509,12 @@ static void build_subroutines(BuildCtx *ctx) - | mr CARG1, L - | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - | // Returns cTValue *. -+ |.if FPU - | lfd FARG1, 0(CRET1) -+ |.else -+ | lwz CARG2, 4(CRET1) -+ | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1. -+ |.endif - | b ->fff_resn - | - |//-- Base library: conversions ------------------------------------------ -@@ -1391,7 +1523,11 @@ static void build_subroutines(BuildCtx *ctx) - | // Only handles the number case inline (without a base argument). - | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) -+ |.if FPU - | lfd FARG1, 0(BASE) -+ |.else -+ | lwz CARG2, 4(BASE) -+ |.endif - | bne ->fff_fallback // Exactly one argument. - | checknum CARG1; bgt ->fff_fallback - | b ->fff_resn -@@ -1442,12 +1578,23 @@ static void build_subroutines(BuildCtx *ctx) - | cmplwi CRET1, 0 - | li CARG3, LJ_TNIL - | beq ->fff_restv // End of traversal: return nil. -- | lfd f0, 8(BASE) // Copy key and value to results. - | la RA, -8(BASE) -+ |.if FPU -+ | lfd f0, 8(BASE) // Copy key and value to results. - | lfd f1, 16(BASE) - | stfd f0, 0(RA) -- | li RD, (2+1)*8 - | stfd f1, 8(RA) -+ |.else -+ | lwz CARG1, 8(BASE) -+ | lwz CARG2, 12(BASE) -+ | lwz CARG3, 16(BASE) -+ | lwz CARG4, 20(BASE) -+ | stw CARG1, 0(RA) -+ | stw CARG2, 4(RA) -+ | stw CARG3, 8(RA) -+ | stw CARG4, 12(RA) -+ |.endif -+ | li RD, (2+1)*8 - | b ->fff_res - | - |.ffunc_1 pairs -@@ -1456,17 +1603,32 @@ static void build_subroutines(BuildCtx *ctx) - | bne ->fff_fallback - #if LJ_52 - | lwz TAB:TMP2, TAB:CARG1->metatable -+ |.if FPU - | lfd f0, CFUNC:RB->upvalue[0] -+ |.else -+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi -+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo -+ |.endif - | cmplwi TAB:TMP2, 0 - | la RA, -8(BASE) - | bne ->fff_fallback - #else -+ |.if FPU - | lfd f0, CFUNC:RB->upvalue[0] -+ |.else -+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi -+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo -+ |.endif - | la RA, -8(BASE) - #endif - | stw TISNIL, 8(BASE) - | li RD, (3+1)*8 -+ |.if FPU - | stfd f0, 0(RA) -+ |.else -+ | stw TMP0, 0(RA) -+ | stw TMP1, 4(RA) -+ |.endif - | b ->fff_res - | - |.ffunc ipairs_aux -@@ -1512,14 +1674,24 @@ static void build_subroutines(BuildCtx *ctx) - | stfd FARG2, 0(RA) - |.endif - | ble >2 // Not in array part? -+ |.if FPU - | lwzx TMP2, TMP1, TMP3 - | lfdx f0, TMP1, TMP3 -+ |.else -+ | lwzux TMP2, TMP1, TMP3 -+ | lwz TMP3, 4(TMP1) -+ |.endif - |1: - | checknil TMP2 - | li RD, (0+1)*8 - | beq ->fff_res // End of iteration, return 0 results. - | li RD, (2+1)*8 -+ |.if FPU - | stfd f0, 8(RA) -+ |.else -+ | stw TMP2, 8(RA) -+ | stw TMP3, 12(RA) -+ |.endif - | b ->fff_res - |2: // Check for empty hash part first. Otherwise call C function. - | lwz TMP0, TAB:CARG1->hmask -@@ -1533,7 +1705,11 @@ static void build_subroutines(BuildCtx *ctx) - | li RD, (0+1)*8 - | beq ->fff_res - | lwz TMP2, 0(CRET1) -+ |.if FPU - | lfd f0, 0(CRET1) -+ |.else -+ | lwz TMP3, 4(CRET1) -+ |.endif - | b <1 - | - |.ffunc_1 ipairs -@@ -1542,12 +1718,22 @@ static void build_subroutines(BuildCtx *ctx) - | bne ->fff_fallback - #if LJ_52 - | lwz TAB:TMP2, TAB:CARG1->metatable -+ |.if FPU - | lfd f0, CFUNC:RB->upvalue[0] -+ |.else -+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi -+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo -+ |.endif - | cmplwi TAB:TMP2, 0 - | la RA, -8(BASE) - | bne ->fff_fallback - #else -+ |.if FPU - | lfd f0, CFUNC:RB->upvalue[0] -+ |.else -+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi -+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo -+ |.endif - | la RA, -8(BASE) - #endif - |.if DUALNUM -@@ -1557,7 +1743,12 @@ static void build_subroutines(BuildCtx *ctx) - |.endif - | stw ZERO, 12(BASE) - | li RD, (3+1)*8 -+ |.if FPU - | stfd f0, 0(RA) -+ |.else -+ | stw TMP0, 0(RA) -+ | stw TMP1, 4(RA) -+ |.endif - | b ->fff_res - | - |//-- Base library: catch errors ---------------------------------------- -@@ -1576,19 +1767,32 @@ static void build_subroutines(BuildCtx *ctx) - | - |.ffunc xpcall - | cmplwi NARGS8:RC, 16 -- | lwz CARG4, 8(BASE) -+ | lwz CARG3, 8(BASE) -+ |.if FPU - | lfd FARG2, 8(BASE) - | lfd FARG1, 0(BASE) -+ |.else -+ | lwz CARG1, 0(BASE) -+ | lwz CARG2, 4(BASE) -+ | lwz CARG4, 12(BASE) -+ |.endif - | blt ->fff_fallback - | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) - | mr TMP2, BASE -- | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function. -+ | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function. - | la BASE, 16(BASE) - | // Remember active hook before pcall. - | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 -+ |.if FPU - | stfd FARG2, 0(TMP2) // Swap function and traceback. -- | subi NARGS8:RC, NARGS8:RC, 16 - | stfd FARG1, 8(TMP2) -+ |.else -+ | stw CARG3, 0(TMP2) -+ | stw CARG4, 4(TMP2) -+ | stw CARG1, 8(TMP2) -+ | stw CARG2, 12(TMP2) -+ |.endif -+ | subi NARGS8:RC, NARGS8:RC, 16 - | addi PC, TMP1, 16+FRAME_PCALL - | b ->vm_call_dispatch - | -@@ -1631,9 +1835,21 @@ static void build_subroutines(BuildCtx *ctx) - | stp BASE, L->top - |2: // Move args to coroutine. - | cmpw TMP1, NARGS8:RC -+ |.if FPU - | lfdx f0, BASE, TMP1 -+ |.else -+ | add CARG3, BASE, TMP1 -+ | lwz TMP2, 0(CARG3) -+ | lwz TMP3, 4(CARG3) -+ |.endif - | beq >3 -+ |.if FPU - | stfdx f0, CARG2, TMP1 -+ |.else -+ | add CARG3, CARG2, TMP1 -+ | stw TMP2, 0(CARG3) -+ | stw TMP3, 4(CARG3) -+ |.endif - | addi TMP1, TMP1, 8 - | b <2 - |3: -@@ -1664,8 +1880,17 @@ static void build_subroutines(BuildCtx *ctx) - | stp TMP2, L:SAVE0->top // Clear coroutine stack. - |5: // Move results from coroutine. - | cmplw TMP1, TMP3 -+ |.if FPU - | lfdx f0, TMP2, TMP1 - | stfdx f0, BASE, TMP1 -+ |.else -+ | add CARG3, TMP2, TMP1 -+ | lwz CARG1, 0(CARG3) -+ | lwz CARG2, 4(CARG3) -+ | add CARG3, BASE, TMP1 -+ | stw CARG1, 0(CARG3) -+ | stw CARG2, 4(CARG3) -+ |.endif - | addi TMP1, TMP1, 8 - | bne <5 - |6: -@@ -1690,12 +1915,22 @@ static void build_subroutines(BuildCtx *ctx) - | andix. TMP0, PC, FRAME_TYPE - | la TMP3, -8(TMP3) - | li TMP1, LJ_TFALSE -+ |.if FPU - | lfd f0, 0(TMP3) -+ |.else -+ | lwz CARG1, 0(TMP3) -+ | lwz CARG2, 4(TMP3) -+ |.endif - | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. - | li RD, (2+1)*8 - | stw TMP1, -8(BASE) // Prepend false to results. - | la RA, -8(BASE) -+ |.if FPU - | stfd f0, 0(BASE) // Copy error message. -+ |.else -+ | stw CARG1, 0(BASE) // Copy error message. -+ | stw CARG2, 4(BASE) -+ |.endif - | b <7 - |.else - | mr CARG1, L -@@ -1874,7 +2109,12 @@ static void build_subroutines(BuildCtx *ctx) - | lus CARG1, 0x8000 // -(2^31). - | beqy ->fff_resi - |5: -+ |.if FPU - | lfd FARG1, 0(BASE) -+ |.else -+ | lwz CARG1, 0(BASE) -+ | lwz CARG2, 4(BASE) -+ |.endif - | blex func - | b ->fff_resn - |.endmacro -@@ -1898,10 +2138,14 @@ static void build_subroutines(BuildCtx *ctx) - | - |.ffunc math_log - | cmplwi NARGS8:RC, 8 -- | lwz CARG3, 0(BASE) -- | lfd FARG1, 0(BASE) -+ | lwz CARG1, 0(BASE) - | bne ->fff_fallback // Need exactly 1 argument. -- | checknum CARG3; bge ->fff_fallback -+ | checknum CARG1; bge ->fff_fallback -+ |.if FPU -+ | lfd FARG1, 0(BASE) -+ |.else -+ | lwz CARG2, 4(BASE) -+ |.endif - | blex log - | b ->fff_resn - | -@@ -1923,17 +2167,24 @@ static void build_subroutines(BuildCtx *ctx) - |.if DUALNUM - |.ffunc math_ldexp - | cmplwi NARGS8:RC, 16 -- | lwz CARG3, 0(BASE) -+ | lwz TMP0, 0(BASE) -+ |.if FPU - | lfd FARG1, 0(BASE) -- | lwz CARG4, 8(BASE) -+ |.else -+ | lwz CARG1, 0(BASE) -+ | lwz CARG2, 4(BASE) -+ |.endif -+ | lwz TMP1, 8(BASE) - |.if GPR64 - | lwz CARG2, 12(BASE) -- |.else -+ |.elif FPU - | lwz CARG1, 12(BASE) -+ |.else -+ | lwz CARG3, 12(BASE) - |.endif - | blt ->fff_fallback -- | checknum CARG3; bge ->fff_fallback -- | checknum CARG4; bne ->fff_fallback -+ | checknum TMP0; bge ->fff_fallback -+ | checknum TMP1; bne ->fff_fallback - |.else - |.ffunc_nn math_ldexp - |.if GPR64 -@@ -1948,8 +2199,10 @@ static void build_subroutines(BuildCtx *ctx) - |.ffunc_n math_frexp - |.if GPR64 - | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) -- |.else -+ |.elif FPU - | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) -+ |.else -+ | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - |.endif - | lwz PC, FRAME_PC(BASE) - | blex frexp -@@ -1958,7 +2211,12 @@ static void build_subroutines(BuildCtx *ctx) - |.if not DUALNUM - | tonum_i FARG2, TMP1 - |.endif -+ |.if FPU - | stfd FARG1, 0(RA) -+ |.else -+ | stw CRET1, 0(RA) -+ | stw CRET2, 4(RA) -+ |.endif - | li RD, (2+1)*8 - |.if DUALNUM - | stw TISNUM, 8(RA) -@@ -1971,13 +2229,20 @@ static void build_subroutines(BuildCtx *ctx) - |.ffunc_n math_modf - |.if GPR64 - | la CARG2, -8(BASE) -- |.else -+ |.elif FPU - | la CARG1, -8(BASE) -+ |.else -+ | la CARG3, -8(BASE) - |.endif - | lwz PC, FRAME_PC(BASE) - | blex modf - | la RA, -8(BASE) -+ |.if FPU - | stfd FARG1, 0(BASE) -+ |.else -+ | stw CRET1, 0(BASE) -+ | stw CRET2, 4(BASE) -+ |.endif - | li RD, (2+1)*8 - | b ->fff_res - | -@@ -1985,13 +2250,13 @@ static void build_subroutines(BuildCtx *ctx) - |.if DUALNUM - | .ffunc_1 name - | checknum CARG3 -- | addi TMP1, BASE, 8 -- | add TMP2, BASE, NARGS8:RC -+ | addi SAVE0, BASE, 8 -+ | add SAVE1, BASE, NARGS8:RC - | bne >4 - |1: // Handle integers. -- | lwz CARG4, 0(TMP1) -- | cmplw cr1, TMP1, TMP2 -- | lwz CARG2, 4(TMP1) -+ | lwz CARG4, 0(SAVE0) -+ | cmplw cr1, SAVE0, SAVE1 -+ | lwz CARG2, 4(SAVE0) - | bge cr1, ->fff_resi - | checknum CARG4 - | xoris TMP0, CARG1, 0x8000 -@@ -2008,36 +2273,76 @@ static void build_subroutines(BuildCtx *ctx) - |.if GPR64 - | rldicl CARG1, CARG1, 0, 32 - |.endif -- | addi TMP1, TMP1, 8 -+ | addi SAVE0, SAVE0, 8 - | b <1 - |3: - | bge ->fff_fallback - | // Convert intermediate result to number and continue below. -+ |.if FPU - | tonum_i FARG1, CARG1 -- | lfd FARG2, 0(TMP1) -+ | lfd FARG2, 0(SAVE0) -+ |.else -+ | mr CARG2, CARG1 -+ | bl ->vm_sfi2d_1 -+ | lwz CARG3, 0(SAVE0) -+ | lwz CARG4, 4(SAVE0) -+ |.endif - | b >6 - |4: -+ |.if FPU - | lfd FARG1, 0(BASE) -+ |.else -+ | lwz CARG1, 0(BASE) -+ | lwz CARG2, 4(BASE) -+ |.endif - | bge ->fff_fallback - |5: // Handle numbers. -- | lwz CARG4, 0(TMP1) -- | cmplw cr1, TMP1, TMP2 -- | lfd FARG2, 0(TMP1) -+ | lwz CARG3, 0(SAVE0) -+ | cmplw cr1, SAVE0, SAVE1 -+ |.if FPU -+ | lfd FARG2, 0(SAVE0) -+ |.else -+ | lwz CARG4, 4(SAVE0) -+ |.endif - | bge cr1, ->fff_resn -- | checknum CARG4; bge >7 -+ | checknum CARG3; bge >7 - |6: -+ | addi SAVE0, SAVE0, 8 -+ |.if FPU - | fsub f0, FARG1, FARG2 -- | addi TMP1, TMP1, 8 - |.if ismax - | fsel FARG1, f0, FARG1, FARG2 - |.else - | fsel FARG1, f0, FARG2, FARG1 - |.endif -+ |.else -+ | stw CARG1, SFSAVE_1 -+ | stw CARG2, SFSAVE_2 -+ | stw CARG3, SFSAVE_3 -+ | stw CARG4, SFSAVE_4 -+ | blex __ledf2 -+ | cmpwi CRET1, 0 -+ |.if ismax -+ | blt >8 -+ |.else -+ | bge >8 -+ |.endif -+ | lwz CARG1, SFSAVE_1 -+ | lwz CARG2, SFSAVE_2 -+ | b <5 -+ |8: -+ | lwz CARG1, SFSAVE_3 -+ | lwz CARG2, SFSAVE_4 -+ |.endif - | b <5 - |7: // Convert integer to number and continue above. -- | lwz CARG2, 4(TMP1) -+ | lwz CARG3, 4(SAVE0) - | bne ->fff_fallback -- | tonum_i FARG2, CARG2 -+ |.if FPU -+ | tonum_i FARG2, CARG3 -+ |.else -+ | bl ->vm_sfi2d_2 -+ |.endif - | b <6 - |.else - | .ffunc_n name -@@ -2237,28 +2542,37 @@ static void build_subroutines(BuildCtx *ctx) - | - |.macro .ffunc_bit_op, name, ins - | .ffunc_bit name -- | addi TMP1, BASE, 8 -- | add TMP2, BASE, NARGS8:RC -+ | addi SAVE0, BASE, 8 -+ | add SAVE1, BASE, NARGS8:RC - |1: -- | lwz CARG4, 0(TMP1) -- | cmplw cr1, TMP1, TMP2 -+ | lwz CARG4, 0(SAVE0) -+ | cmplw cr1, SAVE0, SAVE1 - |.if DUALNUM -- | lwz CARG2, 4(TMP1) -+ | lwz CARG2, 4(SAVE0) - |.else -- | lfd FARG1, 0(TMP1) -+ | lfd FARG1, 0(SAVE0) - |.endif - | bgey cr1, ->fff_resi - | checknum CARG4 - |.if DUALNUM -+ |.if FPU - | bnel ->fff_bitop_fb - |.else -+ | beq >3 -+ | stw CARG1, SFSAVE_1 -+ | bl ->fff_bitop_fb -+ | mr CARG2, CARG1 -+ | lwz CARG1, SFSAVE_1 -+ |3: -+ |.endif -+ |.else - | fadd FARG1, FARG1, TOBIT - | bge ->fff_fallback - | stfd FARG1, TMPD - | lwz CARG2, TMPD_LO - |.endif - | ins CARG1, CARG1, CARG2 -- | addi TMP1, TMP1, 8 -+ | addi SAVE0, SAVE0, 8 - | b <1 - |.endmacro - | -@@ -2280,7 +2594,14 @@ static void build_subroutines(BuildCtx *ctx) - |.macro .ffunc_bit_sh, name, ins, shmod - |.if DUALNUM - | .ffunc_2 bit_..name -+ |.if FPU - | checknum CARG3; bnel ->fff_tobit_fb -+ |.else -+ | checknum CARG3; beq >1 -+ | bl ->fff_tobit_fb -+ | lwz CARG2, 12(BASE) // Conversion polluted CARG2. -+ |1: -+ |.endif - | // Note: no inline conversion from number for 2nd argument! - | checknum CARG4; bne ->fff_fallback - |.else -@@ -2317,27 +2638,77 @@ static void build_subroutines(BuildCtx *ctx) - |->fff_resn: - | lwz PC, FRAME_PC(BASE) - | la RA, -8(BASE) -+ |.if FPU - | stfd FARG1, -8(BASE) -+ |.else -+ | stw CARG1, -8(BASE) -+ | stw CARG2, -4(BASE) -+ |.endif - | b ->fff_res1 - | - |// Fallback FP number to bit conversion. - |->fff_tobit_fb: - |.if DUALNUM -+ |.if FPU - | lfd FARG1, 0(BASE) - | bgt ->fff_fallback - | fadd FARG1, FARG1, TOBIT - | stfd FARG1, TMPD - | lwz CARG1, TMPD_LO - | blr -+ |.else -+ | bgt ->fff_fallback -+ | mr CARG2, CARG1 -+ | mr CARG1, CARG3 -+ |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2. -+ |->vm_tobit: -+ | slwi TMP2, CARG1, 1 -+ | addis TMP2, TMP2, 0x0020 -+ | cmpwi TMP2, 0 -+ | bge >2 -+ | li TMP1, 0x3e0 -+ | srawi TMP2, TMP2, 21 -+ | not TMP1, TMP1 -+ | sub. TMP2, TMP1, TMP2 -+ | cmpwi cr7, CARG1, 0 -+ | blt >1 -+ | slwi TMP1, CARG1, 11 -+ | srwi TMP0, CARG2, 21 -+ | oris TMP1, TMP1, 0x8000 -+ | or TMP1, TMP1, TMP0 -+ | srw CARG1, TMP1, TMP2 -+ | bclr 4, 28 // Return if cr7[lt] == 0, no hint. -+ | neg CARG1, CARG1 -+ | blr -+ |1: -+ | addi TMP2, TMP2, 21 -+ | srw TMP1, CARG2, TMP2 -+ | slwi CARG2, CARG1, 12 -+ | subfic TMP2, TMP2, 20 -+ | slw TMP0, CARG2, TMP2 -+ | or CARG1, TMP1, TMP0 -+ | bclr 4, 28 // Return if cr7[lt] == 0, no hint. -+ | neg CARG1, CARG1 -+ | blr -+ |2: -+ | li CARG1, 0 -+ | blr -+ |.endif - |.endif - |->fff_bitop_fb: - |.if DUALNUM -- | lfd FARG1, 0(TMP1) -+ |.if FPU -+ | lfd FARG1, 0(SAVE0) - | bgt ->fff_fallback - | fadd FARG1, FARG1, TOBIT - | stfd FARG1, TMPD - | lwz CARG2, TMPD_LO - | blr -+ |.else -+ | bgt ->fff_fallback -+ | mr CARG1, CARG4 -+ | b ->vm_tobit -+ |.endif - |.endif - | - |//----------------------------------------------------------------------- -@@ -2530,10 +2901,21 @@ static void build_subroutines(BuildCtx *ctx) - | decode_RA8 RC, INS // Call base. - | beq >2 - |1: // Move results down. -+ |.if FPU - | lfd f0, 0(RA) -+ |.else -+ | lwz CARG1, 0(RA) -+ | lwz CARG2, 4(RA) -+ |.endif - | addic. TMP1, TMP1, -8 - | addi RA, RA, 8 -+ |.if FPU - | stfdx f0, BASE, RC -+ |.else -+ | add CARG3, BASE, RC -+ | stw CARG1, 0(CARG3) -+ | stw CARG2, 4(CARG3) -+ |.endif - | addi RC, RC, 8 - | bne <1 - |2: -@@ -2586,10 +2968,12 @@ static void build_subroutines(BuildCtx *ctx) - |//----------------------------------------------------------------------- - | - |.macro savex_, a, b, c, d -+ |.if FPU - | stfd f..a, 16+a*8(sp) - | stfd f..b, 16+b*8(sp) - | stfd f..c, 16+c*8(sp) - | stfd f..d, 16+d*8(sp) -+ |.endif - |.endmacro - | - |->vm_exit_handler: -@@ -2661,16 +3045,16 @@ static void build_subroutines(BuildCtx *ctx) - | lwz KBASE, PC2PROTO(k)(TMP1) - | // Setup type comparison constants. - | li TISNUM, LJ_TISNUM -- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). -- | stw TMP3, TMPD -+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). -+ | .FPU stw TMP3, TMPD - | li ZERO, 0 -- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -- | lfs TOBIT, TMPD -- | stw TMP3, TMPD -- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) -+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -+ | .FPU lfs TOBIT, TMPD -+ | .FPU stw TMP3, TMPD -+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | li TISNIL, LJ_TNIL -- | stw TMP0, TONUM_HI -- | lfs TONUM, TMPD -+ | .FPU stw TMP0, TONUM_HI -+ | .FPU lfs TONUM, TMPD - | // Modified copy of ins_next which handles function header dispatch, too. - | lwz INS, 0(PC) - | addi PC, PC, 4 -@@ -2715,7 +3099,35 @@ static void build_subroutines(BuildCtx *ctx) - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | -- |// NYI: Use internal implementations of floor, ceil, trunc. -+ |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp. -+ | -+ |.macro sfi2d, AHI, ALO -+ |.if not FPU -+ | mr. AHI, ALO -+ | bclr 12, 2 // Handle zero first. -+ | srawi TMP0, ALO, 31 -+ | xor TMP1, ALO, TMP0 -+ | sub TMP1, TMP1, TMP0 // Absolute value in TMP1. -+ | cntlzw AHI, TMP1 -+ | andix. TMP0, TMP0, 0x800 // Mask sign bit. -+ | slw TMP1, TMP1, AHI // Align mantissa left with leading 1. -+ | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI. -+ | slwi ALO, TMP1, 21 -+ | or AHI, AHI, TMP0 // Sign | Exponent. -+ | srwi TMP1, TMP1, 11 -+ | slwi AHI, AHI, 20 // Align left. -+ | add AHI, AHI, TMP1 // Add mantissa, increment exponent. -+ | blr -+ |.endif -+ |.endmacro -+ | -+ |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1. -+ |->vm_sfi2d_1: -+ | sfi2d CARG1, CARG2 -+ | -+ |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1. -+ |->vm_sfi2d_2: -+ | sfi2d CARG3, CARG4 - | - |->vm_modi: - | divwo. TMP0, CARG1, CARG2 -@@ -2783,21 +3195,21 @@ static void build_subroutines(BuildCtx *ctx) - | addi DISPATCH, r12, GG_G2DISP - | stw r11, CTSTATE->cb.slot - | stw r3, CTSTATE->cb.gpr[0] -- | stfd f1, CTSTATE->cb.fpr[0] -+ | .FPU stfd f1, CTSTATE->cb.fpr[0] - | stw r4, CTSTATE->cb.gpr[1] -- | stfd f2, CTSTATE->cb.fpr[1] -+ | .FPU stfd f2, CTSTATE->cb.fpr[1] - | stw r5, CTSTATE->cb.gpr[2] -- | stfd f3, CTSTATE->cb.fpr[2] -+ | .FPU stfd f3, CTSTATE->cb.fpr[2] - | stw r6, CTSTATE->cb.gpr[3] -- | stfd f4, CTSTATE->cb.fpr[3] -+ | .FPU stfd f4, CTSTATE->cb.fpr[3] - | stw r7, CTSTATE->cb.gpr[4] -- | stfd f5, CTSTATE->cb.fpr[4] -+ | .FPU stfd f5, CTSTATE->cb.fpr[4] - | stw r8, CTSTATE->cb.gpr[5] -- | stfd f6, CTSTATE->cb.fpr[5] -+ | .FPU stfd f6, CTSTATE->cb.fpr[5] - | stw r9, CTSTATE->cb.gpr[6] -- | stfd f7, CTSTATE->cb.fpr[6] -+ | .FPU stfd f7, CTSTATE->cb.fpr[6] - | stw r10, CTSTATE->cb.gpr[7] -- | stfd f8, CTSTATE->cb.fpr[7] -+ | .FPU stfd f8, CTSTATE->cb.fpr[7] - | addi TMP0, sp, CFRAME_SPACE+8 - | stw TMP0, CTSTATE->cb.stack - | mr CARG1, CTSTATE -@@ -2808,21 +3220,21 @@ static void build_subroutines(BuildCtx *ctx) - | lp BASE, L:CRET1->base - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp RC, L:CRET1->top -- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). -+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | li ZERO, 0 - | mr L, CRET1 -- | stw TMP3, TMPD -- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) -+ | .FPU stw TMP3, TMPD -+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | lwz LFUNC:RB, FRAME_FUNC(BASE) -- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -- | stw TMP0, TONUM_HI -+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -+ | .FPU stw TMP0, TONUM_HI - | li TISNIL, LJ_TNIL - | li_vmstate INTERP -- | lfs TOBIT, TMPD -- | stw TMP3, TMPD -+ | .FPU lfs TOBIT, TMPD -+ | .FPU stw TMP3, TMPD - | sub RC, RC, BASE - | st_vmstate -- | lfs TONUM, TMPD -+ | .FPU lfs TONUM, TMPD - | ins_callt - |.endif - | -@@ -2836,7 +3248,7 @@ static void build_subroutines(BuildCtx *ctx) - | mr CARG2, RA - | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) - | lwz CRET1, CTSTATE->cb.gpr[0] -- | lfd FARG1, CTSTATE->cb.fpr[0] -+ | .FPU lfd FARG1, CTSTATE->cb.fpr[0] - | lwz CRET2, CTSTATE->cb.gpr[1] - | b ->vm_leave_unw - |.endif -@@ -2870,14 +3282,14 @@ static void build_subroutines(BuildCtx *ctx) - | bge <1 - |2: - | bney cr1, >3 -- | lfd f1, CCSTATE->fpr[0] -- | lfd f2, CCSTATE->fpr[1] -- | lfd f3, CCSTATE->fpr[2] -- | lfd f4, CCSTATE->fpr[3] -- | lfd f5, CCSTATE->fpr[4] -- | lfd f6, CCSTATE->fpr[5] -- | lfd f7, CCSTATE->fpr[6] -- | lfd f8, CCSTATE->fpr[7] -+ | .FPU lfd f1, CCSTATE->fpr[0] -+ | .FPU lfd f2, CCSTATE->fpr[1] -+ | .FPU lfd f3, CCSTATE->fpr[2] -+ | .FPU lfd f4, CCSTATE->fpr[3] -+ | .FPU lfd f5, CCSTATE->fpr[4] -+ | .FPU lfd f6, CCSTATE->fpr[5] -+ | .FPU lfd f7, CCSTATE->fpr[6] -+ | .FPU lfd f8, CCSTATE->fpr[7] - |3: - | lp TMP0, CCSTATE->func - | lwz CARG2, CCSTATE->gpr[1] -@@ -2894,7 +3306,7 @@ static void build_subroutines(BuildCtx *ctx) - | lwz TMP2, -4(r14) - | lwz TMP0, 4(r14) - | stw CARG1, CCSTATE:TMP1->gpr[0] -- | stfd FARG1, CCSTATE:TMP1->fpr[0] -+ | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0] - | stw CARG2, CCSTATE:TMP1->gpr[1] - | mtlr TMP0 - | stw CARG3, CCSTATE:TMP1->gpr[2] -@@ -2923,19 +3335,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1*8, RD = src2*8, JMP with RD = target - |.if DUALNUM -- | lwzux TMP0, RA, BASE -+ | lwzux CARG1, RA, BASE - | addi PC, PC, 4 - | lwz CARG2, 4(RA) -- | lwzux TMP1, RD, BASE -+ | lwzux CARG3, RD, BASE - | lwz TMP2, -4(PC) -- | checknum cr0, TMP0 -- | lwz CARG3, 4(RD) -+ | checknum cr0, CARG1 -+ | lwz CARG4, 4(RD) - | decode_RD4 TMP2, TMP2 -- | checknum cr1, TMP1 -- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) -+ | checknum cr1, CARG3 -+ | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16) - | bne cr0, >7 - | bne cr1, >8 -- | cmpw CARG2, CARG3 -+ | cmpw CARG2, CARG4 - if (op == BC_ISLT) { - | bge >2 - } else if (op == BC_ISGE) { -@@ -2946,28 +3358,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | ble >2 - } - |1: -- | add PC, PC, TMP2 -+ | add PC, PC, SAVE0 - |2: - | ins_next - | - |7: // RA is not an integer. - | bgt cr0, ->vmeta_comp - | // RA is a number. -- | lfd f0, 0(RA) -+ | .FPU lfd f0, 0(RA) - | bgt cr1, ->vmeta_comp - | blt cr1, >4 - | // RA is a number, RD is an integer. -- | tonum_i f1, CARG3 -+ |.if FPU -+ | tonum_i f1, CARG4 -+ |.else -+ | bl ->vm_sfi2d_2 -+ |.endif - | b >5 - | - |8: // RA is an integer, RD is not an integer. - | bgt cr1, ->vmeta_comp - | // RA is an integer, RD is a number. -+ |.if FPU - | tonum_i f0, CARG2 -+ |.else -+ | bl ->vm_sfi2d_1 -+ |.endif - |4: -- | lfd f1, 0(RD) -+ | .FPU lfd f1, 0(RD) - |5: -+ |.if FPU - | fcmpu cr0, f0, f1 -+ |.else -+ | blex __ledf2 -+ | cmpwi CRET1, 0 -+ |.endif - if (op == BC_ISLT) { - | bge <2 - } else if (op == BC_ISGE) { -@@ -3015,42 +3440,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - vk = op == BC_ISEQV; - | // RA = src1*8, RD = src2*8, JMP with RD = target - |.if DUALNUM -- | lwzux TMP0, RA, BASE -+ | lwzux CARG1, RA, BASE - | addi PC, PC, 4 - | lwz CARG2, 4(RA) -- | lwzux TMP1, RD, BASE -- | checknum cr0, TMP0 -- | lwz TMP2, -4(PC) -- | checknum cr1, TMP1 -- | decode_RD4 TMP2, TMP2 -- | lwz CARG3, 4(RD) -+ | lwzux CARG3, RD, BASE -+ | checknum cr0, CARG1 -+ | lwz SAVE0, -4(PC) -+ | checknum cr1, CARG3 -+ | decode_RD4 SAVE0, SAVE0 -+ | lwz CARG4, 4(RD) - | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt -- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) -+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) - if (vk) { - | ble cr7, ->BC_ISEQN_Z - } else { - | ble cr7, ->BC_ISNEN_Z - } - |.else -- | lwzux TMP0, RA, BASE -- | lwz TMP2, 0(PC) -+ | lwzux CARG1, RA, BASE -+ | lwz SAVE0, 0(PC) - | lfd f0, 0(RA) - | addi PC, PC, 4 -- | lwzux TMP1, RD, BASE -- | checknum cr0, TMP0 -- | decode_RD4 TMP2, TMP2 -+ | lwzux CARG3, RD, BASE -+ | checknum cr0, CARG1 -+ | decode_RD4 SAVE0, SAVE0 - | lfd f1, 0(RD) -- | checknum cr1, TMP1 -- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) -+ | checknum cr1, CARG3 -+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) - | bge cr0, >5 - | bge cr1, >5 - | fcmpu cr0, f0, f1 - if (vk) { - | bne >1 -- | add PC, PC, TMP2 -+ | add PC, PC, SAVE0 - } else { - | beq >1 -- | add PC, PC, TMP2 -+ | add PC, PC, SAVE0 - } - |1: - | ins_next -@@ -3058,36 +3483,36 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - |5: // Either or both types are not numbers. - |.if not DUALNUM - | lwz CARG2, 4(RA) -- | lwz CARG3, 4(RD) -+ | lwz CARG4, 4(RD) - |.endif - |.if FFI -- | cmpwi cr7, TMP0, LJ_TCDATA -- | cmpwi cr5, TMP1, LJ_TCDATA -+ | cmpwi cr7, CARG1, LJ_TCDATA -+ | cmpwi cr5, CARG3, LJ_TCDATA - |.endif -- | not TMP3, TMP0 -- | cmplw TMP0, TMP1 -- | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? -+ | not TMP2, CARG1 -+ | cmplw CARG1, CARG3 -+ | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive? - |.if FFI - | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq - |.endif -- | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? -+ | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata? - |.if FFI - | beq cr7, ->vmeta_equal_cd - |.endif -- | cmplw cr5, CARG2, CARG3 -+ | cmplw cr5, CARG2, CARG4 - | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. - | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. - | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. -- | mr SAVE0, PC -+ | mr SAVE1, PC - | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. - | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. - if (vk) { - | bne cr0, >6 -- | add PC, PC, TMP2 -+ | add PC, PC, SAVE0 - |6: - } else { - | beq cr0, >6 -- | add PC, PC, TMP2 -+ | add PC, PC, SAVE0 - |6: - } - |.if DUALNUM -@@ -3102,6 +3527,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | - | // Different tables or userdatas. Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! -+ | mr CARG3, CARG4 - | lwz TAB:TMP2, TAB:CARG2->metatable - | li CARG4, 1-vk // ne = 0 or 1. - | cmplwi TAB:TMP2, 0 -@@ -3109,7 +3535,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | lbz TMP2, TAB:TMP2->nomm - | andix. TMP2, TMP2, 1<vmeta_equal // Handle __eq metamethod. - break; - -@@ -3150,16 +3576,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - vk = op == BC_ISEQN; - | // RA = src*8, RD = num_const*8, JMP with RD = target - |.if DUALNUM -- | lwzux TMP0, RA, BASE -+ | lwzux CARG1, RA, BASE - | addi PC, PC, 4 - | lwz CARG2, 4(RA) -- | lwzux TMP1, RD, KBASE -- | checknum cr0, TMP0 -- | lwz TMP2, -4(PC) -- | checknum cr1, TMP1 -- | decode_RD4 TMP2, TMP2 -- | lwz CARG3, 4(RD) -- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) -+ | lwzux CARG3, RD, KBASE -+ | checknum cr0, CARG1 -+ | lwz SAVE0, -4(PC) -+ | checknum cr1, CARG3 -+ | decode_RD4 SAVE0, SAVE0 -+ | lwz CARG4, 4(RD) -+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) - if (vk) { - |->BC_ISEQN_Z: - } else { -@@ -3167,7 +3593,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - } - | bne cr0, >7 - | bne cr1, >8 -- | cmpw CARG2, CARG3 -+ | cmpw CARG2, CARG4 - |4: - |.else - if (vk) { -@@ -3175,20 +3601,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - } else { - |->BC_ISNEN_Z: // Dummy label. - } -- | lwzx TMP0, BASE, RA -+ | lwzx CARG1, BASE, RA - | addi PC, PC, 4 - | lfdx f0, BASE, RA -- | lwz TMP2, -4(PC) -+ | lwz SAVE0, -4(PC) - | lfdx f1, KBASE, RD -- | decode_RD4 TMP2, TMP2 -- | checknum TMP0 -- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) -+ | decode_RD4 SAVE0, SAVE0 -+ | checknum CARG1 -+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) - | bge >3 - | fcmpu cr0, f0, f1 - |.endif - if (vk) { - | bne >1 -- | add PC, PC, TMP2 -+ | add PC, PC, SAVE0 - |1: - |.if not FFI - |3: -@@ -3199,13 +3625,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - |.if not FFI - |3: - |.endif -- | add PC, PC, TMP2 -+ | add PC, PC, SAVE0 - |2: - } - | ins_next - |.if FFI - |3: -- | cmpwi TMP0, LJ_TCDATA -+ | cmpwi CARG1, LJ_TCDATA - | beq ->vmeta_equal_cd - | b <1 - |.endif -@@ -3213,18 +3639,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - |7: // RA is not an integer. - | bge cr0, <3 - | // RA is a number. -- | lfd f0, 0(RA) -+ | .FPU lfd f0, 0(RA) - | blt cr1, >1 - | // RA is a number, RD is an integer. -- | tonum_i f1, CARG3 -+ |.if FPU -+ | tonum_i f1, CARG4 -+ |.else -+ | bl ->vm_sfi2d_2 -+ |.endif - | b >2 - | - |8: // RA is an integer, RD is a number. -+ |.if FPU - | tonum_i f0, CARG2 -+ |.else -+ | bl ->vm_sfi2d_1 -+ |.endif - |1: -- | lfd f1, 0(RD) -+ | .FPU lfd f1, 0(RD) - |2: -+ |.if FPU - | fcmpu cr0, f0, f1 -+ |.else -+ | blex __ledf2 -+ | cmpwi CRET1, 0 -+ |.endif - | b <4 - |.endif - break; -@@ -3279,7 +3718,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | add PC, PC, TMP2 - } else { - | li TMP1, LJ_TFALSE -+ |.if FPU - | lfdx f0, BASE, RD -+ |.else -+ | lwzux CARG1, RD, BASE -+ | lwz CARG2, 4(RD) -+ |.endif - | cmplw TMP0, TMP1 - if (op == BC_ISTC) { - | bge >1 -@@ -3288,7 +3732,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - } - | addis PC, PC, -(BCBIAS_J*4 >> 16) - | decode_RD4 TMP2, INS -+ |.if FPU - | stfdx f0, BASE, RA -+ |.else -+ | stwux CARG1, RA, BASE -+ | stw CARG2, 4(RA) -+ |.endif - | add PC, PC, TMP2 - |1: - } -@@ -3323,8 +3772,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - case BC_MOV: - | // RA = dst*8, RD = src*8 - | ins_next1 -+ |.if FPU - | lfdx f0, BASE, RD - | stfdx f0, BASE, RA -+ |.else -+ | lwzux TMP0, RD, BASE -+ | lwz TMP1, 4(RD) -+ | stwux TMP0, RA, BASE -+ | stw TMP1, 4(RA) -+ |.endif - | ins_next2 - break; - case BC_NOT: -@@ -3426,44 +3882,65 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: -- | lwzx TMP1, BASE, RB -+ | lwzx CARG1, BASE, RB - | .if DUALNUM -- | lwzx TMP2, KBASE, RC -+ | lwzx CARG3, KBASE, RC - | .endif -+ | .if FPU - | lfdx f14, BASE, RB - | lfdx f15, KBASE, RC -+ | .else -+ | add TMP1, BASE, RB -+ | add TMP2, KBASE, RC -+ | lwz CARG2, 4(TMP1) -+ | lwz CARG4, 4(TMP2) -+ | .endif - | .if DUALNUM -- | checknum cr0, TMP1 -- | checknum cr1, TMP2 -+ | checknum cr0, CARG1 -+ | checknum cr1, CARG3 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_vn - | .else -- | checknum TMP1; bge ->vmeta_arith_vn -+ | checknum CARG1; bge ->vmeta_arith_vn - | .endif - || break; - ||case 1: -- | lwzx TMP1, BASE, RB -+ | lwzx CARG1, BASE, RB - | .if DUALNUM -- | lwzx TMP2, KBASE, RC -+ | lwzx CARG3, KBASE, RC - | .endif -+ | .if FPU - | lfdx f15, BASE, RB - | lfdx f14, KBASE, RC -+ | .else -+ | add TMP1, BASE, RB -+ | add TMP2, KBASE, RC -+ | lwz CARG2, 4(TMP1) -+ | lwz CARG4, 4(TMP2) -+ | .endif - | .if DUALNUM -- | checknum cr0, TMP1 -- | checknum cr1, TMP2 -+ | checknum cr0, CARG1 -+ | checknum cr1, CARG3 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_nv - | .else -- | checknum TMP1; bge ->vmeta_arith_nv -+ | checknum CARG1; bge ->vmeta_arith_nv - | .endif - || break; - ||default: -- | lwzx TMP1, BASE, RB -- | lwzx TMP2, BASE, RC -+ | lwzx CARG1, BASE, RB -+ | lwzx CARG3, BASE, RC -+ | .if FPU - | lfdx f14, BASE, RB - | lfdx f15, BASE, RC -- | checknum cr0, TMP1 -- | checknum cr1, TMP2 -+ | .else -+ | add TMP1, BASE, RB -+ | add TMP2, BASE, RC -+ | lwz CARG2, 4(TMP1) -+ | lwz CARG4, 4(TMP2) -+ | .endif -+ | checknum cr0, CARG1 -+ | checknum cr1, CARG3 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_vv - || break; -@@ -3497,48 +3974,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | fsub a, b, a // b - floor(b/c)*c - |.endmacro - | -+ |.macro sfpmod -+ |->BC_MODVN_Z: -+ | stw CARG1, SFSAVE_1 -+ | stw CARG2, SFSAVE_2 -+ | mr SAVE0, CARG3 -+ | mr SAVE1, CARG4 -+ | blex __divdf3 -+ | blex floor -+ | mr CARG3, SAVE0 -+ | mr CARG4, SAVE1 -+ | blex __muldf3 -+ | mr CARG3, CRET1 -+ | mr CARG4, CRET2 -+ | lwz CARG1, SFSAVE_1 -+ | lwz CARG2, SFSAVE_2 -+ | blex __subdf3 -+ |.endmacro -+ | - |.macro ins_arithfp, fpins - | ins_arithpre - |.if "fpins" == "fpmod_" - | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. -- |.else -+ |.elif FPU - | fpins f0, f14, f15 - | ins_next1 - | stfdx f0, BASE, RA - | ins_next2 -+ |.else -+ | blex __divdf3 // Only soft-float div uses this macro. -+ | ins_next1 -+ | stwux CRET1, RA, BASE -+ | stw CRET2, 4(RA) -+ | ins_next2 - |.endif - |.endmacro - | -- |.macro ins_arithdn, intins, fpins -+ |.macro ins_arithdn, intins, fpins, fpcall - | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: -- | lwzux TMP1, RB, BASE -- | lwzux TMP2, RC, KBASE -- | lwz CARG1, 4(RB) -- | checknum cr0, TMP1 -- | lwz CARG2, 4(RC) -+ | lwzux CARG1, RB, BASE -+ | lwzux CARG3, RC, KBASE -+ | lwz CARG2, 4(RB) -+ | checknum cr0, CARG1 -+ | lwz CARG4, 4(RC) -+ | checknum cr1, CARG3 - || break; - ||case 1: -- | lwzux TMP1, RB, BASE -- | lwzux TMP2, RC, KBASE -- | lwz CARG2, 4(RB) -- | checknum cr0, TMP1 -- | lwz CARG1, 4(RC) -+ | lwzux CARG3, RB, BASE -+ | lwzux CARG1, RC, KBASE -+ | lwz CARG4, 4(RB) -+ | checknum cr0, CARG3 -+ | lwz CARG2, 4(RC) -+ | checknum cr1, CARG1 - || break; - ||default: -- | lwzux TMP1, RB, BASE -- | lwzux TMP2, RC, BASE -- | lwz CARG1, 4(RB) -- | checknum cr0, TMP1 -- | lwz CARG2, 4(RC) -+ | lwzux CARG1, RB, BASE -+ | lwzux CARG3, RC, BASE -+ | lwz CARG2, 4(RB) -+ | checknum cr0, CARG1 -+ | lwz CARG4, 4(RC) -+ | checknum cr1, CARG3 - || break; - ||} -- | checknum cr1, TMP2 - | bne >5 - | bne cr1, >5 -- | intins CARG1, CARG1, CARG2 -+ |.if "intins" == "intmod" -+ | mr CARG1, CARG2 -+ | mr CARG2, CARG4 -+ |.endif -+ | intins CARG1, CARG2, CARG4 - | bso >4 - |1: - | ins_next1 -@@ -3550,29 +4057,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | checkov TMP0, <1 // Ignore unrelated overflow. - | ins_arithfallback b - |5: // FP variant. -+ |.if FPU - ||if (vk == 1) { - | lfd f15, 0(RB) -- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | lfd f14, 0(RC) - ||} else { - | lfd f14, 0(RB) -- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | lfd f15, 0(RC) - ||} -+ |.endif -+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | ins_arithfallback bge - |.if "fpins" == "fpmod_" - | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. - |.else -+ |.if FPU - | fpins f0, f14, f15 -- | ins_next1 - | stfdx f0, BASE, RA -+ |.else -+ |.if "fpcall" == "sfpmod" -+ | sfpmod -+ |.else -+ | blex fpcall -+ |.endif -+ | stwux CRET1, RA, BASE -+ | stw CRET2, 4(RA) -+ |.endif -+ | ins_next1 - | b <2 - |.endif - |.endmacro - | -- |.macro ins_arith, intins, fpins -+ |.macro ins_arith, intins, fpins, fpcall - |.if DUALNUM -- | ins_arithdn intins, fpins -+ | ins_arithdn intins, fpins, fpcall - |.else - | ins_arithfp fpins - |.endif -@@ -3587,9 +4105,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | addo. TMP0, TMP0, TMP3 - | add y, a, b - |.endmacro -- | ins_arith addo32., fadd -+ | ins_arith addo32., fadd, __adddf3 - |.else -- | ins_arith addo., fadd -+ | ins_arith addo., fadd, __adddf3 - |.endif - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: -@@ -3601,36 +4119,48 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | subo. TMP0, TMP0, TMP3 - | sub y, a, b - |.endmacro -- | ins_arith subo32., fsub -+ | ins_arith subo32., fsub, __subdf3 - |.else -- | ins_arith subo., fsub -+ | ins_arith subo., fsub, __subdf3 - |.endif - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: -- | ins_arith mullwo., fmul -+ | ins_arith mullwo., fmul, __muldf3 - break; - case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arithfp fdiv - break; - case BC_MODVN: -- | ins_arith intmod, fpmod -+ | ins_arith intmod, fpmod, sfpmod - break; - case BC_MODNV: case BC_MODVV: -- | ins_arith intmod, fpmod_ -+ | ins_arith intmod, fpmod_, sfpmod - break; - case BC_POW: - | // NYI: (partial) integer arithmetic. -- | lwzx TMP1, BASE, RB -+ | lwzx CARG1, BASE, RB -+ | lwzx CARG3, BASE, RC -+ |.if FPU - | lfdx FARG1, BASE, RB -- | lwzx TMP2, BASE, RC - | lfdx FARG2, BASE, RC -- | checknum cr0, TMP1 -- | checknum cr1, TMP2 -+ |.else -+ | add TMP1, BASE, RB -+ | add TMP2, BASE, RC -+ | lwz CARG2, 4(TMP1) -+ | lwz CARG4, 4(TMP2) -+ |.endif -+ | checknum cr0, CARG1 -+ | checknum cr1, CARG3 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_vv - | blex pow - | ins_next1 -+ |.if FPU - | stfdx FARG1, BASE, RA -+ |.else -+ | stwux CARG1, RA, BASE -+ | stw CARG2, 4(RA) -+ |.endif - | ins_next2 - break; - -@@ -3650,8 +4180,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | lp BASE, L->base - | bne ->vmeta_binop - | ins_next1 -+ |.if FPU - | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. - | stfdx f0, BASE, RA -+ |.else -+ | lwzux TMP0, SAVE0, BASE -+ | lwz TMP1, 4(SAVE0) -+ | stwux TMP0, RA, BASE -+ | stw TMP1, 4(RA) -+ |.endif - | ins_next2 - break; - -@@ -3714,8 +4251,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - case BC_KNUM: - | // RA = dst*8, RD = num_const*8 - | ins_next1 -+ |.if FPU - | lfdx f0, KBASE, RD - | stfdx f0, BASE, RA -+ |.else -+ | lwzux TMP0, RD, KBASE -+ | lwz TMP1, 4(RD) -+ | stwux TMP0, RA, BASE -+ | stw TMP1, 4(RA) -+ |.endif - | ins_next2 - break; - case BC_KPRI: -@@ -3748,8 +4292,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | lwzx UPVAL:RB, LFUNC:RB, RD - | ins_next1 - | lwz TMP1, UPVAL:RB->v -+ |.if FPU - | lfd f0, 0(TMP1) - | stfdx f0, BASE, RA -+ |.else -+ | lwz TMP2, 0(TMP1) -+ | lwz TMP3, 4(TMP1) -+ | stwux TMP2, RA, BASE -+ | stw TMP3, 4(RA) -+ |.endif - | ins_next2 - break; - case BC_USETV: -@@ -3757,14 +4308,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RA, RA, 1 - | addi RA, RA, offsetof(GCfuncL, uvptr) -+ |.if FPU - | lfdux f0, RD, BASE -+ |.else -+ | lwzux CARG1, RD, BASE -+ | lwz CARG3, 4(RD) -+ |.endif - | lwzx UPVAL:RB, LFUNC:RB, RA - | lbz TMP3, UPVAL:RB->marked - | lwz CARG2, UPVAL:RB->v - | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) - | lbz TMP0, UPVAL:RB->closed - | lwz TMP2, 0(RD) -+ |.if FPU - | stfd f0, 0(CARG2) -+ |.else -+ | stw CARG1, 0(CARG2) -+ | stw CARG3, 4(CARG2) -+ |.endif - | cmplwi cr1, TMP0, 0 - | lwz TMP1, 4(RD) - | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq -@@ -3820,11 +4381,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RA, RA, 1 - | addi RA, RA, offsetof(GCfuncL, uvptr) -+ |.if FPU - | lfdx f0, KBASE, RD -+ |.else -+ | lwzux TMP2, RD, KBASE -+ | lwz TMP3, 4(RD) -+ |.endif - | lwzx UPVAL:RB, LFUNC:RB, RA - | ins_next1 - | lwz TMP1, UPVAL:RB->v -+ |.if FPU - | stfd f0, 0(TMP1) -+ |.else -+ | stw TMP2, 0(TMP1) -+ | stw TMP3, 4(TMP1) -+ |.endif - | ins_next2 - break; - case BC_USETP: -@@ -3972,11 +4543,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - |.endif - | ble ->vmeta_tgetv // Integer key and in array part? - | lwzx TMP0, TMP1, TMP2 -+ |.if FPU - | lfdx f14, TMP1, TMP2 -+ |.else -+ | lwzux SAVE0, TMP1, TMP2 -+ | lwz SAVE1, 4(TMP1) -+ |.endif - | checknil TMP0; beq >2 - |1: - | ins_next1 -+ |.if FPU - | stfdx f14, BASE, RA -+ |.else -+ | stwux SAVE0, RA, BASE -+ | stw SAVE1, 4(RA) -+ |.endif - | ins_next2 - | - |2: // Check for __index if table value is nil. -@@ -4052,12 +4633,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | lwz TMP1, TAB:RB->asize - | lwz TMP2, TAB:RB->array - | cmplw TMP0, TMP1; bge ->vmeta_tgetb -+ |.if FPU - | lwzx TMP1, TMP2, RC - | lfdx f0, TMP2, RC -+ |.else -+ | lwzux TMP1, TMP2, RC -+ | lwz TMP3, 4(TMP2) -+ |.endif - | checknil TMP1; beq >5 - |1: - | ins_next1 -+ |.if FPU - | stfdx f0, BASE, RA -+ |.else -+ | stwux TMP1, RA, BASE -+ | stw TMP3, 4(RA) -+ |.endif - | ins_next2 - | - |5: // Check for __index if table value is nil. -@@ -4087,10 +4678,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | cmplw TMP0, CARG2 - | slwi TMP2, CARG2, 3 - | ble ->vmeta_tgetr // In array part? -+ |.if FPU - | lfdx f14, TMP1, TMP2 -+ |.else -+ | lwzux SAVE0, TMP2, TMP1 -+ | lwz SAVE1, 4(TMP2) -+ |.endif - |->BC_TGETR_Z: - | ins_next1 -+ |.if FPU - | stfdx f14, BASE, RA -+ |.else -+ | stwux SAVE0, RA, BASE -+ | stw SAVE1, 4(RA) -+ |.endif - | ins_next2 - break; - -@@ -4131,11 +4732,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | ble ->vmeta_tsetv // Integer key and in array part? - | lwzx TMP2, TMP1, TMP0 - | lbz TMP3, TAB:RB->marked -+ |.if FPU - | lfdx f14, BASE, RA -+ |.else -+ | add SAVE1, BASE, RA -+ | lwz SAVE0, 0(SAVE1) -+ | lwz SAVE1, 4(SAVE1) -+ |.endif - | checknil TMP2; beq >3 - |1: - | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) -+ |.if FPU - | stfdx f14, TMP1, TMP0 -+ |.else -+ | stwux SAVE0, TMP1, TMP0 -+ | stw SAVE1, 4(TMP1) -+ |.endif - | bne >7 - |2: - | ins_next -@@ -4176,7 +4788,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | lwz NODE:TMP2, TAB:RB->node - | stb ZERO, TAB:RB->nomm // Clear metamethod cache. - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask -+ |.if FPU - | lfdx f14, BASE, RA -+ |.else -+ | add CARG2, BASE, RA -+ | lwz SAVE0, 0(CARG2) -+ | lwz SAVE1, 4(CARG2) -+ |.endif - | slwi TMP0, TMP1, 5 - | slwi TMP1, TMP1, 3 - | sub TMP1, TMP0, TMP1 -@@ -4192,7 +4810,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | checknil CARG2; beq >4 // Key found, but nil value? - |2: - | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) -+ |.if FPU - | stfd f14, NODE:TMP2->val -+ |.else -+ | stw SAVE0, NODE:TMP2->val.u32.hi -+ | stw SAVE1, NODE:TMP2->val.u32.lo -+ |.endif - | bne >7 - |3: - | ins_next -@@ -4231,7 +4854,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) - | // Returns TValue *. - | lp BASE, L->base -+ |.if FPU - | stfd f14, 0(CRET1) -+ |.else -+ | stw SAVE0, 0(CRET1) -+ | stw SAVE1, 4(CRET1) -+ |.endif - | b <3 // No 2nd write barrier needed. - | - |7: // Possible table write barrier for the value. Skip valiswhite check. -@@ -4248,13 +4876,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | lwz TMP2, TAB:RB->array - | lbz TMP3, TAB:RB->marked - | cmplw TMP0, TMP1 -+ |.if FPU - | lfdx f14, BASE, RA -+ |.else -+ | add CARG2, BASE, RA -+ | lwz SAVE0, 0(CARG2) -+ | lwz SAVE1, 4(CARG2) -+ |.endif - | bge ->vmeta_tsetb - | lwzx TMP1, TMP2, RC - | checknil TMP1; beq >5 - |1: - | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) -+ |.if FPU - | stfdx f14, TMP2, RC -+ |.else -+ | stwux SAVE0, RC, TMP2 -+ | stw SAVE1, 4(RC) -+ |.endif - | bne >7 - |2: - | ins_next -@@ -4294,10 +4933,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - |2: - | cmplw TMP0, CARG3 - | slwi TMP2, CARG3, 3 -+ |.if FPU - | lfdx f14, BASE, RA -+ |.else -+ | lwzux SAVE0, RA, BASE -+ | lwz SAVE1, 4(RA) -+ |.endif - | ble ->vmeta_tsetr // In array part? - | ins_next1 -+ |.if FPU - | stfdx f14, TMP1, TMP2 -+ |.else -+ | stwux SAVE0, TMP1, TMP2 -+ | stw SAVE1, 4(TMP1) -+ |.endif - | ins_next2 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. -@@ -4327,10 +4976,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | add TMP1, TMP1, TMP0 - | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) - |3: // Copy result slots to table. -+ |.if FPU - | lfd f0, 0(RA) -+ |.else -+ | lwz SAVE0, 0(RA) -+ | lwz SAVE1, 4(RA) -+ |.endif - | addi RA, RA, 8 - | cmpw cr1, RA, TMP2 -+ |.if FPU - | stfd f0, 0(TMP1) -+ |.else -+ | stw SAVE0, 0(TMP1) -+ | stw SAVE1, 4(TMP1) -+ |.endif - | addi TMP1, TMP1, 8 - | blt cr1, <3 - | bne >7 -@@ -4397,9 +5056,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | beq cr1, >3 - |2: - | addi TMP3, TMP2, 8 -+ |.if FPU - | lfdx f0, RA, TMP2 -+ |.else -+ | add CARG3, RA, TMP2 -+ | lwz CARG1, 0(CARG3) -+ | lwz CARG2, 4(CARG3) -+ |.endif - | cmplw cr1, TMP3, NARGS8:RC -+ |.if FPU - | stfdx f0, BASE, TMP2 -+ |.else -+ | stwux CARG1, TMP2, BASE -+ | stw CARG2, 4(TMP2) -+ |.endif - | mr TMP2, TMP3 - | bne cr1, <2 - |3: -@@ -4432,14 +5102,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | add BASE, BASE, RA - | lwz TMP1, -24(BASE) - | lwz LFUNC:RB, -20(BASE) -+ |.if FPU - | lfd f1, -8(BASE) - | lfd f0, -16(BASE) -+ |.else -+ | lwz CARG1, -8(BASE) -+ | lwz CARG2, -4(BASE) -+ | lwz CARG3, -16(BASE) -+ | lwz CARG4, -12(BASE) -+ |.endif - | stw TMP1, 0(BASE) // Copy callable. - | stw LFUNC:RB, 4(BASE) - | checkfunc TMP1 -- | stfd f1, 16(BASE) // Copy control var. - | li NARGS8:RC, 16 // Iterators get 2 arguments. -+ |.if FPU -+ | stfd f1, 16(BASE) // Copy control var. - | stfdu f0, 8(BASE) // Copy state. -+ |.else -+ | stw CARG1, 16(BASE) // Copy control var. -+ | stw CARG2, 20(BASE) -+ | stwu CARG3, 8(BASE) // Copy state. -+ | stw CARG4, 4(BASE) -+ |.endif - | bne ->vmeta_call - | ins_call - break; -@@ -4460,7 +5144,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | slwi TMP3, RC, 3 - | bge >5 // Index points after array part? - | lwzx TMP2, TMP1, TMP3 -+ |.if FPU - | lfdx f0, TMP1, TMP3 -+ |.else -+ | lwzux CARG1, TMP3, TMP1 -+ | lwz CARG2, 4(TMP3) -+ |.endif - | checknil TMP2 - | lwz INS, -4(PC) - | beq >4 -@@ -4472,7 +5161,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - |.endif - | addi RC, RC, 1 - | addis TMP3, PC, -(BCBIAS_J*4 >> 16) -+ |.if FPU - | stfd f0, 8(RA) -+ |.else -+ | stw CARG1, 8(RA) -+ | stw CARG2, 12(RA) -+ |.endif - | decode_RD4 TMP1, INS - | stw RC, -4(RA) // Update control var. - | add PC, TMP1, TMP3 -@@ -4497,17 +5191,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | slwi RB, RC, 3 - | sub TMP3, TMP3, RB - | lwzx RB, TMP2, TMP3 -+ |.if FPU - | lfdx f0, TMP2, TMP3 -+ |.else -+ | add CARG3, TMP2, TMP3 -+ | lwz CARG1, 0(CARG3) -+ | lwz CARG2, 4(CARG3) -+ |.endif - | add NODE:TMP3, TMP2, TMP3 - | checknil RB - | lwz INS, -4(PC) - | beq >7 -+ |.if FPU - | lfd f1, NODE:TMP3->key -+ |.else -+ | lwz CARG3, NODE:TMP3->key.u32.hi -+ | lwz CARG4, NODE:TMP3->key.u32.lo -+ |.endif - | addis TMP2, PC, -(BCBIAS_J*4 >> 16) -+ |.if FPU - | stfd f0, 8(RA) -+ |.else -+ | stw CARG1, 8(RA) -+ | stw CARG2, 12(RA) -+ |.endif - | add RC, RC, TMP0 - | decode_RD4 TMP1, INS -+ |.if FPU - | stfd f1, 0(RA) -+ |.else -+ | stw CARG3, 0(RA) -+ | stw CARG4, 4(RA) -+ |.endif - | addi RC, RC, 1 - | add PC, TMP1, TMP2 - | stw RC, -4(RA) // Update control var. -@@ -4573,9 +5288,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | subi TMP2, TMP2, 16 - | ble >2 // No vararg slots? - |1: // Copy vararg slots to destination slots. -+ |.if FPU - | lfd f0, 0(RC) -+ |.else -+ | lwz CARG1, 0(RC) -+ | lwz CARG2, 4(RC) -+ |.endif - | addi RC, RC, 8 -+ |.if FPU - | stfd f0, 0(RA) -+ |.else -+ | stw CARG1, 0(RA) -+ | stw CARG2, 4(RA) -+ |.endif - | cmplw RA, TMP2 - | cmplw cr1, RC, TMP3 - | bge >3 // All destination slots filled? -@@ -4598,9 +5323,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | addi MULTRES, TMP1, 8 - | bgt >7 - |6: -+ |.if FPU - | lfd f0, 0(RC) -+ |.else -+ | lwz CARG1, 0(RC) -+ | lwz CARG2, 4(RC) -+ |.endif - | addi RC, RC, 8 -+ |.if FPU - | stfd f0, 0(RA) -+ |.else -+ | stw CARG1, 0(RA) -+ | stw CARG2, 4(RA) -+ |.endif - | cmplw RC, TMP3 - | addi RA, RA, 8 - | blt <6 // More vararg slots? -@@ -4651,14 +5386,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | li TMP1, 0 - |2: - | addi TMP3, TMP1, 8 -+ |.if FPU - | lfdx f0, RA, TMP1 -+ |.else -+ | add CARG3, RA, TMP1 -+ | lwz CARG1, 0(CARG3) -+ | lwz CARG2, 4(CARG3) -+ |.endif - | cmpw TMP3, RC -+ |.if FPU - | stfdx f0, TMP2, TMP1 -+ |.else -+ | add CARG3, TMP2, TMP1 -+ | stw CARG1, 0(CARG3) -+ | stw CARG2, 4(CARG3) -+ |.endif - | beq >3 - | addi TMP1, TMP3, 8 -+ |.if FPU - | lfdx f1, RA, TMP3 -+ |.else -+ | add CARG3, RA, TMP3 -+ | lwz CARG1, 0(CARG3) -+ | lwz CARG2, 4(CARG3) -+ |.endif - | cmpw TMP1, RC -+ |.if FPU - | stfdx f1, TMP2, TMP3 -+ |.else -+ | add CARG3, TMP2, TMP3 -+ | stw CARG1, 0(CARG3) -+ | stw CARG2, 4(CARG3) -+ |.endif - | bne <2 - |3: - |5: -@@ -4700,8 +5459,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | subi TMP2, BASE, 8 - | decode_RB8 RB, INS - if (op == BC_RET1) { -+ |.if FPU - | lfd f0, 0(RA) - | stfd f0, 0(TMP2) -+ |.else -+ | lwz CARG1, 0(RA) -+ | lwz CARG2, 4(RA) -+ | stw CARG1, 0(TMP2) -+ | stw CARG2, 4(TMP2) -+ |.endif - } - |5: - | cmplw RB, RD -@@ -4762,11 +5528,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - |4: - | stw CARG1, FORL_IDX*8+4(RA) - } else { -- | lwz TMP3, FORL_STEP*8(RA) -+ | lwz SAVE0, FORL_STEP*8(RA) - | lwz CARG3, FORL_STEP*8+4(RA) - | lwz TMP2, FORL_STOP*8(RA) - | lwz CARG2, FORL_STOP*8+4(RA) -- | cmplw cr7, TMP3, TISNUM -+ | cmplw cr7, SAVE0, TISNUM - | cmplw cr1, TMP2, TISNUM - | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq - | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq -@@ -4809,41 +5575,80 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - if (vk) { - |.if DUALNUM - |9: // FP loop. -+ |.if FPU - | lfd f1, FORL_IDX*8(RA) - |.else -+ | lwz CARG1, FORL_IDX*8(RA) -+ | lwz CARG2, FORL_IDX*8+4(RA) -+ |.endif -+ |.else - | lfdux f1, RA, BASE - |.endif -+ |.if FPU - | lfd f3, FORL_STEP*8(RA) - | lfd f2, FORL_STOP*8(RA) -- | lwz TMP3, FORL_STEP*8(RA) - | fadd f1, f1, f3 - | stfd f1, FORL_IDX*8(RA) -+ |.else -+ | lwz CARG3, FORL_STEP*8(RA) -+ | lwz CARG4, FORL_STEP*8+4(RA) -+ | mr SAVE1, RD -+ | blex __adddf3 -+ | mr RD, SAVE1 -+ | stw CRET1, FORL_IDX*8(RA) -+ | stw CRET2, FORL_IDX*8+4(RA) -+ | lwz CARG3, FORL_STOP*8(RA) -+ | lwz CARG4, FORL_STOP*8+4(RA) -+ |.endif -+ | lwz SAVE0, FORL_STEP*8(RA) - } else { - |.if DUALNUM - |9: // FP loop. - |.else - | lwzux TMP1, RA, BASE -- | lwz TMP3, FORL_STEP*8(RA) -+ | lwz SAVE0, FORL_STEP*8(RA) - | lwz TMP2, FORL_STOP*8(RA) - | cmplw cr0, TMP1, TISNUM -- | cmplw cr7, TMP3, TISNUM -+ | cmplw cr7, SAVE0, TISNUM - | cmplw cr1, TMP2, TISNUM - |.endif -+ |.if FPU - | lfd f1, FORL_IDX*8(RA) -+ |.else -+ | lwz CARG1, FORL_IDX*8(RA) -+ | lwz CARG2, FORL_IDX*8+4(RA) -+ |.endif - | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt -+ |.if FPU - | lfd f2, FORL_STOP*8(RA) -+ |.else -+ | lwz CARG3, FORL_STOP*8(RA) -+ | lwz CARG4, FORL_STOP*8+4(RA) -+ |.endif - | bge ->vmeta_for - } -- | cmpwi cr6, TMP3, 0 -+ | cmpwi cr6, SAVE0, 0 - if (op != BC_JFORL) { - | srwi RD, RD, 1 - } -+ |.if FPU - | stfd f1, FORL_EXT*8(RA) -+ |.else -+ | stw CARG1, FORL_EXT*8(RA) -+ | stw CARG2, FORL_EXT*8+4(RA) -+ |.endif - if (op != BC_JFORL) { - | add RD, PC, RD - } -+ |.if FPU - | fcmpu cr0, f1, f2 -+ |.else -+ | mr SAVE1, RD -+ | blex __ledf2 -+ | cmpwi CRET1, 0 -+ | mr RD, SAVE1 -+ |.endif - if (op == BC_JFORI) { - | addis PC, RD, -(BCBIAS_J*4 >> 16) - } --- -2.20.1 - diff --git a/0011-Use-https-for-freelists.org-links.patch b/0011-Use-https-for-freelists.org-links.patch deleted file mode 100644 index c0c2a19..0000000 --- a/0011-Use-https-for-freelists.org-links.patch +++ /dev/null @@ -1,25 +0,0 @@ -From f3d75075ed91137699c6071abe49e2252e794a9c Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Fri, 18 Aug 2017 12:52:14 +0200 -Subject: [PATCH 11/72] Use https for freelists.org links. - ---- - doc/ext_ffi_semantics.html | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html -index 899640c..ae3c037 100644 ---- a/doc/ext_ffi_semantics.html -+++ b/doc/ext_ffi_semantics.html -@@ -844,7 +844,7 @@ place of a type, you'd need to use ffi.typeof("int") instead. -

- The main use for parameterized types are libraries implementing abstract - data types --(» example), -+(example), - similar to what can be achieved with C++ template metaprogramming. - Another use case are derived types of anonymous structs, which avoids - pollution of the global struct namespace. --- -2.20.1 - diff --git a/0012-x64-LJ_GC64-Fix-fallback-case-of-asm_fuseloadk64.patch b/0012-x64-LJ_GC64-Fix-fallback-case-of-asm_fuseloadk64.patch deleted file mode 100644 index 80ca5b0..0000000 --- a/0012-x64-LJ_GC64-Fix-fallback-case-of-asm_fuseloadk64.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 6b0824852677cc12570c20a3211fbfe0e4f0ce14 Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Mon, 28 Aug 2017 10:43:37 +0200 -Subject: [PATCH 12/72] x64/LJ_GC64: Fix fallback case of asm_fuseloadk64(). - -Contributed by Peter Cawley. ---- - src/lj_asm_x86.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h -index 3e189b1..55c02d2 100644 ---- a/src/lj_asm_x86.h -+++ b/src/lj_asm_x86.h -@@ -387,6 +387,7 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) - ir->i = (int32_t)(as->mctop - as->mcbot); - as->mcbot += 8; - as->mclim = as->mcbot + MCLIM_REDZONE; -+ lj_mcode_commitbot(as->J, as->mcbot); - } - as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i); - as->mrm.base = RID_RIP; --- -2.20.1 - diff --git a/0013-PPC-Add-soft-float-support-to-JIT-compiler-backend.patch b/0013-PPC-Add-soft-float-support-to-JIT-compiler-backend.patch deleted file mode 100644 index faaa94a..0000000 --- a/0013-PPC-Add-soft-float-support-to-JIT-compiler-backend.patch +++ /dev/null @@ -1,751 +0,0 @@ -From 71b7bc88341945f13f3951e2bb5fd247b639ff7a Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Sun, 3 Sep 2017 23:20:53 +0200 -Subject: [PATCH 13/72] PPC: Add soft-float support to JIT compiler backend. - -Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. -Sponsored by Cisco Systems, Inc. ---- - src/lj_arch.h | 1 - - src/lj_asm_ppc.h | 321 ++++++++++++++++++++++++++++++++++++++++------- - 2 files changed, 278 insertions(+), 44 deletions(-) - -diff --git a/src/lj_arch.h b/src/lj_arch.h -index 0145a7c..5962f3a 100644 ---- a/src/lj_arch.h -+++ b/src/lj_arch.h -@@ -273,7 +273,6 @@ - #endif - - #if LJ_ABI_SOFTFP --#define LJ_ARCH_NOJIT 1 /* NYI */ - #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL - #else - #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE -diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h -index 6daa861..1955429 100644 ---- a/src/lj_asm_ppc.h -+++ b/src/lj_asm_ppc.h -@@ -226,6 +226,7 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref, - emit_tab(as, pi, rt, left, right); - } - -+#if !LJ_SOFTFP - /* Fuse to multiply-add/sub instruction. */ - static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) - { -@@ -245,6 +246,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) - } - return 0; - } -+#endif - - /* -- Calls --------------------------------------------------------------- */ - -@@ -253,13 +255,17 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) - { - uint32_t n, nargs = CCI_XNARGS(ci); - int32_t ofs = 8; -- Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; -+ Reg gpr = REGARG_FIRSTGPR; -+#if !LJ_SOFTFP -+ Reg fpr = REGARG_FIRSTFPR; -+#endif - if ((void *)ci->func) - emit_call(as, (void *)ci->func); - for (n = 0; n < nargs; n++) { /* Setup args. */ - IRRef ref = args[n]; - if (ref) { - IRIns *ir = IR(ref); -+#if !LJ_SOFTFP - if (irt_isfp(ir->t)) { - if (fpr <= REGARG_LASTFPR) { - lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ -@@ -271,7 +277,9 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) - emit_spstore(as, ir, r, ofs); - ofs += irt_isnum(ir->t) ? 8 : 4; - } -- } else { -+ } else -+#endif -+ { - if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ - ra_leftov(as, gpr, ref); -@@ -290,8 +298,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) - } - checkmclim(as); - } -+#if !LJ_SOFTFP - if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ - emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); -+#endif - } - - /* Setup result reg/sp for call. Evict scratch regs. */ -@@ -299,8 +309,10 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) - { - RegSet drop = RSET_SCRATCH; - int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); -+#if !LJ_SOFTFP - if ((ci->flags & CCI_NOFPRCLOBBER)) - drop &= ~RSET_FPR; -+#endif - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - if (hiop && ra_hasreg((ir+1)->r)) -@@ -308,7 +320,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) - ra_evictset(as, drop); /* Evictions must be performed first. */ - if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); -- if (irt_isfp(ir->t)) { -+ if (!LJ_SOFTFP && irt_isfp(ir->t)) { - if ((ci->flags & CCI_CASTU64)) { - /* Use spill slot or temp slots. */ - int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; -@@ -377,6 +389,7 @@ static void asm_retf(ASMState *as, IRIns *ir) - - /* -- Type conversions ---------------------------------------------------- */ - -+#if !LJ_SOFTFP - static void asm_tointg(ASMState *as, IRIns *ir, Reg left) - { - RegSet allow = RSET_FPR; -@@ -409,15 +422,23 @@ static void asm_tobit(ASMState *as, IRIns *ir) - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_fab(as, PPCI_FADD, tmp, left, right); - } -+#endif - - static void asm_conv(ASMState *as, IRIns *ir) - { - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); -+#if !LJ_SOFTFP - int stfp = (st == IRT_NUM || st == IRT_FLOAT); -+#endif - IRRef lref = ir->op1; -- lua_assert(irt_type(ir->t) != st); - lua_assert(!(irt_isint64(ir->t) || - (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ -+#if LJ_SOFTFP -+ /* FP conversions are handled by SPLIT. */ -+ lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); -+ /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ -+#else -+ lua_assert(irt_type(ir->t) != st); - if (irt_isfp(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - if (stfp) { /* FP to FP conversion. */ -@@ -476,7 +497,9 @@ static void asm_conv(ASMState *as, IRIns *ir) - emit_fb(as, PPCI_FCTIWZ, tmp, left); - } - } -- } else { -+ } else -+#endif -+ { - Reg dest = ra_dest(as, ir, RSET_GPR); - if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); -@@ -496,17 +519,41 @@ static void asm_strto(ASMState *as, IRIns *ir) - { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; - IRRef args[2]; -- int32_t ofs; -+ int32_t ofs = SPOFS_TMP; -+#if LJ_SOFTFP -+ ra_evictset(as, RSET_SCRATCH); -+ if (ra_used(ir)) { -+ if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && -+ (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) { -+ int i; -+ for (i = 0; i < 2; i++) { -+ Reg r = (ir+i)->r; -+ if (ra_hasreg(r)) { -+ ra_free(as, r); -+ ra_modified(as, r); -+ emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); -+ } -+ } -+ ofs = sps_scale(ir->s & ~1); -+ } else { -+ Reg rhi = ra_dest(as, ir+1, RSET_GPR); -+ Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); -+ emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs); -+ emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4); -+ } -+ } -+#else - RegSet drop = RSET_SCRATCH; - if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ - ra_evictset(as, drop); -+ if (ir->s) ofs = sps_scale(ir->s); -+#endif - asm_guardcc(as, CC_EQ); - emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ - args[0] = ir->op1; /* GCstr *str */ - args[1] = ASMREF_TMP1; /* TValue *n */ - asm_gencall(as, ci, args); - /* Store the result to the spill slot or temp slots. */ -- ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; - emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); - } - -@@ -530,7 +577,10 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) - Reg src = ra_alloc1(as, ref, allow); - emit_setgl(as, src, tmptv.gcr); - } -- type = ra_allock(as, irt_toitype(ir->t), allow); -+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) -+ type = ra_alloc1(as, ref+1, allow); -+ else -+ type = ra_allock(as, irt_toitype(ir->t), allow); - emit_setgl(as, type, tmptv.it); - } - } -@@ -574,11 +624,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - Reg tisnum = RID_NONE, tmpnum = RID_NONE; - IRRef refkey = ir->op2; - IRIns *irkey = IR(refkey); -+ int isk = irref_isk(refkey); - IRType1 kt = irkey->t; - uint32_t khash; - MCLabel l_end, l_loop, l_next; - - rset_clear(allow, tab); -+#if LJ_SOFTFP -+ if (!isk) { -+ key = ra_alloc1(as, refkey, allow); -+ rset_clear(allow, key); -+ if (irkey[1].o == IR_HIOP) { -+ if (ra_hasreg((irkey+1)->r)) { -+ tmpnum = (irkey+1)->r; -+ ra_noweak(as, tmpnum); -+ } else { -+ tmpnum = ra_allocref(as, refkey+1, allow); -+ } -+ rset_clear(allow, tmpnum); -+ } -+ } -+#else - if (irt_isnum(kt)) { - key = ra_alloc1(as, refkey, RSET_FPR); - tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); -@@ -588,6 +654,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - } -+#endif - tmp2 = ra_scratch(as, allow); - rset_clear(allow, tmp2); - -@@ -610,7 +677,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - asm_guardcc(as, CC_EQ); - else - emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); -- if (irt_isnum(kt)) { -+ if (!LJ_SOFTFP && irt_isnum(kt)) { - emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); - emit_condbranch(as, PPCI_BC, CC_GE, l_next); - emit_ab(as, PPCI_CMPLW, tmp1, tisnum); -@@ -620,7 +687,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - emit_ab(as, PPCI_CMPW, tmp2, key); - emit_condbranch(as, PPCI_BC, CC_NE, l_next); - } -- emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); -+ if (LJ_SOFTFP && ra_hasreg(tmpnum)) -+ emit_ab(as, PPCI_CMPW, tmp1, tmpnum); -+ else -+ emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); - if (!irt_ispri(kt)) - emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); - } -@@ -629,19 +699,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - (((char *)as->mcp-(char *)l_loop) & 0xffffu); - - /* Load main position relative to tab->node into dest. */ -- khash = irref_isk(refkey) ? ir_khash(irkey) : 1; -+ khash = isk ? ir_khash(irkey) : 1; - if (khash == 0) { - emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); - } else { - Reg tmphash = tmp1; -- if (irref_isk(refkey)) -+ if (isk) - tmphash = ra_allock(as, khash, allow); - emit_tab(as, PPCI_ADD, dest, dest, tmp1); - emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); - emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); - emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); -- if (irref_isk(refkey)) { -+ if (isk) { - /* Nothing to do. */ - } else if (irt_isstr(kt)) { - emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); -@@ -651,13 +721,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); - emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); - emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); -- if (irt_isnum(kt)) { -+ if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) { -+#if LJ_SOFTFP -+ emit_asb(as, PPCI_XOR, tmp2, key, tmp1); -+ emit_rotlwi(as, dest, tmp1, HASH_ROT1); -+ emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum); -+#else - int32_t ofs = ra_spill(as, irkey); - emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); - emit_rotlwi(as, dest, tmp1, HASH_ROT1); - emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); - emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); - emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); -+#endif - } else { - emit_asb(as, PPCI_XOR, tmp2, key, tmp1); - emit_rotlwi(as, dest, tmp1, HASH_ROT1); -@@ -784,8 +860,8 @@ static PPCIns asm_fxloadins(IRIns *ir) - case IRT_U8: return PPCI_LBZ; - case IRT_I16: return PPCI_LHA; - case IRT_U16: return PPCI_LHZ; -- case IRT_NUM: return PPCI_LFD; -- case IRT_FLOAT: return PPCI_LFS; -+ case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_LFD; -+ case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS; - default: return PPCI_LWZ; - } - } -@@ -795,8 +871,8 @@ static PPCIns asm_fxstoreins(IRIns *ir) - switch (irt_type(ir->t)) { - case IRT_I8: case IRT_U8: return PPCI_STB; - case IRT_I16: case IRT_U16: return PPCI_STH; -- case IRT_NUM: return PPCI_STFD; -- case IRT_FLOAT: return PPCI_STFS; -+ case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_STFD; -+ case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS; - default: return PPCI_STW; - } - } -@@ -839,7 +915,8 @@ static void asm_fstore(ASMState *as, IRIns *ir) - - static void asm_xload(ASMState *as, IRIns *ir) - { -- Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); -+ Reg dest = ra_dest(as, ir, -+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - if (irt_isi8(ir->t)) - emit_as(as, PPCI_EXTSB, dest, dest); -@@ -857,7 +934,8 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) - Reg src = ra_alloc1(as, irb->op1, RSET_GPR); - asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); - } else { -- Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); -+ Reg src = ra_alloc1(as, ir->op2, -+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, - rset_exclude(RSET_GPR, src), ofs); - } -@@ -871,10 +949,19 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) - Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; - RegSet allow = RSET_GPR; - int32_t ofs = AHUREF_LSX; -+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) { -+ t.irt = IRT_NUM; -+ if (ra_used(ir+1)) { -+ type = ra_dest(as, ir+1, allow); -+ rset_clear(allow, type); -+ } -+ ofs = 0; -+ } - if (ra_used(ir)) { -- lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); -- if (!irt_isnum(t)) ofs = 0; -- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); -+ lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || -+ irt_isint(ir->t) || irt_isaddr(ir->t)); -+ if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0; -+ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); - rset_clear(allow, dest); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); -@@ -883,12 +970,13 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) - asm_guardcc(as, CC_GE); - emit_ab(as, PPCI_CMPLW, type, tisnum); - if (ra_hasreg(dest)) { -- if (ofs == AHUREF_LSX) { -+ if (!LJ_SOFTFP && ofs == AHUREF_LSX) { - tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, - (idx&255)), (idx>>8))); - emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); - } else { -- emit_fai(as, PPCI_LFD, dest, idx, ofs); -+ emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx, -+ ofs+4*LJ_SOFTFP); - } - } - } else { -@@ -911,7 +999,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir) - int32_t ofs = AHUREF_LSX; - if (ir->r == RID_SINK) - return; -- if (irt_isnum(ir->t)) { -+ if (!LJ_SOFTFP && irt_isnum(ir->t)) { - src = ra_alloc1(as, ir->op2, RSET_FPR); - } else { - if (!irt_ispri(ir->t)) { -@@ -919,11 +1007,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir) - rset_clear(allow, src); - ofs = 0; - } -- type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); -+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) -+ type = ra_alloc1(as, (ir+1)->op2, allow); -+ else -+ type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - rset_clear(allow, type); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); -- if (irt_isnum(ir->t)) { -+ if (!LJ_SOFTFP && irt_isnum(ir->t)) { - if (ofs == AHUREF_LSX) { - emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); - emit_slwi(as, RID_TMP, (idx>>8), 3); -@@ -948,21 +1039,33 @@ static void asm_sload(ASMState *as, IRIns *ir) - IRType1 t = ir->t; - Reg dest = RID_NONE, type = RID_NONE, base; - RegSet allow = RSET_GPR; -+ int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); -+ if (hiop) -+ t.irt = IRT_NUM; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ -- lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); -+ lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); - lua_assert(LJ_DUALNUM || - !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); -+#if LJ_SOFTFP -+ lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ -+ if (hiop && ra_used(ir+1)) { -+ type = ra_dest(as, ir+1, allow); -+ rset_clear(allow, type); -+ } -+#else - if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { - dest = ra_scratch(as, RSET_FPR); - asm_tointg(as, ir, dest); - t.irt = IRT_NUM; /* Continue with a regular number type check. */ -- } else if (ra_used(ir)) { -+ } else -+#endif -+ if (ra_used(ir)) { - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); -- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); -+ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); - rset_clear(allow, dest); - base = ra_alloc1(as, REF_BASE, allow); - rset_clear(allow, base); -- if ((ir->op2 & IRSLOAD_CONVERT)) { -+ if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { - if (irt_isint(t)) { - emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); - dest = ra_scratch(as, RSET_FPR); -@@ -994,10 +1097,13 @@ dotypecheck: - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); - asm_guardcc(as, CC_GE); -- emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum); -+#if !LJ_SOFTFP - type = RID_TMP; -+#endif -+ emit_ab(as, PPCI_CMPLW, type, tisnum); - } -- if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4); -+ if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, -+ base, ofs-(LJ_SOFTFP?0:4)); - } else { - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - asm_guardcc(as, CC_NE); -@@ -1119,6 +1225,7 @@ static void asm_obar(ASMState *as, IRIns *ir) - - /* -- Arithmetic and logic operations ------------------------------------- */ - -+#if !LJ_SOFTFP - static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) - { - Reg dest = ra_dest(as, ir, RSET_FPR); -@@ -1146,13 +1253,17 @@ static void asm_fpmath(ASMState *as, IRIns *ir) - else - asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); - } -+#endif - - static void asm_add(ASMState *as, IRIns *ir) - { -+#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) - asm_fparith(as, ir, PPCI_FADD); -- } else { -+ } else -+#endif -+ { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - PPCIns pi; -@@ -1191,10 +1302,13 @@ static void asm_add(ASMState *as, IRIns *ir) - - static void asm_sub(ASMState *as, IRIns *ir) - { -+#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) - asm_fparith(as, ir, PPCI_FSUB); -- } else { -+ } else -+#endif -+ { - PPCIns pi = PPCI_SUBF; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left, right; -@@ -1220,9 +1334,12 @@ static void asm_sub(ASMState *as, IRIns *ir) - - static void asm_mul(ASMState *as, IRIns *ir) - { -+#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - asm_fparith(as, ir, PPCI_FMUL); -- } else { -+ } else -+#endif -+ { - PPCIns pi = PPCI_MULLW; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); -@@ -1250,9 +1367,12 @@ static void asm_mul(ASMState *as, IRIns *ir) - - static void asm_neg(ASMState *as, IRIns *ir) - { -+#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - asm_fpunary(as, ir, PPCI_FNEG); -- } else { -+ } else -+#endif -+ { - Reg dest, left; - PPCIns pi = PPCI_NEG; - if (as->flagmcp == as->mcp) { -@@ -1563,9 +1683,40 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) - PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)) - #define asm_bror(as, ir) lua_assert(0) - -+#if LJ_SOFTFP -+static void asm_sfpmin_max(ASMState *as, IRIns *ir) -+{ -+ CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp]; -+ IRRef args[4]; -+ MCLabel l_right, l_end; -+ Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR); -+ Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR); -+ Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR); -+ PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE; -+ righthi = (lefthi >> 8); lefthi &= 255; -+ rightlo = (leftlo >> 8); leftlo &= 255; -+ args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; -+ args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; -+ l_end = emit_label(as); -+ if (desthi != righthi) emit_mr(as, desthi, righthi); -+ if (destlo != rightlo) emit_mr(as, destlo, rightlo); -+ l_right = emit_label(as); -+ if (l_end != l_right) emit_jmp(as, l_end); -+ if (desthi != lefthi) emit_mr(as, desthi, lefthi); -+ if (destlo != leftlo) emit_mr(as, destlo, leftlo); -+ if (l_right == as->mcp+1) { -+ cond ^= 4; l_right = l_end; ++as->mcp; -+ } -+ emit_condbranch(as, PPCI_BC, cond, l_right); -+ ra_evictset(as, RSET_SCRATCH); -+ emit_cmpi(as, RID_RET, 1); -+ asm_gencall(as, &ci, args); -+} -+#endif -+ - static void asm_min_max(ASMState *as, IRIns *ir, int ismax) - { -- if (irt_isnum(ir->t)) { -+ if (!LJ_SOFTFP && irt_isnum(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg tmp = dest; - Reg right, left = ra_alloc2(as, ir, RSET_FPR); -@@ -1653,7 +1804,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc) - static void asm_comp(ASMState *as, IRIns *ir) - { - PPCCC cc = asm_compmap[ir->o]; -- if (irt_isnum(ir->t)) { -+ if (!LJ_SOFTFP && irt_isnum(ir->t)) { - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - asm_guardcc(as, (cc >> 4)); -@@ -1674,6 +1825,44 @@ static void asm_comp(ASMState *as, IRIns *ir) - - #define asm_equal(as, ir) asm_comp(as, ir) - -+#if LJ_SOFTFP -+/* SFP comparisons. */ -+static void asm_sfpcomp(ASMState *as, IRIns *ir) -+{ -+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; -+ RegSet drop = RSET_SCRATCH; -+ Reg r; -+ IRRef args[4]; -+ args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; -+ args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; -+ -+ for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { -+ if (!rset_test(as->freeset, r) && -+ regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) -+ rset_clear(drop, r); -+ } -+ ra_evictset(as, drop); -+ asm_setupresult(as, ir, ci); -+ switch ((IROp)ir->o) { -+ case IR_ULT: -+ asm_guardcc(as, CC_EQ); -+ emit_ai(as, PPCI_CMPWI, RID_RET, 0); -+ case IR_ULE: -+ asm_guardcc(as, CC_EQ); -+ emit_ai(as, PPCI_CMPWI, RID_RET, 1); -+ break; -+ case IR_GE: case IR_GT: -+ asm_guardcc(as, CC_EQ); -+ emit_ai(as, PPCI_CMPWI, RID_RET, 2); -+ default: -+ asm_guardcc(as, (asm_compmap[ir->o] & 0xf)); -+ emit_ai(as, PPCI_CMPWI, RID_RET, 0); -+ break; -+ } -+ asm_gencall(as, ci, args); -+} -+#endif -+ - #if LJ_HASFFI - /* 64 bit integer comparisons. */ - static void asm_comp64(ASMState *as, IRIns *ir) -@@ -1703,19 +1892,36 @@ static void asm_comp64(ASMState *as, IRIns *ir) - /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ - static void asm_hiop(ASMState *as, IRIns *ir) - { --#if LJ_HASFFI -+#if LJ_HASFFI || LJ_SOFTFP - /* HIOP is marked as a store because it needs its own DCE logic. */ - int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ - if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; - if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ - as->curins--; /* Always skip the CONV. */ -+#if LJ_HASFFI && !LJ_SOFTFP - if (usehi || uselo) - asm_conv64(as, ir); - return; -+#endif - } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ - as->curins--; /* Always skip the loword comparison. */ -+#if LJ_SOFTFP -+ if (!irt_isint(ir->t)) { -+ asm_sfpcomp(as, ir-1); -+ return; -+ } -+#endif -+#if LJ_HASFFI - asm_comp64(as, ir); -+#endif -+ return; -+#if LJ_SOFTFP -+ } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { -+ as->curins--; /* Always skip the loword min/max. */ -+ if (uselo || usehi) -+ asm_sfpmin_max(as, ir-1); - return; -+#endif - } else if ((ir-1)->o == IR_XSTORE) { - as->curins--; /* Handle both stores here. */ - if ((ir-1)->r != RID_SINK) { -@@ -1726,14 +1932,27 @@ static void asm_hiop(ASMState *as, IRIns *ir) - } - if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ - switch ((ir-1)->o) { -+#if LJ_HASFFI - case IR_ADD: as->curins--; asm_add64(as, ir); break; - case IR_SUB: as->curins--; asm_sub64(as, ir); break; - case IR_NEG: as->curins--; asm_neg64(as, ir); break; -+#endif -+#if LJ_SOFTFP -+ case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: -+ case IR_STRTO: -+ if (!uselo) -+ ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ -+ break; -+#endif - case IR_CALLN: -+ case IR_CALLS: - case IR_CALLXS: - if (!uselo) - ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ - break; -+#if LJ_SOFTFP -+ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: -+#endif - case IR_CNEWI: - /* Nothing to do here. Handled by lo op itself. */ - break; -@@ -1797,8 +2016,19 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) - if ((sn & SNAP_NORESTORE)) - continue; - if (irt_isnum(ir->t)) { -+#if LJ_SOFTFP -+ Reg tmp; -+ RegSet allow = rset_exclude(RSET_GPR, RID_BASE); -+ lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ -+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); -+ emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); -+ if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); -+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); -+ emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); -+#else - Reg src = ra_alloc1(as, ref, RSET_FPR); - emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); -+#endif - } else { - Reg type; - RegSet allow = rset_exclude(RSET_GPR, RID_BASE); -@@ -1811,6 +2041,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) - if ((sn & (SNAP_CONT|SNAP_FRAME))) { - if (s == 0) continue; /* Do not overwrite link to previous frame. */ - type = ra_allock(as, (int32_t)(*flinks--), allow); -+#if LJ_SOFTFP -+ } else if ((sn & SNAP_SOFTFPNUM)) { -+ type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); -+#endif - } else { - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - } -@@ -1947,14 +2181,15 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) - int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; - asm_collectargs(as, ir, ci, args); - for (i = 0; i < nargs; i++) -- if (args[i] && irt_isfp(IR(args[i])->t)) { -+ if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) { - if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; - } else { - if (ngpr > 0) ngpr--; else nslots++; - } - if (nslots > as->evenspill) /* Leave room for args in stack slots. */ - as->evenspill = nslots; -- return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); -+ return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) : -+ REGSP_HINT(RID_RET); - } - - static void asm_setup_target(ASMState *as) --- -2.20.1 - diff --git a/0014-x64-LJ_GC64-Fix-type-check-only-variant-of-SLOAD.patch b/0014-x64-LJ_GC64-Fix-type-check-only-variant-of-SLOAD.patch deleted file mode 100644 index 7e9dd8a..0000000 --- a/0014-x64-LJ_GC64-Fix-type-check-only-variant-of-SLOAD.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 05fbdf565c700365d22e38f11478101a0d92a23e Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Sun, 10 Sep 2017 14:05:30 +0200 -Subject: [PATCH 14/72] x64/LJ_GC64: Fix type-check-only variant of SLOAD. - -Thanks to Peter Cawley. ---- - src/lj_asm_x86.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h -index 55c02d2..af54dc7 100644 ---- a/src/lj_asm_x86.h -+++ b/src/lj_asm_x86.h -@@ -1759,7 +1759,7 @@ static void asm_sload(ASMState *as, IRIns *ir) - emit_i8(as, irt_toitype(t)); - emit_rr(as, XO_ARITHi8, XOg_CMP, tmp); - emit_shifti(as, XOg_SAR|REX_64, tmp, 47); -- emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs+4); -+ emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs); - #else - } else { - emit_i8(as, irt_toitype(t)); --- -2.20.1 - diff --git a/0015-MIPS64-Hide-internal-function.patch b/0015-MIPS64-Hide-internal-function.patch deleted file mode 100644 index 0e2f4fd..0000000 --- a/0015-MIPS64-Hide-internal-function.patch +++ /dev/null @@ -1,26 +0,0 @@ -From bf12f1dafb157008b963f829b57b2472b6993cc8 Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Mon, 18 Sep 2017 09:50:22 +0200 -Subject: [PATCH 15/72] MIPS64: Hide internal function. - ---- - src/lj_ccall.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/src/lj_ccall.c b/src/lj_ccall.c -index 799be48..25e938c 100644 ---- a/src/lj_ccall.c -+++ b/src/lj_ccall.c -@@ -848,7 +848,8 @@ noth: /* Not a homogeneous float/double aggregate. */ - return 0; /* Struct is in GPRs. */ - } - --void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, int ft) -+static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, -+ int ft) - { - if (LJ_ABI_SOFTFP ? ft : - ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) { --- -2.20.1 - diff --git a/0016-DynASM-x86-Fix-potential-REL_A-overflow.patch b/0016-DynASM-x86-Fix-potential-REL_A-overflow.patch deleted file mode 100644 index 66f5bf0..0000000 --- a/0016-DynASM-x86-Fix-potential-REL_A-overflow.patch +++ /dev/null @@ -1,34 +0,0 @@ -commit 6a2d8b0b4d49eb5aac600c219e5903420806e56e -Merge: bf12f1d 0c0e7b1 -Author: Mike Pall -Date: Wed Sep 20 19:42:34 2017 +0200 - - Merge branch 'master' into v2.1 - -From 0c0e7b168ea147866835954267c151ef789f64fb Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Wed, 20 Sep 2017 19:39:50 +0200 -Subject: [PATCH 16/72] DynASM/x86: Fix potential REL_A overflow. - -Thanks to Joshua Haberman. ---- - dynasm/dasm_x86.h | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h -index 90dc5d1..f9260b0 100644 ---- a/dynasm/dasm_x86.h -+++ b/dynasm/dasm_x86.h -@@ -395,7 +395,8 @@ int dasm_encode(Dst_DECL, void *buffer) - } - case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; - b++; n = (int)(ptrdiff_t)D->globals[-n]; -- case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ -+ case DASM_REL_A: rel_a: -+ n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ - case DASM_REL_PC: rel_pc: { - int shrink = *b++; - int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; } --- -2.20.1 - diff --git a/0017-LJ_GC64-Fix-ir_khash-for-non-string-GCobj.patch b/0017-LJ_GC64-Fix-ir_khash-for-non-string-GCobj.patch deleted file mode 100644 index aff6f20..0000000 --- a/0017-LJ_GC64-Fix-ir_khash-for-non-string-GCobj.patch +++ /dev/null @@ -1,29 +0,0 @@ -From b4ed3219a1a98dd9fe7d1e3eeea3b82f5a780948 Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Mon, 2 Oct 2017 09:22:46 +0200 -Subject: [PATCH 17/72] LJ_GC64: Fix ir_khash for non-string GCobj. - -Contributed by Peter Cawley. ---- - src/lj_asm.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/src/lj_asm.c b/src/lj_asm.c -index bed2268..d961927 100644 ---- a/src/lj_asm.c -+++ b/src/lj_asm.c -@@ -1017,7 +1017,11 @@ static uint32_t ir_khash(IRIns *ir) - } else { - lua_assert(irt_isgcv(ir->t)); - lo = u32ptr(ir_kgc(ir)); -+#if LJ_GC64 -+ hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15); -+#else - hi = lo + HASH_BIAS; -+#endif - } - return hashrot(lo, hi); - } --- -2.20.1 - diff --git a/0018-LJ_GC64-Make-ASMREF_L-references-64-bit.patch b/0018-LJ_GC64-Make-ASMREF_L-references-64-bit.patch deleted file mode 100644 index d604876..0000000 --- a/0018-LJ_GC64-Make-ASMREF_L-references-64-bit.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 850f8c59d3d04a9847f21f32a6c36d8269b5b6b1 Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Mon, 2 Oct 2017 23:10:56 +0200 -Subject: [PATCH 18/72] LJ_GC64: Make ASMREF_L references 64 bit. - -Reported by Yichun Zhang. ---- - src/lj_asm.c | 1 + - src/lj_ir.h | 4 +++- - src/lj_opt_sink.c | 1 + - 3 files changed, 5 insertions(+), 1 deletion(-) - -diff --git a/src/lj_asm.c b/src/lj_asm.c -index d961927..753fe6b 100644 ---- a/src/lj_asm.c -+++ b/src/lj_asm.c -@@ -2015,6 +2015,7 @@ static void asm_setup_regsp(ASMState *as) - ir->prev = REGSP_INIT; - if (irt_is64(ir->t) && ir->o != IR_KNULL) { - #if LJ_GC64 -+ /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */ - ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */ - #else - /* Make life easier for backends by putting address of constant in i. */ -diff --git a/src/lj_ir.h b/src/lj_ir.h -index 34c2785..8057a75 100644 ---- a/src/lj_ir.h -+++ b/src/lj_ir.h -@@ -377,10 +377,12 @@ typedef struct IRType1 { uint8_t irt; } IRType1; - #define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64)) - - #if LJ_GC64 -+/* Include IRT_NIL, so IR(ASMREF_L) (aka REF_NIL) is considered 64 bit. */ - #define IRT_IS64 \ - ((1u<cur.nk); ir < irbase; ir++) { - irt_clearmark(ir->t); - ir->prev = REGSP_INIT; -+ /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */ - if (irt_is64(ir->t) && ir->o != IR_KNULL) - ir++; - } --- -2.20.1 - diff --git a/0019-Fix-FOLD-rule-for-strength-reduction-of-widening.patch b/0019-Fix-FOLD-rule-for-strength-reduction-of-widening.patch deleted file mode 100644 index c999ce8..0000000 --- a/0019-Fix-FOLD-rule-for-strength-reduction-of-widening.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 9f0caad0e43f97a4613850b3874b851cb1bc301d Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Wed, 8 Nov 2017 12:53:05 +0100 -Subject: [PATCH 19/72] Fix FOLD rule for strength reduction of widening. - -Reported by Matthew Burk. ---- - src/lj_opt_fold.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c -index 3d0e35a..5dc7ae3 100644 ---- a/src/lj_opt_fold.c -+++ b/src/lj_opt_fold.c -@@ -1052,7 +1052,7 @@ LJFOLDF(simplify_conv_sext) - if (ref == J->scev.idx) { - IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; - lua_assert(irt_isint(J->scev.t)); -- if (lo && IR(lo)->i + ofs >= 0) { -+ if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { - ok_reduce: - #if LJ_TARGET_X64 - /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */ --- -2.20.1 - diff --git a/0020-ARM64-Fix-assembly-of-HREFK.patch b/0020-ARM64-Fix-assembly-of-HREFK.patch deleted file mode 100644 index 3200304..0000000 --- a/0020-ARM64-Fix-assembly-of-HREFK.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 06cd9fce7df440323647174f1ca4a01281ec8acd Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Wed, 8 Nov 2017 12:53:48 +0100 -Subject: [PATCH 20/72] ARM64: Fix assembly of HREFK. - -Reported by Jason Teplitz. ---- - src/lj_asm_arm64.h | 11 +++++------ - 1 file changed, 5 insertions(+), 6 deletions(-) - -diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h -index 8fd92e7..cbb186d 100644 ---- a/src/lj_asm_arm64.h -+++ b/src/lj_asm_arm64.h -@@ -869,14 +869,12 @@ static void asm_hrefk(ASMState *as, IRIns *ir) - int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); - int32_t kofs = ofs + (int32_t)offsetof(Node, key); - int bigofs = !emit_checkofs(A64I_LDRx, ofs); -- RegSet allow = RSET_GPR; - Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; -- Reg node = ra_alloc1(as, ir->op1, allow); -- Reg key = ra_scratch(as, rset_clear(allow, node)); -- Reg idx = node; -+ Reg node = ra_alloc1(as, ir->op1, RSET_GPR); -+ Reg key, idx = node; -+ RegSet allow = rset_exclude(RSET_GPR, node); - uint64_t k; - lua_assert(ofs % sizeof(Node) == 0); -- rset_clear(allow, key); - if (bigofs) { - idx = dest; - rset_clear(allow, dest); -@@ -892,7 +890,8 @@ static void asm_hrefk(ASMState *as, IRIns *ir) - } else { - k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey); - } -- emit_nm(as, A64I_CMPx, key, ra_allock(as, k, allow)); -+ key = ra_scratch(as, allow); -+ emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key))); - emit_lso(as, A64I_LDRx, key, idx, kofs); - if (bigofs) - emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR); --- -2.20.1 - diff --git a/0021-MIPS64-Fix-register-allocation-in-assembly-of-HREF.patch b/0021-MIPS64-Fix-register-allocation-in-assembly-of-HREF.patch deleted file mode 100644 index 80fad2f..0000000 --- a/0021-MIPS64-Fix-register-allocation-in-assembly-of-HREF.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 99cdfbf6a1e8856f64908072ef10443a7eab14f2 Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Wed, 8 Nov 2017 12:54:03 +0100 -Subject: [PATCH 21/72] MIPS64: Fix register allocation in assembly of HREF. - -Contributed by James Cowgill. ---- - src/lj_asm_mips.h | 42 +++++++++++++++++++++++++----------------- - 1 file changed, 25 insertions(+), 17 deletions(-) - -diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h -index 1406a87..3a4679b 100644 ---- a/src/lj_asm_mips.h -+++ b/src/lj_asm_mips.h -@@ -859,6 +859,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - Reg dest = ra_dest(as, ir, allow); - Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); - Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; -+#if LJ_64 -+ Reg cmp64 = RID_NONE; -+#endif - IRRef refkey = ir->op2; - IRIns *irkey = IR(refkey); - int isk = irref_isk(refkey); -@@ -901,6 +904,26 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - #endif - tmp2 = ra_scratch(as, allow); - rset_clear(allow, tmp2); -+#if LJ_64 -+ if (LJ_SOFTFP || !irt_isnum(kt)) { -+ /* Allocate cmp64 register used for 64-bit comparisons */ -+ if (LJ_SOFTFP && irt_isnum(kt)) { -+ cmp64 = key; -+ } else if (!isk && irt_isaddr(kt)) { -+ cmp64 = tmp2; -+ } else { -+ int64_t k; -+ if (isk && irt_isaddr(kt)) { -+ k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; -+ } else { -+ lua_assert(irt_ispri(kt) && !irt_isnil(kt)); -+ k = ~((int64_t)~irt_toitype(ir->t) << 47); -+ } -+ cmp64 = ra_allock(as, k, allow); -+ rset_clear(allow, cmp64); -+ } -+ } -+#endif - - /* Key not found in chain: jump to exit (if merged) or load niltv. */ - l_end = emit_label(as); -@@ -943,24 +966,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15); - emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum); - emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); -- } else if (LJ_SOFTFP && irt_isnum(kt)) { -- emit_branch(as, MIPSI_BEQ, tmp1, key, l_end); -- emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); -- } else if (irt_isaddr(kt)) { -- Reg refk = tmp2; -- if (isk) { -- int64_t k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; -- refk = ra_allock(as, k, allow); -- rset_clear(allow, refk); -- } -- emit_branch(as, MIPSI_BEQ, tmp1, refk, l_end); -- emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); - } else { -- Reg pri = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); -- rset_clear(allow, pri); -- lua_assert(irt_ispri(kt) && !irt_isnil(kt)); -- emit_branch(as, MIPSI_BEQ, tmp1, pri, l_end); -- emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); -+ emit_branch(as, MIPSI_BEQ, tmp1, cmp64, l_end); -+ emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); - } - *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); - if (!isk && irt_isaddr(kt)) { --- -2.20.1 - diff --git a/0022-ARM64-Fix-xpcall-error-case.patch b/0022-ARM64-Fix-xpcall-error-case.patch deleted file mode 100644 index ec05a7c..0000000 --- a/0022-ARM64-Fix-xpcall-error-case.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 33082a6f4778aa152f6a4a684a7fe79436f1ecb6 Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Thu, 16 Nov 2017 12:53:34 +0100 -Subject: [PATCH 22/72] ARM64: Fix xpcall() error case. - -Thanks to Stefan Pejic. ---- - src/vm_arm64.dasc | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc -index 3eaf376..241c58a 100644 ---- a/src/vm_arm64.dasc -+++ b/src/vm_arm64.dasc -@@ -1185,12 +1185,12 @@ static void build_subroutines(BuildCtx *ctx) - | subs NARGS8:RC, NARGS8:RC, #16 - | blo ->fff_fallback - | mov RB, BASE -- | add BASE, BASE, #24 - | asr ITYPE, CARG2, #47 - | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 - | cmn ITYPE, #-LJ_TFUNC - | add PC, TMP0, #24+FRAME_PCALL - | bne ->fff_fallback // Traceback must be a function. -+ | add BASE, BASE, #24 - | stp CARG2, CARG1, [RB] // Swap function and traceback. - | cbz NARGS8:RC, ->vm_call_dispatch - | b <1 --- -2.20.1 - diff --git a/0023-Fix-saved-bytecode-encapsulated-in-ELF-objects.patch b/0023-Fix-saved-bytecode-encapsulated-in-ELF-objects.patch deleted file mode 100644 index 740a5a7..0000000 --- a/0023-Fix-saved-bytecode-encapsulated-in-ELF-objects.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 7dbf0b05f1228c1c719866db5e5f3d58f87f74c8 Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Thu, 16 Nov 2017 12:58:12 +0100 -Subject: [PATCH 23/72] Fix saved bytecode encapsulated in ELF objects. - -Thanks to Dimitry Andric. ---- - src/jit/bcsave.lua | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua -index aa677df..c94064e 100644 ---- a/src/jit/bcsave.lua -+++ b/src/jit/bcsave.lua -@@ -275,7 +275,7 @@ typedef struct { - o.sect[2].size = fofs(ofs) - o.sect[3].type = f32(3) -- .strtab - o.sect[3].ofs = fofs(sofs + ofs) -- o.sect[3].size = fofs(#symname+1) -+ o.sect[3].size = fofs(#symname+2) - ffi.copy(o.space+ofs+1, symname) - ofs = ofs + #symname + 2 - o.sect[4].type = f32(1) -- .rodata --- -2.20.1 - diff --git a/0024-ARM64-Fix-xpcall-error-case-really.patch b/0024-ARM64-Fix-xpcall-error-case-really.patch deleted file mode 100644 index ab518e1..0000000 --- a/0024-ARM64-Fix-xpcall-error-case-really.patch +++ /dev/null @@ -1,37 +0,0 @@ -From d417ded17945b4211608d497d50b509e0274f5e0 Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Sat, 18 Nov 2017 12:23:57 +0100 -Subject: [PATCH 24/72] ARM64: Fix xpcall() error case (really). -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Thanks to François Perrad and Stefan Pejic. ---- - src/vm_arm64.dasc | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc -index 241c58a..c55794a 100644 ---- a/src/vm_arm64.dasc -+++ b/src/vm_arm64.dasc -@@ -1182,7 +1182,7 @@ static void build_subroutines(BuildCtx *ctx) - |.ffunc xpcall - | ldp CARG1, CARG2, [BASE] - | ldrb TMP0w, GL->hookmask -- | subs NARGS8:RC, NARGS8:RC, #16 -+ | subs NARGS8:TMP1, NARGS8:RC, #16 - | blo ->fff_fallback - | mov RB, BASE - | asr ITYPE, CARG2, #47 -@@ -1190,6 +1190,7 @@ static void build_subroutines(BuildCtx *ctx) - | cmn ITYPE, #-LJ_TFUNC - | add PC, TMP0, #24+FRAME_PCALL - | bne ->fff_fallback // Traceback must be a function. -+ | mov NARGS8:RC, NARGS8:TMP1 - | add BASE, BASE, #24 - | stp CARG2, CARG1, [RB] // Swap function and traceback. - | cbz NARGS8:RC, ->vm_call_dispatch --- -2.20.1 - diff --git a/0025-MIPS64-Fix-xpcall-error-case.patch b/0025-MIPS64-Fix-xpcall-error-case.patch deleted file mode 100644 index 5b17e81..0000000 --- a/0025-MIPS64-Fix-xpcall-error-case.patch +++ /dev/null @@ -1,39 +0,0 @@ -From ea7071d3c30b6432bfe6f8a9d263e0285cec25e3 Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Sat, 18 Nov 2017 12:25:35 +0100 -Subject: [PATCH 25/72] MIPS64: Fix xpcall() error case. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Thanks to François Perrad and Stefan Pejic. ---- - src/vm_mips64.dasc | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc -index 75b38de..a78cd25 100644 ---- a/src/vm_mips64.dasc -+++ b/src/vm_mips64.dasc -@@ -1399,15 +1399,16 @@ static void build_subroutines(BuildCtx *ctx) - |. nop - | - |.ffunc xpcall -- | daddiu NARGS8:RC, NARGS8:RC, -16 -+ | daddiu NARGS8:TMP0, NARGS8:RC, -16 - | ld CARG1, 0(BASE) - | ld CARG2, 8(BASE) -- | bltz NARGS8:RC, ->fff_fallback -+ | bltz NARGS8:TMP0, ->fff_fallback - |. lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) - | gettp AT, CARG2 - | daddiu AT, AT, -LJ_TFUNC - | bnez AT, ->fff_fallback // Traceback must be a function. - |. move TMP2, BASE -+ | move NARGS8:RC, NARGS8:TMP0 - | daddiu BASE, BASE, 24 - | // Remember active hook before pcall. - | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT --- -2.20.1 - diff --git a/0026-Fix-IR_BUFPUT-assembly.patch b/0026-Fix-IR_BUFPUT-assembly.patch deleted file mode 100644 index c942467..0000000 --- a/0026-Fix-IR_BUFPUT-assembly.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 58d0dde0a2df49abc991decbabff15230010829a Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Sun, 14 Jan 2018 13:57:00 +0100 -Subject: [PATCH 26/72] Fix IR_BUFPUT assembly. - -Thanks to Peter Cawley. ---- - src/lj_asm.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/src/lj_asm.c b/src/lj_asm.c -index 753fe6b..5f83779 100644 ---- a/src/lj_asm.c -+++ b/src/lj_asm.c -@@ -1119,7 +1119,7 @@ static void asm_bufput(ASMState *as, IRIns *ir) - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr]; - IRRef args[3]; - IRIns *irs; -- int kchar = -1; -+ int kchar = -129; - args[0] = ir->op1; /* SBuf * */ - args[1] = ir->op2; /* GCstr * */ - irs = IR(ir->op2); -@@ -1127,7 +1127,7 @@ static void asm_bufput(ASMState *as, IRIns *ir) - if (irs->o == IR_KGC) { - GCstr *s = ir_kstr(irs); - if (s->len == 1) { /* Optimize put of single-char string constant. */ -- kchar = strdata(s)[0]; -+ kchar = (int8_t)strdata(s)[0]; /* Signed! */ - args[1] = ASMREF_TMP1; /* int, truncated to char */ - ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar]; - } -@@ -1154,7 +1154,7 @@ static void asm_bufput(ASMState *as, IRIns *ir) - asm_gencall(as, ci, args); - if (args[1] == ASMREF_TMP1) { - Reg tmp = ra_releasetmp(as, ASMREF_TMP1); -- if (kchar == -1) -+ if (kchar == -129) - asm_tvptr(as, tmp, irs->op1); - else - ra_allockreg(as, kchar, tmp); --- -2.20.1 - diff --git a/0027-Fix-string.format-c-0.patch b/0027-Fix-string.format-c-0.patch deleted file mode 100644 index caece09..0000000 --- a/0027-Fix-string.format-c-0.patch +++ /dev/null @@ -1,15 +0,0 @@ -commit 4660dbfa8a4f9eea5218b739075d04faadfeeef6 -Merge: 58d0dde 430d9f8 -Author: Mike Pall -Date: Sun Jan 14 14:26:10 2018 +0100 - - Merge branch 'master' into v2.1 - -From 430d9f8f7ebb779948dbd43944b876b1a3f58551 Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Sun, 14 Jan 2018 14:11:59 +0100 -Subject: [PATCH 27/72] Fix string.format("%c", 0). - ---- - src/lib_string.c | 15 ++++++++------- - 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/0028-Fix-ARMv8-32-bit-subset-detection.patch b/0028-Fix-ARMv8-32-bit-subset-detection.patch deleted file mode 100644 index 00687af..0000000 --- a/0028-Fix-ARMv8-32-bit-subset-detection.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 9eaad8574f5b2271b981cd31966b1e832cd8de12 Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Thu, 18 Jan 2018 12:24:36 +0100 -Subject: [PATCH 28/72] Fix ARMv8 (32 bit subset) detection. - -Thanks to Markus Oberhumber. ---- - src/lj_arch.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/lj_arch.h b/src/lj_arch.h -index 5962f3a..fcebd84 100644 ---- a/src/lj_arch.h -+++ b/src/lj_arch.h -@@ -201,7 +201,7 @@ - #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ - #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL - --#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__ -+#if __ARM_ARCH_8__ || __ARM_ARCH_8A__ - #define LJ_ARCH_VERSION 80 - #elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ - #define LJ_ARCH_VERSION 70 --- -2.20.1 - diff --git a/0029-Fix-LuaJIT-API-docs-for-LUAJIT_MODE_.patch b/0029-Fix-LuaJIT-API-docs-for-LUAJIT_MODE_.patch deleted file mode 100644 index 70ae35a..0000000 --- a/0029-Fix-LuaJIT-API-docs-for-LUAJIT_MODE_.patch +++ /dev/null @@ -1,28 +0,0 @@ -From c88602f080dcafea6ba222a2f7cc1ea0e41ef3cc Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Thu, 18 Jan 2018 12:29:39 +0100 -Subject: [PATCH 29/72] Fix LuaJIT API docs for LUAJIT_MODE_*. - -Thanks to sunfishgao. ---- - doc/ext_c_api.html | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html -index 041a722..4bb8251 100644 ---- a/doc/ext_c_api.html -+++ b/doc/ext_c_api.html -@@ -89,8 +89,8 @@ other Lua/C API functions). -

-

- The third argument specifies the mode, which is 'or'ed with a flag. --The flag can be LUAJIT_MODE_OFF to turn a feature on, --LUAJIT_MODE_ON to turn a feature off, or -+The flag can be LUAJIT_MODE_OFF to turn a feature off, -+LUAJIT_MODE_ON to turn a feature on, or - LUAJIT_MODE_FLUSH to flush cached code. -

-

--- -2.20.1 - diff --git a/0030-MIPS64-Fix-soft-float-0.0-vs.-0.0-comparison.patch b/0030-MIPS64-Fix-soft-float-0.0-vs.-0.0-comparison.patch deleted file mode 100644 index 8ee3a17..0000000 --- a/0030-MIPS64-Fix-soft-float-0.0-vs.-0.0-comparison.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 8071aa4ad65cf09e3b7adda4a7787d8897e5314c Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Mon, 29 Jan 2018 12:12:29 +0100 -Subject: [PATCH 30/72] MIPS64: Fix soft-float +-0.0 vs. +-0.0 comparison. - -Thanks to Stefan Pejic. ---- - src/vm_mips64.dasc | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc -index a78cd25..0a3f8e5 100644 ---- a/src/vm_mips64.dasc -+++ b/src/vm_mips64.dasc -@@ -2661,7 +2661,7 @@ static void build_subroutines(BuildCtx *ctx) - |. slt CRET1, CARG2, CARG1 - |8: - | jr ra -- |. nop -+ |. li CRET1, 0 - |9: - | jr ra - |. move CRET1, CRET2 --- -2.20.1 - diff --git a/0031-FFI-Don-t-assert-on-1LL-5.2-compatibility-mode-only.patch b/0031-FFI-Don-t-assert-on-1LL-5.2-compatibility-mode-only.patch deleted file mode 100644 index b95ca0c..0000000 --- a/0031-FFI-Don-t-assert-on-1LL-5.2-compatibility-mode-only.patch +++ /dev/null @@ -1,69 +0,0 @@ -commit 74c544d68c07bcd416225598cdf15f88e62fd457 -Merge: 8071aa4 b03a56f -Author: Mike Pall -Date: Mon Jan 29 12:53:42 2018 +0100 - - Merge branch 'master' into v2.1 - -From b03a56f28ec360bbcf43091afd0607890a4a33c7 Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Mon, 29 Jan 2018 12:47:08 +0100 -Subject: [PATCH 31/72] FFI: Don't assert on #1LL (5.2 compatibility mode - only). - -Reported by Denis Golovan. ---- - src/lib_ffi.c | 2 +- - src/lj_carith.c | 9 +++++++++ - src/lj_carith.h | 1 + - 3 files changed, 11 insertions(+), 1 deletion(-) - -diff --git a/src/lib_ffi.c b/src/lib_ffi.c -index f2f2ede..83483d9 100644 ---- a/src/lib_ffi.c -+++ b/src/lib_ffi.c -@@ -193,7 +193,7 @@ LJLIB_CF(ffi_meta___eq) LJLIB_REC(cdata_arith MM_eq) - - LJLIB_CF(ffi_meta___len) LJLIB_REC(cdata_arith MM_len) - { -- return ffi_arith(L); -+ return lj_carith_len(L); - } - - LJLIB_CF(ffi_meta___lt) LJLIB_REC(cdata_arith MM_lt) -diff --git a/src/lj_carith.c b/src/lj_carith.c -index 6224dee..c34596c 100644 ---- a/src/lj_carith.c -+++ b/src/lj_carith.c -@@ -272,6 +272,15 @@ int lj_carith_op(lua_State *L, MMS mm) - return lj_carith_meta(L, cts, &ca, mm); - } - -+/* No built-in functionality for length of cdata. */ -+int lj_carith_len(lua_State *L) -+{ -+ CTState *cts = ctype_cts(L); -+ CDArith ca; -+ carith_checkarg(L, cts, &ca); -+ return lj_carith_meta(L, cts, &ca, MM_len); -+} -+ - /* -- 64 bit bit operations helpers --------------------------------------- */ - - #if LJ_64 -diff --git a/src/lj_carith.h b/src/lj_carith.h -index 3c15591..82fc824 100644 ---- a/src/lj_carith.h -+++ b/src/lj_carith.h -@@ -11,6 +11,7 @@ - #if LJ_HASFFI - - LJ_FUNC int lj_carith_op(lua_State *L, MMS mm); -+LJ_FUNC int lj_carith_len(lua_State *L); - - #if LJ_32 - LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh); - --- -2.20.1 - diff --git a/0032-Fix-GCC-7-Wimplicit-fallthrough-warnings.patch b/0032-Fix-GCC-7-Wimplicit-fallthrough-warnings.patch deleted file mode 100644 index 192f271..0000000 --- a/0032-Fix-GCC-7-Wimplicit-fallthrough-warnings.patch +++ /dev/null @@ -1,291 +0,0 @@ -commit 0bf46e1edf94c43795b5e491efe682ab70974ce7 -Merge: 74c544d d4ee803 -Author: Mike Pall -Date: Mon Jan 29 13:19:30 2018 +0100 - - Merge branch 'master' into v2.1 - -From d4ee80342770d1281e2ce877f8ae8ab1d99e6528 Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Mon, 29 Jan 2018 13:06:13 +0100 -Subject: [PATCH 32/72] Fix GCC 7 -Wimplicit-fallthrough warnings. - ---- - dynasm/dasm_arm.h | 2 ++ - dynasm/dasm_mips.h | 1 + - dynasm/dasm_ppc.h | 1 + - dynasm/dasm_x86.h | 14 ++++++++++++-- - src/lj_asm.c | 3 ++- - src/lj_cparse.c | 10 ++++++++++ - src/lj_err.c | 1 + - src/lj_opt_sink.c | 2 +- - src/lj_parse.c | 3 ++- - src/luajit.c | 1 + - 10 files changed, 33 insertions(+), 5 deletions(-) - -diff --git a/dynasm/dasm_arm.h b/dynasm/dasm_arm.h -index a43f7c6..1d404cc 100644 ---- a/dynasm/dasm_arm.h -+++ b/dynasm/dasm_arm.h -@@ -254,6 +254,7 @@ void dasm_put(Dst_DECL, int start, ...) - case DASM_IMMV8: - CK((n & 3) == 0, RANGE_I); - n >>= 2; -+ /* fallthrough */ - case DASM_IMML8: - case DASM_IMML12: - CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) : -@@ -371,6 +372,7 @@ int dasm_encode(Dst_DECL, void *buffer) - break; - case DASM_REL_LG: - CK(n >= 0, UNDEF_LG); -+ /* fallthrough */ - case DASM_REL_PC: - CK(n >= 0, UNDEF_PC); - n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) - 4; -diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h -index 7eac669..46af034 100644 ---- a/dynasm/dasm_mips.h -+++ b/dynasm/dasm_mips.h -@@ -350,6 +350,7 @@ int dasm_encode(Dst_DECL, void *buffer) - break; - case DASM_REL_LG: - CK(n >= 0, UNDEF_LG); -+ /* fallthrough */ - case DASM_REL_PC: - CK(n >= 0, UNDEF_PC); - n = *DASM_POS2PTR(D, n); -diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h -index 6110361..81b9a76 100644 ---- a/dynasm/dasm_ppc.h -+++ b/dynasm/dasm_ppc.h -@@ -350,6 +350,7 @@ int dasm_encode(Dst_DECL, void *buffer) - break; - case DASM_REL_LG: - CK(n >= 0, UNDEF_LG); -+ /* fallthrough */ - case DASM_REL_PC: - CK(n >= 0, UNDEF_PC); - n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base); -diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h -index f9260b0..8ae911d 100644 ---- a/dynasm/dasm_x86.h -+++ b/dynasm/dasm_x86.h -@@ -194,12 +194,13 @@ void dasm_put(Dst_DECL, int start, ...) - switch (action) { - case DASM_DISP: - if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; } -- case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; -+ /* fallthrough */ -+ case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */ - case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ - case DASM_IMM_D: ofs += 4; break; - case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob; - case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break; -- case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; -+ case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */ - case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; - case DASM_SPACE: p++; ofs += n; break; - case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ -@@ -323,11 +324,14 @@ int dasm_link(Dst_DECL, size_t *szp) - pos += 2; - break; - } -+ /* fallthrough */ - case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++; -+ /* fallthrough */ - case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W: - case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB: - case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break; - case DASM_LABEL_LG: p++; -+ /* fallthrough */ - case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */ - case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */ - case DASM_EXTERN: p += 2; break; -@@ -385,12 +389,15 @@ int dasm_encode(Dst_DECL, void *buffer) - if (mrm != 5) { mm[-1] -= 0x80; break; } } - if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40; - } -+ /* fallthrough */ - case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break; - case DASM_IMM_DB: if (((n+128)&-256) == 0) { - db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb; - } else mark = NULL; -+ /* fallthrough */ - case DASM_IMM_D: wd: dasmd(n); break; - case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL; -+ /* fallthrough */ - case DASM_IMM_W: dasmw(n); break; - case DASM_VREG: { - int t = *p++; -@@ -397,6 +404,7 @@ - } - case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; - b++; n = (int)(ptrdiff_t)D->globals[-n]; -+ /* fallthrough */ - case DASM_REL_A: rel_a: - n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ - case DASM_REL_PC: rel_pc: { -@@ -407,6 +415,7 @@ int dasm_encode(Dst_DECL, void *buffer) - } - case DASM_IMM_LG: - p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; } -+ /* fallthrough */ - case DASM_IMM_PC: { - int *pb = DASM_POS2PTR(D, n); - n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base); -@@ -427,6 +436,7 @@ int dasm_encode(Dst_DECL, void *buffer) - case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd; - case DASM_MARK: mark = cp; break; - case DASM_ESC: action = *p++; -+ /* fallthrough */ - default: *cp++ = action; break; - case DASM_SECTION: case DASM_STOP: goto stop; - } -diff --git a/src/lj_asm.c b/src/lj_asm.c -index 02714d4..dd7186f 100644 ---- a/src/lj_asm.c -+++ b/src/lj_asm.c -@@ -2136,6 +2136,7 @@ static void asm_setup_regsp(ASMState *as) - case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT: - if (REGARG_NUMGPR < 3 && as->evenspill < 3) - as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ -+ /* fallthrough */ - #if LJ_TARGET_X86 && LJ_HASFFI - if (0) { - case IR_CNEW: -@@ -2176,7 +2177,7 @@ static void asm_setup_regsp(ASMState *as) - continue; - #endif - } -- /* fallthrough for integer POW */ -+ /* fallthrough */ /* for integer POW */ - case IR_DIV: case IR_MOD: - if (!irt_isnum(ir->t)) { - ir->prev = REGSP_HINT(RID_RET); -diff --git a/src/lj_cparse.c b/src/lj_cparse.c -index 2ba50a7..f111537 100644 ---- a/src/lj_cparse.c -+++ b/src/lj_cparse.c -@@ -590,28 +590,34 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) - k->id = k2.id > k3.id ? k2.id : k3.id; - continue; - } -+ /* fallthrough */ - case 1: - if (cp_opt(cp, CTOK_OROR)) { - cp_expr_sub(cp, &k2, 2); k->i32 = k->u32 || k2.u32; k->id = CTID_INT32; - continue; - } -+ /* fallthrough */ - case 2: - if (cp_opt(cp, CTOK_ANDAND)) { - cp_expr_sub(cp, &k2, 3); k->i32 = k->u32 && k2.u32; k->id = CTID_INT32; - continue; - } -+ /* fallthrough */ - case 3: - if (cp_opt(cp, '|')) { - cp_expr_sub(cp, &k2, 4); k->u32 = k->u32 | k2.u32; goto arith_result; - } -+ /* fallthrough */ - case 4: - if (cp_opt(cp, '^')) { - cp_expr_sub(cp, &k2, 5); k->u32 = k->u32 ^ k2.u32; goto arith_result; - } -+ /* fallthrough */ - case 5: - if (cp_opt(cp, '&')) { - cp_expr_sub(cp, &k2, 6); k->u32 = k->u32 & k2.u32; goto arith_result; - } -+ /* fallthrough */ - case 6: - if (cp_opt(cp, CTOK_EQ)) { - cp_expr_sub(cp, &k2, 7); k->i32 = k->u32 == k2.u32; k->id = CTID_INT32; -@@ -620,6 +626,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) - cp_expr_sub(cp, &k2, 7); k->i32 = k->u32 != k2.u32; k->id = CTID_INT32; - continue; - } -+ /* fallthrough */ - case 7: - if (cp_opt(cp, '<')) { - cp_expr_sub(cp, &k2, 8); -@@ -654,6 +661,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) - k->id = CTID_INT32; - continue; - } -+ /* fallthrough */ - case 8: - if (cp_opt(cp, CTOK_SHL)) { - cp_expr_sub(cp, &k2, 9); k->u32 = k->u32 << k2.u32; -@@ -666,6 +674,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) - k->u32 = k->u32 >> k2.u32; - continue; - } -+ /* fallthrough */ - case 9: - if (cp_opt(cp, '+')) { - cp_expr_sub(cp, &k2, 10); k->u32 = k->u32 + k2.u32; -@@ -675,6 +684,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) - } else if (cp_opt(cp, '-')) { - cp_expr_sub(cp, &k2, 10); k->u32 = k->u32 - k2.u32; goto arith_result; - } -+ /* fallthrough */ - case 10: - if (cp_opt(cp, '*')) { - cp_expr_unary(cp, &k2); k->u32 = k->u32 * k2.u32; goto arith_result; -diff --git a/src/lj_err.c b/src/lj_err.c -index 54f42c3..13a1ded 100644 ---- a/src/lj_err.c -+++ b/src/lj_err.c -@@ -153,6 +153,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) - case FRAME_CONT: /* Continuation frame. */ - if (frame_iscont_fficb(frame)) - goto unwind_c; -+ /* fallthrough */ - case FRAME_VARG: /* Vararg frame. */ - frame = frame_prevd(frame); - break; -diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c -index 6a00d04..4efe395 100644 ---- a/src/lj_opt_sink.c -+++ b/src/lj_opt_sink.c -@@ -100,8 +100,8 @@ static void sink_mark_ins(jit_State *J) - (LJ_32 && ir+1 < irlast && (ir+1)->o == IR_HIOP && - !sink_checkphi(J, ir, (ir+1)->op2)))) - irt_setmark(ir->t); /* Mark ineligible allocation. */ -- /* fallthrough */ - #endif -+ /* fallthrough */ - case IR_USTORE: - irt_setmark(IR(ir->op2)->t); /* Mark stored value. */ - break; -diff --git a/src/lj_parse.c b/src/lj_parse.c -index 9e5976f..6785495 100644 ---- a/src/lj_parse.c -+++ b/src/lj_parse.c -@@ -2696,7 +2696,8 @@ static int parse_stmt(LexState *ls) - lj_lex_next(ls); - parse_goto(ls); - break; -- } /* else: fallthrough */ -+ } -+ /* fallthrough */ - default: - parse_call_assign(ls); - break; -diff --git a/src/luajit.c b/src/luajit.c -index 9e15b26..0e18dc5 100644 ---- a/src/luajit.c -+++ b/src/luajit.c -@@ -419,6 +419,7 @@ static int collectargs(char **argv, int *flags) - break; - case 'e': - *flags |= FLAGS_EXEC; -+ /* fallthrough */ - case 'j': /* LuaJIT extension */ - case 'l': - *flags |= FLAGS_OPTION; --- -2.20.1 - diff --git a/0033-Clear-stack-after-print_jit_status-in-CLI.patch b/0033-Clear-stack-after-print_jit_status-in-CLI.patch deleted file mode 100644 index 53a4acf..0000000 --- a/0033-Clear-stack-after-print_jit_status-in-CLI.patch +++ /dev/null @@ -1,32 +0,0 @@ -commit fddef924097f28c46a0a5b45483a6086b33cab81 -Merge: 0bf46e1 03cd5aa -Author: Mike Pall -Date: Mon Jan 29 13:28:53 2018 +0100 - - Merge branch 'master' into v2.1 - -From 03cd5aa749c1bc3bb4b7d4289236b6096cb3dc85 Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Mon, 29 Jan 2018 13:25:51 +0100 -Subject: [PATCH 33/72] Clear stack after print_jit_status() in CLI. - -Suggested by Hydroque. ---- - src/luajit.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/luajit.c b/src/luajit.c -index 0e18dc5..9ede59c 100644 ---- a/src/luajit.c -+++ b/src/luajit.c -@@ -151,6 +151,7 @@ static void print_jit_status(lua_State *L) - fputs(s, stdout); - } - putc('\n', stdout); -+ lua_settop(L, 0); /* clear stack */ - } - - static void createargtable(lua_State *L, char **argv, int argc, int argf) --- -2.20.1 - diff --git a/0034-Fix-rechaining-of-pseudo-resurrected-string-keys.patch b/0034-Fix-rechaining-of-pseudo-resurrected-string-keys.patch deleted file mode 100644 index 1b90fb3..0000000 --- a/0034-Fix-rechaining-of-pseudo-resurrected-string-keys.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 046129dbdda5261c1b17469a2895a113d14c070a Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Tue, 27 Feb 2018 23:02:23 +0100 -Subject: [PATCH 34/72] Fix rechaining of pseudo-resurrected string keys. - -This is a serious bug. But extremely hard to reproduce, so it went -undetected for 8 years. One needs two resurrections with different -main nodes, which are both in a hash chain which gets relinked on -key insertion where the colliding node is in a non-main position. Phew. - -Thanks to lbeiming. ---- - src/lj_tab.c | 23 +++++++++++++++++++++++ - 1 file changed, 23 insertions(+) - -diff --git a/src/lj_tab.c b/src/lj_tab.c -index 50f447e..f2f3c0b 100644 ---- a/src/lj_tab.c -+++ b/src/lj_tab.c -@@ -457,6 +457,29 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) - freenode->next = nn->next; - nn->next = n->next; - setmref(n->next, nn); -+ /* -+ ** Rechaining a resurrected string key creates a new dilemma: -+ ** Another string key may have originally been resurrected via -+ ** _any_ of the previous nodes as a chain anchor. Including -+ ** a node that had to be moved, which makes them unreachable. -+ ** It's not feasible to check for all previous nodes, so rechain -+ ** any string key that's currently in a non-main positions. -+ */ -+ while ((nn = nextnode(freenode))) { -+ if (tvisstr(&nn->key) && !tvisnil(&nn->val)) { -+ Node *mn = hashstr(t, strV(&nn->key)); -+ if (mn != freenode) { -+ freenode->next = nn->next; -+ nn->next = mn->next; -+ setmref(mn->next, nn); -+ } else { -+ freenode = nn; -+ } -+ } else { -+ freenode = nn; -+ } -+ } -+ break; - } else { - freenode = nn; - } --- -2.20.1 - diff --git a/0035-DynASM-x86-Add-BMI1-and-BMI2-instructions.patch b/0035-DynASM-x86-Add-BMI1-and-BMI2-instructions.patch deleted file mode 100644 index 832809e..0000000 --- a/0035-DynASM-x86-Add-BMI1-and-BMI2-instructions.patch +++ /dev/null @@ -1,50 +0,0 @@ -From fe651bf6e2b4d02b624be3c289378c08bab2fa9b Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Tue, 27 Feb 2018 23:22:40 +0100 -Subject: [PATCH 35/72] DynASM/x86: Add BMI1 and BMI2 instructions. - -Thanks to Peter Cawley. ---- - dynasm/dasm_x86.lua | 19 +++++++++++++++++++ - 1 file changed, 19 insertions(+) - -diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua -index 4c031e2..c1d267a 100644 ---- a/dynasm/dasm_x86.lua -+++ b/dynasm/dasm_x86.lua -@@ -955,6 +955,7 @@ end - -- "u" Use VEX encoding, vvvv unused. - -- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is - -- removed from the list used by future characters). -+-- "w" Use VEX encoding, vvvv from 3rd operand. - -- "L" Force VEX.L - -- - -- All of the following characters force a flush of the opcode: -@@ -1677,6 +1678,24 @@ local map_op = { - -- Intel ADX - adcx_2 = "rmqd:660F38F6rM", - adox_2 = "rmqd:F30F38F6rM", -+ -+ -- BMI1 -+ andn_3 = "rrmqd:0F38VF2rM", -+ bextr_3 = "rmrqd:0F38wF7rM", -+ blsi_2 = "rmqd:0F38vF33m", -+ blsmsk_2 = "rmqd:0F38vF32m", -+ blsr_2 = "rmqd:0F38vF31m", -+ tzcnt_2 = "rmqdw:F30FBCrM", -+ -+ -- BMI2 -+ bzhi_3 = "rmrqd:0F38wF5rM", -+ mulx_3 = "rrmqd:F20F38VF6rM", -+ pdep_3 = "rrmqd:F20F38VF5rM", -+ pext_3 = "rrmqd:F30F38VF5rM", -+ rorx_3 = "rmSqd:F20F3AuF0rMS", -+ sarx_3 = "rmrqd:F30F38wF7rM", -+ shrx_3 = "rmrqd:F20F38wF7rM", -+ shlx_3 = "rmrqd:660F38wF7rM", - } - - ------------------------------------------------------------------------------ --- -2.20.1 - diff --git a/0036-Give-expected-results-for-negative-non-base-10-numbe.patch b/0036-Give-expected-results-for-negative-non-base-10-numbe.patch deleted file mode 100644 index 3279dfe..0000000 --- a/0036-Give-expected-results-for-negative-non-base-10-numbe.patch +++ /dev/null @@ -1,55 +0,0 @@ -From f3cf0d6e15240098147437fed7bd436ff55fdf8c Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Sun, 22 Apr 2018 13:14:28 +0200 -Subject: [PATCH 36/72] Give expected results for negative non-base-10 numbers - in tonumber(). - -This was undefined in Lua 5.1, but it's defined in 5.2. ---- - src/lib_base.c | 27 ++++++++++++++++++--------- - 1 file changed, 18 insertions(+), 9 deletions(-) - -diff --git a/src/lib_base.c b/src/lib_base.c -index 3a75787..d61e876 100644 ---- a/src/lib_base.c -+++ b/src/lib_base.c -@@ -287,18 +287,27 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) - } else { - const char *p = strdata(lj_lib_checkstr(L, 1)); - char *ep; -+ unsigned int neg = 0; - unsigned long ul; - if (base < 2 || base > 36) - lj_err_arg(L, 2, LJ_ERR_BASERNG); -- ul = strtoul(p, &ep, base); -- if (p != ep) { -- while (lj_char_isspace((unsigned char)(*ep))) ep++; -- if (*ep == '\0') { -- if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u)) -- setintV(L->base-1-LJ_FR2, (int32_t)ul); -- else -- setnumV(L->base-1-LJ_FR2, (lua_Number)ul); -- return FFH_RES(1); -+ while (lj_char_isspace((unsigned char)(*p))) p++; -+ if (*p == '-') { p++; neg = 1; } else if (*p == '+') { p++; } -+ if (lj_char_isalnum((unsigned char)(*p))) { -+ ul = strtoul(p, &ep, base); -+ if (p != ep) { -+ while (lj_char_isspace((unsigned char)(*ep))) ep++; -+ if (*ep == '\0') { -+ if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) { -+ if (neg) ul = -ul; -+ setintV(L->base-1-LJ_FR2, (int32_t)ul); -+ } else { -+ lua_Number n = (lua_Number)ul; -+ if (neg) n = -n; -+ setnumV(L->base-1-LJ_FR2, n); -+ } -+ return FFH_RES(1); -+ } - } - } - } --- -2.20.1 - diff --git a/0037-FFI-Add-tonumber-specialization-for-failed-conversio.patch b/0037-FFI-Add-tonumber-specialization-for-failed-conversio.patch deleted file mode 100644 index c0406a5..0000000 --- a/0037-FFI-Add-tonumber-specialization-for-failed-conversio.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 02b521981a1ab919ff2cd4d9bcaee80baf77dce2 Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Sun, 22 Apr 2018 13:27:25 +0200 -Subject: [PATCH 37/72] FFI: Add tonumber() specialization for failed - conversions. - -Contributed by Javier Guerra Giraldez. ---- - src/lj_crecord.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/lj_crecord.c b/src/lj_crecord.c -index 84fc49e..bc88d63 100644 ---- a/src/lj_crecord.c -+++ b/src/lj_crecord.c -@@ -1661,6 +1661,8 @@ void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd) - d = ctype_get(cts, CTID_DOUBLE); - J->base[0] = crec_ct_tv(J, d, 0, J->base[0], &rd->argv[0]); - } else { -+ /* Specialize to the ctype that couldn't be converted. */ -+ argv2cdata(J, J->base[0], &rd->argv[0]); - J->base[0] = TREF_NIL; - } - } --- -2.20.1 - diff --git a/0038-Bump-copyright-date-to-2018.patch b/0038-Bump-copyright-date-to-2018.patch deleted file mode 100644 index 1f9e5eb..0000000 --- a/0038-Bump-copyright-date-to-2018.patch +++ /dev/null @@ -1,387 +0,0 @@ -From cf7a0540a3a9f80fc729211eb21d1e9b72acc89c Mon Sep 17 00:00:00 2001 -From: Mike Pall -Date: Wed, 25 Apr 2018 12:07:08 +0200 -Subject: [PATCH 38/72] Bump copyright date to 2018. - ---- - doc/bluequad-print.css | 2 +- - doc/bluequad.css | 2 +- - doc/changes.html | 5 ++--- - doc/contact.html | 7 +++---- - doc/ext_c_api.html | 5 ++--- - doc/ext_ffi.html | 5 ++--- - doc/ext_ffi_api.html | 5 ++--- - doc/ext_ffi_semantics.html | 5 ++--- - doc/ext_ffi_tutorial.html | 5 ++--- - doc/ext_jit.html | 5 ++--- - doc/extensions.html | 5 ++--- - doc/faq.html | 5 ++--- - doc/install.html | 5 ++--- - doc/luajit.html | 7 +++---- - doc/running.html | 5 ++--- - doc/status.html | 5 ++--- - 16 files changed, 32 insertions(+), 46 deletions(-) - -diff --git a/doc/bluequad-print.css b/doc/bluequad-print.css -index 62e1c16..d5a3ea3 100644 ---- a/doc/bluequad-print.css -+++ b/doc/bluequad-print.css -@@ -1,4 +1,4 @@ --/* Copyright (C) 2004-2017 Mike Pall. -+/* Copyright (C) 2004-2018 Mike Pall. - * - * You are welcome to use the general ideas of this design for your own sites. - * But please do not steal the stylesheet, the layout or the color scheme. -diff --git a/doc/bluequad.css b/doc/bluequad.css -index be2c4bf..cfc889a 100644 ---- a/doc/bluequad.css -+++ b/doc/bluequad.css -@@ -1,4 +1,4 @@ --/* Copyright (C) 2004-2017 Mike Pall. -+/* Copyright (C) 2004-2018 Mike Pall. - * - * You are welcome to use the general ideas of this design for your own sites. - * But please do not steal the stylesheet, the layout or the color scheme. -diff --git a/doc/changes.html b/doc/changes.html -index 4a4d4fb..c1848e8 100644 ---- a/doc/changes.html -+++ b/doc/changes.html -@@ -3,8 +3,7 @@ - - LuaJIT Change History - -- -- -+ - - - -@@ -1010,7 +1009,7 @@ This is the initial non-public release of LuaJIT. - -

- - - - - - - - - - - - -