// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "IrLoweringA64.h"

#include "Luau/CodeGen.h"
#include "Luau/DenseHash.h"
#include "Luau/IrAnalysis.h"
#include "Luau/IrDump.h"
#include "Luau/IrUtils.h"

#include "EmitCommonA64.h"
#include "NativeState.h"

#include "lstate.h"
#include "lgc.h"

namespace Luau
{
namespace CodeGen
{
namespace A64
{

// Conditions that must be false for NaN operands map to A64 flag combinations that are false in the unordered case
inline ConditionA64 getConditionFP(IrCondition cond)
{
    switch (cond)
    {
    case IrCondition::Equal:
        return ConditionA64::Equal;

    case IrCondition::NotEqual:
        return ConditionA64::NotEqual;

    case IrCondition::Less:
        return ConditionA64::Minus;

    case IrCondition::NotLess:
        return ConditionA64::Plus;

    case IrCondition::LessEqual:
        return ConditionA64::UnsignedLessEqual;

    case IrCondition::NotLessEqual:
        return ConditionA64::UnsignedGreater;

    case IrCondition::Greater:
        return ConditionA64::Greater;

    case IrCondition::NotGreater:
        return ConditionA64::LessEqual;

    case IrCondition::GreaterEqual:
        return ConditionA64::GreaterEqual;

    case IrCondition::NotGreaterEqual:
        return ConditionA64::Less;

    default:
        LUAU_ASSERT(!"Unexpected condition code");
        return ConditionA64::Always;
    }
}

static void checkObjectBarrierConditions(AssemblyBuilderA64& build, RegisterA64 object, RegisterA64 temp, int ra, Label& skip)
{
    RegisterA64 tempw = castReg(KindA64::w, temp);

    // iscollectable(ra)
    build.ldr(tempw, mem(rBase, ra * sizeof(TValue) + offsetof(TValue, tt)));
    build.cmp(tempw, LUA_TSTRING);
    build.b(ConditionA64::Less, skip);

    // isblack(obj2gco(o))
    build.ldrb(tempw, mem(object, offsetof(GCheader, marked)));
    build.tbz(tempw, BLACKBIT, skip);

    // iswhite(gcvalue(ra))
    build.ldr(temp, mem(rBase, ra * sizeof(TValue) + offsetof(TValue, value)));
    build.ldrb(tempw, mem(temp, offsetof(GCheader, marked)));
    build.tst(tempw, bit2mask(WHITE0BIT, WHITE1BIT));
    build.b(ConditionA64::Equal, skip); // Equal = Zero after tst
}

static void emitAddOffset(AssemblyBuilderA64& build, RegisterA64 dst, RegisterA64 src, size_t offset)
{
    LUAU_ASSERT(dst != src);
    LUAU_ASSERT(offset <= INT_MAX);

    if (offset <= AssemblyBuilderA64::kMaxImmediate)
    {
        build.add(dst, src, uint16_t(offset));
    }
    else
    {
        build.mov(dst, int(offset));
        build.add(dst, dst, src);
    }
}

static void emitAbort(AssemblyBuilderA64& build, Label& abort)
{
    Label skip;
    build.b(skip);
    build.setLabel(abort);
    build.udf();
    build.setLabel(skip);
}

static void emitFallback(AssemblyBuilderA64& build, int offset, int pcpos)
{
    // fallback(L, instruction, base, k)
    build.mov(x0, rState);
    emitAddOffset(build, x1, rCode, pcpos * sizeof(Instruction));
    build.mov(x2, rBase);
    build.mov(x3, rConstants);
    build.ldr(x4, mem(rNativeContext, offset));
    build.blr(x4);

    emitUpdateBase(build);
}

static void emitInvokeLibm1P(AssemblyBuilderA64& build, size_t func, int arg)
{
    build.ldr(d0, mem(rBase, arg * sizeof(TValue) + offsetof(TValue, value.n)));
    build.add(x0, sp, sTemporary.data); // sp-relative offset
    build.ldr(x1, mem(rNativeContext, uint32_t(func)));
    build.blr(x1);
}

static bool emitBuiltin(
    AssemblyBuilderA64& build, IrFunction& function, IrRegAllocA64& regs, int bfid, int res, int arg, IrOp args, int nparams, int nresults)
{
    switch (bfid)
    {
    case LBF_MATH_FREXP:
        LUAU_ASSERT(nparams == 1 && (nresults == 1 || nresults == 2));
        emitInvokeLibm1P(build, offsetof(NativeContext, libm_frexp), arg);
        build.str(d0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n)));
        if (nresults == 2)
        {
            build.ldr(w0, sTemporary);
            build.scvtf(d1, w0);
            build.str(d1, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, value.n)));
        }
        return true;
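    // libm_modf writes the integral part through its out pointer (the sTemporary stack slot) and returns the fractional part in d0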
    case LBF_MATH_MODF:
        LUAU_ASSERT(nparams == 1 && (nresults == 1 || nresults == 2));
        emitInvokeLibm1P(build, offsetof(NativeContext, libm_modf), arg);
        build.ldr(d1, sTemporary);
        build.str(d1, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n)));
        if (nresults == 2)
            build.str(d0, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, value.n)));
        return true;
    case LBF_MATH_SIGN:
        LUAU_ASSERT(nparams == 1 && nresults == 1);
        build.ldr(d0, mem(rBase, arg * sizeof(TValue) + offsetof(TValue, value.n)));
        build.fcmpz(d0);
        build.fmov(d0, 0.0);
        build.fmov(d1, 1.0);
        build.fcsel(d0, d1, d0, getConditionFP(IrCondition::Greater));
        build.fmov(d1, -1.0);
        build.fcsel(d0, d1, d0, getConditionFP(IrCondition::Less));
        build.str(d0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n)));
        return true;
    case LBF_TYPE:
        build.ldr(w0, mem(rBase, arg * sizeof(TValue) + offsetof(TValue, tt)));
        build.ldr(x1, mem(rState, offsetof(lua_State, global)));
        LUAU_ASSERT(sizeof(TString*) == 8);
        build.add(x1, x1, zextReg(w0), 3);
        build.ldr(x0, mem(x1, offsetof(global_State, ttname)));
        build.str(x0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.gc)));
        return true;
    case LBF_TYPEOF:
        build.mov(x0, rState);
        build.add(x1, rBase, uint16_t(arg * sizeof(TValue)));
        build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaT_objtypenamestr)));
        build.blr(x2);
        build.str(x0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.gc)));
        return true;

    default:
        LUAU_ASSERT(!"Missing A64 lowering");
        return false;
    }
}

IrLoweringA64::IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, NativeState& data, IrFunction& function)
    : build(build)
    , helpers(helpers)
    , data(data)
    , function(function)
    , regs(function, {{x0, x15}, {x16, x17}, {q0, q7}, {q16, q31}})
    , valueTracker(function)
{
    // In order to allocate registers during lowering, we need to know where instruction results are last used
    updateLastUseLocations(function);

    valueTracker.setRestoreCallack(this, [](void* context, IrInst& inst) {
        IrLoweringA64* self = static_cast<IrLoweringA64*>(context);
        self->regs.restoreReg(self->build, inst);
    });
}

void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
{
    valueTracker.beforeInstLowering(inst);

    switch (inst.cmd)
    {
    case IrCmd::LOAD_TAG:
    {
        inst.regA64 = regs.allocReg(KindA64::w, index);

        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, tt));
        build.ldr(inst.regA64, addr);
        break;
    }
    case IrCmd::LOAD_POINTER:
    {
        inst.regA64 = regs.allocReg(KindA64::x, index);

        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value.gc));
        build.ldr(inst.regA64, addr);
        break;
    }
    case IrCmd::LOAD_DOUBLE:
    {
        inst.regA64 = regs.allocReg(KindA64::d, index);

        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value.n));
        build.ldr(inst.regA64, addr);
        break;
    }
    case IrCmd::LOAD_INT:
    {
        inst.regA64 = regs.allocReg(KindA64::w, index);

        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
        build.ldr(inst.regA64, addr);
        break;
    }
    case IrCmd::LOAD_TVALUE:
    {
        inst.regA64 = regs.allocReg(KindA64::q, index);

        AddressA64 addr = tempAddr(inst.a, 0);
        build.ldr(inst.regA64, addr);
        break;
    }
    case IrCmd::LOAD_NODE_VALUE_TV:
    {
        inst.regA64 = regs.allocReg(KindA64::q, index);

        build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(LuaNode, val)));
        break;
    }
    case IrCmd::LOAD_ENV:
        inst.regA64 = regs.allocReg(KindA64::x, index);

        build.ldr(inst.regA64, mem(rClosure, offsetof(Closure, env)));
        break;
    case IrCmd::GET_ARR_ADDR:
    {
        inst.regA64 = regs.allocReuse(KindA64::x, index, {inst.a});
        build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(Table, array)));
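
        // the array index is either a dynamic register value or a compile-time constant; constants that don't fit the add immediate go through a scratch register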
        if (inst.b.kind == IrOpKind::Inst)
        {
            build.add(inst.regA64, inst.regA64, zextReg(regOp(inst.b)), kTValueSizeLog2);
        }
        else if (inst.b.kind == IrOpKind::Constant)
        {
            if (intOp(inst.b) == 0)
            {
                // no offset required
            }
            else if (intOp(inst.b) * sizeof(TValue) <= AssemblyBuilderA64::kMaxImmediate)
            {
                build.add(inst.regA64, inst.regA64, uint16_t(intOp(inst.b) * sizeof(TValue)));
            }
            else
            {
                RegisterA64 temp = regs.allocTemp(KindA64::x);
                build.mov(temp, intOp(inst.b) * sizeof(TValue));
                build.add(inst.regA64, inst.regA64, temp);
            }
        }
        else
            LUAU_ASSERT(!"Unsupported instruction form");
        break;
    }
    case IrCmd::GET_SLOT_NODE_ADDR:
    {
        inst.regA64 = regs.allocReuse(KindA64::x, index, {inst.a});
        RegisterA64 temp1 = regs.allocTemp(KindA64::x);
        RegisterA64 temp1w = castReg(KindA64::w, temp1);
        RegisterA64 temp2 = regs.allocTemp(KindA64::w);

        // note: since the stride of the load is the same as the destination register size, we can range check the array index, not the byte offset
        if (uintOp(inst.b) <= AddressA64::kMaxOffset)
            build.ldr(temp1w, mem(rCode, uintOp(inst.b) * sizeof(Instruction)));
        else
        {
            build.mov(temp1, uintOp(inst.b) * sizeof(Instruction));
            build.ldr(temp1w, mem(rCode, temp1));
        }

        // C field can be shifted as long as it's at the most significant byte of the instruction word
        LUAU_ASSERT(kOffsetOfInstructionC == 3);
        build.ldrb(temp2, mem(regOp(inst.a), offsetof(Table, nodemask8)));
        build.and_(temp2, temp2, temp1w, -24);

        // note: this may clobber inst.a, so it's important that we don't use it after this
        build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(Table, node)));
        build.add(inst.regA64, inst.regA64, zextReg(temp2), kLuaNodeSizeLog2);
        break;
    }
    case IrCmd::GET_HASH_NODE_ADDR:
    {
        inst.regA64 = regs.allocReuse(KindA64::x, index, {inst.a});
        RegisterA64 temp1 = regs.allocTemp(KindA64::w);
        RegisterA64 temp2 = regs.allocTemp(KindA64::w);

        // hash & ((1 << lsizenode) - 1) == hash & ~(-1 << lsizenode)
        build.mov(temp1, -1);
        build.ldrb(temp2, mem(regOp(inst.a), offsetof(Table, lsizenode)));
        build.lsl(temp1, temp1, temp2);
        build.mov(temp2, uintOp(inst.b));
        build.bic(temp2, temp2, temp1);

        // note: this may clobber inst.a, so it's important that we don't use it after this
        build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(Table, node)));
        build.add(inst.regA64, inst.regA64, zextReg(temp2), kLuaNodeSizeLog2);
        break;
    }
    case IrCmd::STORE_TAG:
    {
        RegisterA64 temp = regs.allocTemp(KindA64::w);
        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, tt));
        build.mov(temp, tagOp(inst.b));
        build.str(temp, addr);
        break;
    }
    case IrCmd::STORE_POINTER:
    {
        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
        build.str(regOp(inst.b), addr);
        break;
    }
    case IrCmd::STORE_DOUBLE:
    {
        RegisterA64 temp = tempDouble(inst.b);
        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
        build.str(temp, addr);
        break;
    }
    case IrCmd::STORE_INT:
    {
        RegisterA64 temp = tempInt(inst.b);
        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
        build.str(temp, addr);
        break;
    }
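    // vectors are stored unboxed as three consecutive floats; each double operand is narrowed with fcvt before its store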
    case IrCmd::STORE_VECTOR:
    {
        RegisterA64 temp1 = tempDouble(inst.b);
        RegisterA64 temp2 = tempDouble(inst.c);
        RegisterA64 temp3 = tempDouble(inst.d);
        RegisterA64 temp4 = regs.allocTemp(KindA64::s);

        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
        LUAU_ASSERT(addr.kind == AddressKindA64::imm && addr.data % 4 == 0 && unsigned(addr.data + 8) / 4 <= AddressA64::kMaxOffset);

        build.fcvt(temp4, temp1);
        build.str(temp4, AddressA64(addr.base, addr.data + 0));
        build.fcvt(temp4, temp2);
        build.str(temp4, AddressA64(addr.base, addr.data + 4));
        build.fcvt(temp4, temp3);
        build.str(temp4, AddressA64(addr.base, addr.data + 8));
        break;
    }
    case IrCmd::STORE_TVALUE:
    {
        AddressA64 addr = tempAddr(inst.a, 0);
        build.str(regOp(inst.b), addr);
        break;
    }
    case IrCmd::STORE_NODE_VALUE_TV:
        build.str(regOp(inst.b), mem(regOp(inst.a), offsetof(LuaNode, val)));
        break;
    case IrCmd::ADD_INT:
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
        if (inst.b.kind == IrOpKind::Constant && unsigned(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate)
            build.add(inst.regA64, regOp(inst.a), uint16_t(intOp(inst.b)));
        else if (inst.a.kind == IrOpKind::Constant && unsigned(intOp(inst.a)) <= AssemblyBuilderA64::kMaxImmediate)
            build.add(inst.regA64, regOp(inst.b), uint16_t(intOp(inst.a)));
        else
        {
            RegisterA64 temp = tempInt(inst.b);
            build.add(inst.regA64, regOp(inst.a), temp);
        }
        break;
    case IrCmd::SUB_INT:
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
        if (inst.b.kind == IrOpKind::Constant && unsigned(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate)
            build.sub(inst.regA64, regOp(inst.a), uint16_t(intOp(inst.b)));
        else
        {
            RegisterA64 temp = tempInt(inst.b);
            build.sub(inst.regA64, regOp(inst.a), temp);
        }
        break;
    case IrCmd::ADD_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
        RegisterA64 temp1 = tempDouble(inst.a);
        RegisterA64 temp2 = tempDouble(inst.b);
        build.fadd(inst.regA64, temp1, temp2);
        break;
    }
    case IrCmd::SUB_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
        RegisterA64 temp1 = tempDouble(inst.a);
        RegisterA64 temp2 = tempDouble(inst.b);
        build.fsub(inst.regA64, temp1, temp2);
        break;
    }
    case IrCmd::MUL_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
        RegisterA64 temp1 = tempDouble(inst.a);
        RegisterA64 temp2 = tempDouble(inst.b);
        build.fmul(inst.regA64, temp1, temp2);
        break;
    }
    case IrCmd::DIV_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
        RegisterA64 temp1 = tempDouble(inst.a);
        RegisterA64 temp2 = tempDouble(inst.b);
        build.fdiv(inst.regA64, temp1, temp2);
        break;
    }
    case IrCmd::MOD_NUM:
    {
        inst.regA64 = regs.allocReg(KindA64::d, index); // can't allocReuse because both A and B are used twice
        RegisterA64 temp1 = tempDouble(inst.a);
        RegisterA64 temp2 = tempDouble(inst.b);
        build.fdiv(inst.regA64, temp1, temp2);
        build.frintm(inst.regA64, inst.regA64);
        build.fmul(inst.regA64, inst.regA64, temp2);
        build.fsub(inst.regA64, temp1, inst.regA64);
        break;
    }
    case IrCmd::MIN_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
        RegisterA64 temp1 = tempDouble(inst.a);
        RegisterA64 temp2 = tempDouble(inst.b);
        build.fcmp(temp1, temp2);
        build.fcsel(inst.regA64, temp1, temp2, getConditionFP(IrCondition::Less));
        break;
    }
    case IrCmd::MAX_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
        RegisterA64 temp1 = tempDouble(inst.a);
        RegisterA64 temp2 = tempDouble(inst.b);
        build.fcmp(temp1, temp2);
        build.fcsel(inst.regA64, temp1, temp2, getConditionFP(IrCondition::Greater));
        break;
    }
    case IrCmd::UNM_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
        RegisterA64 temp = tempDouble(inst.a);
        build.fneg(inst.regA64, temp);
        break;
    }
    case IrCmd::FLOOR_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
        RegisterA64 temp = tempDouble(inst.a);
        build.frintm(inst.regA64, temp);
        break;
    }
    case IrCmd::CEIL_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
        RegisterA64 temp = tempDouble(inst.a);
        build.frintp(inst.regA64, temp);
        break;
    }
    case IrCmd::ROUND_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
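        // frinta rounds to nearest with ties away from zero, matching the round-half-away-from-zero behavior of math.round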
        RegisterA64 temp = tempDouble(inst.a);
        build.frinta(inst.regA64, temp);
        break;
    }
    case IrCmd::SQRT_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
        RegisterA64 temp = tempDouble(inst.a);
        build.fsqrt(inst.regA64, temp);
        break;
    }
    case IrCmd::ABS_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
        RegisterA64 temp = tempDouble(inst.a);
        build.fabs(inst.regA64, temp);
        break;
    }
    case IrCmd::NOT_ANY:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});

        if (inst.a.kind == IrOpKind::Constant)
        {
            // other cases should've been constant folded
            LUAU_ASSERT(tagOp(inst.a) == LUA_TBOOLEAN);
            build.eor(inst.regA64, regOp(inst.b), 1);
        }
        else
        {
            Label notbool, exit;

            // use the fact that NIL is the only value less than BOOLEAN to do two tag comparisons at once
            LUAU_ASSERT(LUA_TNIL == 0 && LUA_TBOOLEAN == 1);
            build.cmp(regOp(inst.a), LUA_TBOOLEAN);
            build.b(ConditionA64::NotEqual, notbool);

            // boolean => invert value
            build.eor(inst.regA64, regOp(inst.b), 1);
            build.b(exit);

            // not boolean => result is true iff tag was nil
            build.setLabel(notbool);
            build.cset(inst.regA64, ConditionA64::Less);

            build.setLabel(exit);
        }
        break;
    }
    case IrCmd::JUMP:
        jumpOrFallthrough(blockOp(inst.a), next);
        break;
    case IrCmd::JUMP_IF_TRUTHY:
    {
        RegisterA64 temp = regs.allocTemp(KindA64::w);
        build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, tt)));
        // nil => falsy
        LUAU_ASSERT(LUA_TNIL == 0);
        build.cbz(temp, labelOp(inst.c));
        // not boolean => truthy
        build.cmp(temp, LUA_TBOOLEAN);
        build.b(ConditionA64::NotEqual, labelOp(inst.b));
        // compare boolean value
        build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, value)));
        build.cbnz(temp, labelOp(inst.b));
        jumpOrFallthrough(blockOp(inst.c), next);
        break;
    }
    case IrCmd::JUMP_IF_FALSY:
    {
        RegisterA64 temp = regs.allocTemp(KindA64::w);
        build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, tt)));
        // nil => falsy
        LUAU_ASSERT(LUA_TNIL == 0);
        build.cbz(temp, labelOp(inst.b));
        // not boolean => truthy
        build.cmp(temp, LUA_TBOOLEAN);
        build.b(ConditionA64::NotEqual, labelOp(inst.c));
        // compare boolean value
        build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, value)));
        build.cbz(temp, labelOp(inst.b));
        jumpOrFallthrough(blockOp(inst.c), next);
        break;
    }
    case IrCmd::JUMP_EQ_TAG:
    {
        RegisterA64 zr = noreg;

        if (inst.a.kind == IrOpKind::Constant && tagOp(inst.a) == 0)
            zr = regOp(inst.b);
        else if (inst.b.kind == IrOpKind::Constant && tagOp(inst.b) == 0)
            zr = regOp(inst.a);
        else if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
            build.cmp(regOp(inst.a), tagOp(inst.b));
        else if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Inst)
            build.cmp(regOp(inst.a), regOp(inst.b));
        else if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Inst)
            build.cmp(regOp(inst.b), tagOp(inst.a));
        else
            LUAU_ASSERT(!"Unsupported instruction form");

        if (isFallthroughBlock(blockOp(inst.d), next))
        {
            if (zr != noreg)
                build.cbz(zr, labelOp(inst.c));
            else
                build.b(ConditionA64::Equal, labelOp(inst.c));
            jumpOrFallthrough(blockOp(inst.d), next);
        }
        else
        {
            if (zr != noreg)
                build.cbnz(zr, labelOp(inst.d));
            else
                build.b(ConditionA64::NotEqual, labelOp(inst.d));
            jumpOrFallthrough(blockOp(inst.c), next);
        }
        break;
    }
    case IrCmd::JUMP_EQ_INT:
        if (intOp(inst.b) == 0)
        {
            build.cbz(regOp(inst.a), labelOp(inst.c));
        }
        else
        {
            LUAU_ASSERT(unsigned(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate);
            build.cmp(regOp(inst.a), uint16_t(intOp(inst.b)));
            build.b(ConditionA64::Equal, labelOp(inst.c));
        }
        jumpOrFallthrough(blockOp(inst.d), next);
        break;
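    // the integer jump variants below require the constant to fit in cmp's immediate field (guarded by the asserts); CarrySet after cmp is A64's unsigned greater-or-equal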
    case IrCmd::JUMP_LT_INT:
        LUAU_ASSERT(unsigned(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate);
        build.cmp(regOp(inst.a), uint16_t(intOp(inst.b)));
        build.b(ConditionA64::Less, labelOp(inst.c));
        jumpOrFallthrough(blockOp(inst.d), next);
        break;
    case IrCmd::JUMP_GE_UINT:
    {
        LUAU_ASSERT(unsigned(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate);
        build.cmp(regOp(inst.a), uint16_t(unsigned(intOp(inst.b))));
        build.b(ConditionA64::CarrySet, labelOp(inst.c));
        jumpOrFallthrough(blockOp(inst.d), next);
        break;
    }
    case IrCmd::JUMP_EQ_POINTER:
        build.cmp(regOp(inst.a), regOp(inst.b));
        build.b(ConditionA64::Equal, labelOp(inst.c));
        jumpOrFallthrough(blockOp(inst.d), next);
        break;
    case IrCmd::JUMP_CMP_NUM:
    {
        IrCondition cond = conditionOp(inst.c);

        if (inst.b.kind == IrOpKind::Constant && doubleOp(inst.b) == 0.0)
        {
            RegisterA64 temp = tempDouble(inst.a);
            build.fcmpz(temp);
        }
        else
        {
            RegisterA64 temp1 = tempDouble(inst.a);
            RegisterA64 temp2 = tempDouble(inst.b);
            build.fcmp(temp1, temp2);
        }

        build.b(getConditionFP(cond), labelOp(inst.d));
        jumpOrFallthrough(blockOp(inst.e), next);
        break;
    }
    case IrCmd::JUMP_CMP_ANY:
    {
        IrCondition cond = conditionOp(inst.c);

        regs.spill(build, index);
        build.mov(x0, rState);
        build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
        build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));

        if (cond == IrCondition::NotLessEqual || cond == IrCondition::LessEqual)
            build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_lessequal)));
        else if (cond == IrCondition::NotLess || cond == IrCondition::Less)
            build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_lessthan)));
        else if (cond == IrCondition::NotEqual || cond == IrCondition::Equal)
            build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_equalval)));
        else
            LUAU_ASSERT(!"Unsupported condition");

        build.blr(x3);

        emitUpdateBase(build);

        if (cond == IrCondition::NotLessEqual || cond == IrCondition::NotLess || cond == IrCondition::NotEqual)
            build.cbz(x0, labelOp(inst.d));
        else
            build.cbnz(x0, labelOp(inst.d));
        jumpOrFallthrough(blockOp(inst.e), next);
        break;
    }
    // IrCmd::JUMP_SLOT_MATCH implemented below
    case IrCmd::TABLE_LEN:
    {
        RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
        regs.spill(build, index, {reg});
        build.mov(x0, reg);

        build.ldr(x1, mem(rNativeContext, offsetof(NativeContext, luaH_getn)));
        build.blr(x1);

        inst.regA64 = regs.allocReg(KindA64::d, index);
        build.scvtf(inst.regA64, x0);
        break;
    }
    case IrCmd::NEW_TABLE:
    {
        regs.spill(build, index);
        build.mov(x0, rState);
        build.mov(x1, uintOp(inst.a));
        build.mov(x2, uintOp(inst.b));
        build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaH_new)));
        build.blr(x3);
        inst.regA64 = regs.takeReg(x0, index);
        break;
    }
    case IrCmd::DUP_TABLE:
    {
        RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
        regs.spill(build, index, {reg});
        build.mov(x1, reg);
        build.mov(x0, rState);
        build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaH_clone)));
        build.blr(x2);
        inst.regA64 = regs.takeReg(x0, index);
        break;
    }
    case IrCmd::TRY_NUM_TO_INDEX:
    {
        inst.regA64 = regs.allocReg(KindA64::w, index);
        RegisterA64 temp1 = tempDouble(inst.a);

        if (build.features & Feature_JSCVT)
        {
            build.fjcvtzs(inst.regA64, temp1); // fjcvtzs sets PSTATE.Z (equal) iff conversion is exact
            build.b(ConditionA64::NotEqual, labelOp(inst.b));
        }
        else
        {
            RegisterA64 temp2 = regs.allocTemp(KindA64::d);
            build.fcvtzs(inst.regA64, temp1);
            build.scvtf(temp2, inst.regA64);
            build.fcmp(temp1, temp2);
            build.b(ConditionA64::NotEqual, labelOp(inst.b));
        }
        break;
    }
    case IrCmd::TRY_CALL_FASTGETTM:
    {
        RegisterA64 temp1 = regs.allocTemp(KindA64::x);
        RegisterA64 temp2 = regs.allocTemp(KindA64::w);

        build.ldr(temp1, mem(regOp(inst.a), offsetof(Table, metatable)));
        build.cbz(temp1, labelOp(inst.c)); // no metatable

        build.ldrb(temp2, mem(temp1, offsetof(Table, tmcache)));
        build.tst(temp2, 1 << intOp(inst.b));             // can't use tbz/tbnz because their jump offsets are too short
        build.b(ConditionA64::NotEqual, labelOp(inst.c)); // Equal = Zero after tst; tmcache caches *absence* of metamethods

        regs.spill(build, index, {temp1});
        build.mov(x0, temp1);
        build.mov(w1, intOp(inst.b));
        build.ldr(x2, mem(rState, offsetof(lua_State, global)));
        build.ldr(x2, mem(x2, offsetof(global_State, tmname) + intOp(inst.b) * sizeof(TString*)));
        build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaT_gettm)));
        build.blr(x3);

        build.cbz(x0, labelOp(inst.c)); // no tag method

        inst.regA64 = regs.takeReg(x0, index);
        break;
    }
    case IrCmd::INT_TO_NUM:
    {
        inst.regA64 = regs.allocReg(KindA64::d, index);
        RegisterA64 temp = tempInt(inst.a);
        build.scvtf(inst.regA64, temp);
        break;
    }
    case IrCmd::UINT_TO_NUM:
    {
        inst.regA64 = regs.allocReg(KindA64::d, index);
        RegisterA64 temp = tempInt(inst.a);
        build.ucvtf(inst.regA64, temp);
        break;
    }
    case IrCmd::NUM_TO_INT:
    {
        inst.regA64 = regs.allocReg(KindA64::w, index);
        RegisterA64 temp = tempDouble(inst.a);
        build.fcvtzs(inst.regA64, temp);
        break;
    }
    case IrCmd::NUM_TO_UINT:
    {
        inst.regA64 = regs.allocReg(KindA64::w, index);
        RegisterA64 temp = tempDouble(inst.a);
        build.fcvtzs(castReg(KindA64::x, inst.regA64), temp);
        // truncation needs to clear high bits to preserve zextReg safety contract
        build.mov(inst.regA64, inst.regA64);
        break;
    }
    case IrCmd::ADJUST_STACK_TO_REG:
    {
        RegisterA64 temp = regs.allocTemp(KindA64::x);

        if (inst.b.kind == IrOpKind::Constant)
        {
            build.add(temp, rBase, uint16_t((vmRegOp(inst.a) + intOp(inst.b)) * sizeof(TValue)));
            build.str(temp, mem(rState, offsetof(lua_State, top)));
        }
        else if (inst.b.kind == IrOpKind::Inst)
        {
            build.add(temp, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
            build.add(temp, temp, zextReg(regOp(inst.b)), kTValueSizeLog2);
            build.str(temp, mem(rState, offsetof(lua_State, top)));
        }
        else
            LUAU_ASSERT(!"Unsupported instruction form");
        break;
    }
    case IrCmd::ADJUST_STACK_TO_TOP:
    {
        RegisterA64 temp = regs.allocTemp(KindA64::x);
        build.ldr(temp, mem(rState, offsetof(lua_State, ci)));
        build.ldr(temp, mem(temp, offsetof(CallInfo, top)));
        build.str(temp, mem(rState, offsetof(lua_State, top)));
        break;
    }
    case IrCmd::FASTCALL:
        regs.spill(build, index);
        error |= emitBuiltin(build, function, regs, uintOp(inst.a), vmRegOp(inst.b), vmRegOp(inst.c), inst.d, intOp(inst.e), intOp(inst.f));
        break;
    case IrCmd::INVOKE_FASTCALL:
    {
        regs.spill(build, index);
        build.mov(x0, rState);
        build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
        build.add(x2, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue)));
        build.mov(w3, intOp(inst.f)); // nresults

        if (inst.d.kind == IrOpKind::VmReg)
            build.add(x4, rBase, uint16_t(vmRegOp(inst.d) * sizeof(TValue)));
        else if (inst.d.kind == IrOpKind::VmConst)
            emitAddOffset(build, x4, rConstants, vmConstOp(inst.d) * sizeof(TValue));
        else
            LUAU_ASSERT(inst.d.kind == IrOpKind::Undef);

        // nparams
        if (intOp(inst.e) == LUA_MULTRET)
        {
            // L->top - (ra + 1)
            build.ldr(x5, mem(rState, offsetof(lua_State, top)));
            build.sub(x5, x5, rBase);
            build.sub(x5, x5, uint16_t((vmRegOp(inst.b) + 1) * sizeof(TValue)));
            build.lsr(x5, x5, kTValueSizeLog2);
        }
        else
            build.mov(w5, intOp(inst.e));

        build.ldr(x6, mem(rNativeContext, offsetof(NativeContext, luauF_table) + uintOp(inst.a) * sizeof(luau_FastFunction)));
        build.blr(x6);

        // since w0 came from a call, we need to move it so that we don't violate zextReg safety contract
        inst.regA64 = regs.allocReg(KindA64::w, index);
        build.mov(inst.regA64, w0);
        break;
    }
    case IrCmd::CHECK_FASTCALL_RES:
        build.cmp(regOp(inst.a), 0);
        build.b(ConditionA64::Less, labelOp(inst.b));
        break;
    case IrCmd::DO_ARITH:
        regs.spill(build, index);
        build.mov(x0, rState);
        build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
        build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));

        if (inst.c.kind == IrOpKind::VmConst)
            emitAddOffset(build, x3, rConstants, vmConstOp(inst.c) * sizeof(TValue));
        else
            build.add(x3, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue)));

        build.mov(w4, TMS(intOp(inst.d)));
        build.ldr(x5, mem(rNativeContext, offsetof(NativeContext, luaV_doarith)));
        build.blr(x5);

        emitUpdateBase(build);
        break;
    case IrCmd::DO_LEN:
        regs.spill(build, index);
        build.mov(x0, rState);
        build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
        build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
        build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_dolen)));
        build.blr(x3);

        emitUpdateBase(build);
        break;
    case IrCmd::GET_TABLE:
        regs.spill(build, index);
        build.mov(x0, rState);
        build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));

        if (inst.c.kind == IrOpKind::VmReg)
            build.add(x2, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue)));
        else if (inst.c.kind == IrOpKind::Constant)
        {
            TValue n = {};
            setnvalue(&n, uintOp(inst.c));
            build.adr(x2, &n, sizeof(n));
        }
        else
            LUAU_ASSERT(!"Unsupported instruction form");

        build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
        build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_gettable)));
        build.blr(x4);

        emitUpdateBase(build);
        break;
    case IrCmd::SET_TABLE:
        regs.spill(build, index);
        build.mov(x0, rState);
        build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));

        if (inst.c.kind == IrOpKind::VmReg)
            build.add(x2, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue)));
        else if (inst.c.kind == IrOpKind::Constant)
        {
            TValue n = {};
            setnvalue(&n, uintOp(inst.c));
            build.adr(x2, &n, sizeof(n));
        }
        else
            LUAU_ASSERT(!"Unsupported instruction form");

        build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
        build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_settable)));
        build.blr(x4);

        emitUpdateBase(build);
        break;
    case IrCmd::GET_IMPORT:
        regs.spill(build, index);
        // luaV_getimport(L, cl->env, k, ra, aux, /* propagatenil= */ false)
        build.mov(x0, rState);
        build.ldr(x1, mem(rClosure, offsetof(Closure, env)));
        build.mov(x2, rConstants);
        build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
        build.mov(w4, uintOp(inst.b));
        build.mov(w5, 0);
        build.ldr(x6, mem(rNativeContext, offsetof(NativeContext, luaV_getimport)));
        build.blr(x6);

        emitUpdateBase(build);
        break;
    case IrCmd::CONCAT:
        regs.spill(build, index);
        build.mov(x0, rState);
        build.mov(x1, uintOp(inst.b));
        build.mov(x2, vmRegOp(inst.a) + uintOp(inst.b) - 1);
        build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_concat)));
        build.blr(x3);

        emitUpdateBase(build);
        break;
    case IrCmd::GET_UPVALUE:
    {
        RegisterA64 temp1 = regs.allocTemp(KindA64::x);
        RegisterA64 temp2 = regs.allocTemp(KindA64::q);
        RegisterA64 temp3 = regs.allocTemp(KindA64::w);
        build.add(temp1, rClosure, uint16_t(offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.b)));

        // uprefs[] is either an actual value, or it points to UpVal object which has a pointer to value
        Label skip;
        build.ldr(temp3, mem(temp1, offsetof(TValue, tt)));
        build.cmp(temp3, LUA_TUPVAL);
        build.b(ConditionA64::NotEqual, skip);

        // UpVal.v points to the value (either on stack, or on heap inside each UpVal, but we can deref it unconditionally)
        build.ldr(temp1, mem(temp1, offsetof(TValue, value.gc)));
        build.ldr(temp1, mem(temp1, offsetof(UpVal, v)));

        build.setLabel(skip);

        build.ldr(temp2, temp1);
        build.str(temp2, mem(rBase, vmRegOp(inst.a) * sizeof(TValue)));
        break;
    }
    case IrCmd::SET_UPVALUE:
    {
        RegisterA64 temp1 = regs.allocTemp(KindA64::x);
        RegisterA64 temp2 = regs.allocTemp(KindA64::x);
        RegisterA64 temp3 = regs.allocTemp(KindA64::q);

        // UpVal*
        build.ldr(temp1, mem(rClosure, offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.a) + offsetof(TValue, value.gc)));

        build.ldr(temp2, mem(temp1, offsetof(UpVal, v)));
        build.ldr(temp3, mem(rBase, vmRegOp(inst.b) * sizeof(TValue)));
        build.str(temp3, temp2);

        Label skip;
        checkObjectBarrierConditions(build, temp1, temp2, vmRegOp(inst.b), skip);

        size_t spills = regs.spill(build, index, {temp1});

        build.mov(x1, temp1);
        build.mov(x0, rState);
        build.ldr(x2, mem(rBase, vmRegOp(inst.b) * sizeof(TValue) + offsetof(TValue, value)));
        build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barrierf)));
        build.blr(x3);

        regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state

        // note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
        build.setLabel(skip);
        break;
    }
    case IrCmd::PREPARE_FORN:
        regs.spill(build, index);
        build.mov(x0, rState);
        build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
        build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
        build.add(x3, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue)));
        build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_prepareFORN)));
        build.blr(x4);
        // note: no emitUpdateBase necessary because prepareFORN does not reallocate stack
        break;
    case IrCmd::CHECK_TAG:
    {
        Label abort; // used when guard aborts execution
        Label& fail = inst.c.kind == IrOpKind::Undef ? abort : labelOp(inst.c);

        if (tagOp(inst.b) == 0)
        {
            build.cbnz(regOp(inst.a), fail);
        }
        else
        {
            build.cmp(regOp(inst.a), tagOp(inst.b));
            build.b(ConditionA64::NotEqual, fail);
        }

        if (abort.id)
            emitAbort(build, abort);
        break;
    }
    case IrCmd::CHECK_READONLY:
    {
        Label abort; // used when guard aborts execution
        RegisterA64 temp = regs.allocTemp(KindA64::w);
        build.ldrb(temp, mem(regOp(inst.a), offsetof(Table, readonly)));
        build.cbnz(temp, inst.b.kind == IrOpKind::Undef ? abort : labelOp(inst.b));

        if (abort.id)
            emitAbort(build, abort);
        break;
    }
    case IrCmd::CHECK_NO_METATABLE:
    {
        Label abort; // used when guard aborts execution
        RegisterA64 temp = regs.allocTemp(KindA64::x);
        build.ldr(temp, mem(regOp(inst.a), offsetof(Table, metatable)));
        build.cbnz(temp, inst.b.kind == IrOpKind::Undef ? abort : labelOp(inst.b));

        if (abort.id)
            emitAbort(build, abort);
        break;
    }
    case IrCmd::CHECK_SAFE_ENV:
    {
        Label abort; // used when guard aborts execution
        RegisterA64 temp = regs.allocTemp(KindA64::x);
        RegisterA64 tempw = castReg(KindA64::w, temp);
        build.ldr(temp, mem(rClosure, offsetof(Closure, env)));
        build.ldrb(tempw, mem(temp, offsetof(Table, safeenv)));
        build.cbz(tempw, inst.a.kind == IrOpKind::Undef ? abort : labelOp(inst.a));

        if (abort.id)
            emitAbort(build, abort);
        break;
    }
    case IrCmd::CHECK_ARRAY_SIZE:
    {
        Label abort; // used when guard aborts execution
        Label& fail = inst.c.kind == IrOpKind::Undef ? abort : labelOp(inst.c);

        RegisterA64 temp = regs.allocTemp(KindA64::w);
        build.ldr(temp, mem(regOp(inst.a), offsetof(Table, sizearray)));

        if (inst.b.kind == IrOpKind::Inst)
        {
            build.cmp(temp, regOp(inst.b));
            build.b(ConditionA64::UnsignedLessEqual, fail);
        }
        else if (inst.b.kind == IrOpKind::Constant)
        {
            if (intOp(inst.b) == 0)
            {
                build.cbz(temp, fail);
            }
            else if (size_t(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate)
            {
                build.cmp(temp, uint16_t(intOp(inst.b)));
                build.b(ConditionA64::UnsignedLessEqual, fail);
            }
            else
            {
                RegisterA64 temp2 = regs.allocTemp(KindA64::w);
                build.mov(temp2, intOp(inst.b));
                build.cmp(temp, temp2);
                build.b(ConditionA64::UnsignedLessEqual, fail);
            }
        }
        else
            LUAU_ASSERT(!"Unsupported instruction form");

        if (abort.id)
            emitAbort(build, abort);
        break;
    }
    case IrCmd::JUMP_SLOT_MATCH:
    case IrCmd::CHECK_SLOT_MATCH:
    {
        Label abort; // used when guard aborts execution
        const IrOp& mismatchOp = inst.cmd == IrCmd::JUMP_SLOT_MATCH ? inst.d : inst.c;
        Label& mismatch = mismatchOp.kind == IrOpKind::Undef ? abort : labelOp(mismatchOp);

        RegisterA64 temp1 = regs.allocTemp(KindA64::x);
        RegisterA64 temp1w = castReg(KindA64::w, temp1);
        RegisterA64 temp2 = regs.allocTemp(KindA64::x);

        LUAU_ASSERT(offsetof(LuaNode, key.value) == offsetof(LuaNode, key) && kOffsetOfTKeyTagNext >= 8 && kOffsetOfTKeyTagNext < 16);
        build.ldp(temp1, temp2, mem(regOp(inst.a), offsetof(LuaNode, key)));    // load key.value into temp1 and key.tt (alongside other bits) into temp2
        build.ubfx(temp2, temp2, (kOffsetOfTKeyTagNext - 8) * 8, kTKeyTagBits); // .tt is right before .next, and 8 bytes are skipped by ldp
        build.cmp(temp2, LUA_TSTRING);
        build.b(ConditionA64::NotEqual, mismatch);

        AddressA64 addr = tempAddr(inst.b, offsetof(TValue, value));
        build.ldr(temp2, addr);
        build.cmp(temp1, temp2);
        build.b(ConditionA64::NotEqual, mismatch);

        build.ldr(temp1w, mem(regOp(inst.a), offsetof(LuaNode, val.tt)));
        LUAU_ASSERT(LUA_TNIL == 0);
        build.cbz(temp1w, mismatch);

        if (inst.cmd == IrCmd::JUMP_SLOT_MATCH)
            jumpOrFallthrough(blockOp(inst.c), next);
        else if (abort.id)
            emitAbort(build, abort);
        break;
    }
    case IrCmd::CHECK_NODE_NO_NEXT:
    {
        Label abort; // used when guard aborts execution
        RegisterA64 temp = regs.allocTemp(KindA64::w);

        build.ldr(temp, mem(regOp(inst.a), offsetof(LuaNode, key) + kOffsetOfTKeyTagNext));
        build.lsr(temp, temp, kTKeyTagBits);
        build.cbnz(temp, inst.b.kind == IrOpKind::Undef ? abort : labelOp(inst.b));

        if (abort.id)
            emitAbort(build, abort);
        break;
    }
    case IrCmd::INTERRUPT:
    {
        regs.spill(build, index);

        Label self;

        build.ldr(x0, mem(rState, offsetof(lua_State, global)));
        build.ldr(x0, mem(x0, offsetof(global_State, cb.interrupt)));
        build.cbnz(x0, self);

        Label next = build.setLabel();

        interruptHandlers.push_back({self, uintOp(inst.a), next});
        break;
    }
    case IrCmd::CHECK_GC:
    {
        RegisterA64 temp1 = regs.allocTemp(KindA64::x);
        RegisterA64 temp2 = regs.allocTemp(KindA64::x);

        Label skip;
        build.ldr(temp1, mem(rState, offsetof(lua_State, global)));
        // TODO: totalbytes and GCthreshold loads can be fused with ldp
        build.ldr(temp2, mem(temp1, offsetof(global_State, totalbytes)));
        build.ldr(temp1, mem(temp1, offsetof(global_State, GCthreshold)));
        build.cmp(temp1, temp2);
        build.b(ConditionA64::UnsignedGreater, skip);

        size_t spills = regs.spill(build, index);

        build.mov(x0, rState);
        build.mov(w1, 1);
        // load the callee into x2: loading it into x1 would clobber the 'assist' argument we just set up in w1
        build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaC_step)));
        build.blr(x2);

        emitUpdateBase(build);

        regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state

        build.setLabel(skip);
        break;
    }
    case IrCmd::BARRIER_OBJ:
    {
        RegisterA64 temp = regs.allocTemp(KindA64::x);

        Label skip;
        checkObjectBarrierConditions(build, regOp(inst.a), temp, vmRegOp(inst.b), skip);

        RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
        size_t spills = regs.spill(build, index, {reg});
        build.mov(x1, reg);

        build.mov(x0, rState);
        build.ldr(x2, mem(rBase, vmRegOp(inst.b) * sizeof(TValue) + offsetof(TValue, value)));
        build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barrierf)));
        build.blr(x3);

        regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state

        // note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
        build.setLabel(skip);
        break;
    }
    case IrCmd::BARRIER_TABLE_BACK:
    {
        Label skip;
        RegisterA64 temp = regs.allocTemp(KindA64::w);

        // isblack(obj2gco(t))
        build.ldrb(temp, mem(regOp(inst.a), offsetof(GCheader, marked)));
        build.tbz(temp, BLACKBIT, skip);

        RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
        size_t spills = regs.spill(build, index, {reg});
        build.mov(x1, reg);

        build.mov(x0, rState);
        build.add(x2, x1, uint16_t(offsetof(Table, gclist)));
        build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barrierback)));
        build.blr(x3);

        regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state

        // note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
        build.setLabel(skip);
        break;
    }
    case IrCmd::BARRIER_TABLE_FORWARD:
    {
        RegisterA64 temp = regs.allocTemp(KindA64::x);

        Label skip;
        checkObjectBarrierConditions(build, regOp(inst.a), temp, vmRegOp(inst.b), skip);

        RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
        size_t spills = regs.spill(build, index, {reg});
        build.mov(x1, reg);

        build.mov(x0, rState);
        build.ldr(x2, mem(rBase, vmRegOp(inst.b) * sizeof(TValue) + offsetof(TValue, value)));
        build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barriertable)));
        build.blr(x3);

        regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state

        // note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
        build.setLabel(skip);
        break;
    }
    case IrCmd::SET_SAVEDPC:
    {
        RegisterA64 temp1 = regs.allocTemp(KindA64::x);
        RegisterA64 temp2 = regs.allocTemp(KindA64::x);

        emitAddOffset(build, temp1, rCode, uintOp(inst.a) * sizeof(Instruction));
        build.ldr(temp2, mem(rState, offsetof(lua_State, ci)));
        build.str(temp1, mem(temp2, offsetof(CallInfo, savedpc)));
        break;
    }
    case IrCmd::CLOSE_UPVALS:
    {
        Label skip;
        RegisterA64 temp1 = regs.allocTemp(KindA64::x);
        RegisterA64 temp2 = regs.allocTemp(KindA64::x);

        // L->openupval != 0
        build.ldr(temp1, mem(rState, offsetof(lua_State, openupval)));
        build.cbz(temp1, skip);

        // ra <= L->openupval->v
        build.ldr(temp1, mem(temp1, offsetof(UpVal, v)));
        build.add(temp2, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
        build.cmp(temp2, temp1);
        build.b(ConditionA64::UnsignedGreater, skip);

        size_t spills = regs.spill(build, index, {temp2});
        build.mov(x1, temp2);

        build.mov(x0, rState);
        build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaF_close)));
        build.blr(x2);

        regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state

        build.setLabel(skip);
        break;
    }
    case IrCmd::CAPTURE:
        // no-op
        break;
    case IrCmd::SETLIST:
        regs.spill(build, index);
        emitFallback(build, offsetof(NativeContext, executeSETLIST), uintOp(inst.a));
        break;
    case IrCmd::CALL:
        regs.spill(build, index);
        // argtop = (nparams == LUA_MULTRET) ? L->top : ra + 1 + nparams;
        if (intOp(inst.b) == LUA_MULTRET)
            build.ldr(x2, mem(rState, offsetof(lua_State, top)));
        else
            build.add(x2, rBase, uint16_t((vmRegOp(inst.a) + 1 + intOp(inst.b)) * sizeof(TValue)));

        // callFallback(L, ra, argtop, nresults)
        build.mov(x0, rState);
        build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
        build.mov(w3, intOp(inst.c));
        build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, callFallback)));
        build.blr(x4);

        // reentry with x0=closure (NULL will trigger exit)
        build.b(helpers.reentry);
        break;
    case IrCmd::RETURN:
        regs.spill(build, index);

        if (function.variadic)
        {
            build.ldr(x1, mem(rState, offsetof(lua_State, ci)));
            build.ldr(x1, mem(x1, offsetof(CallInfo, func)));
        }
        else if (intOp(inst.b) != 1)
            build.sub(x1, rBase, sizeof(TValue)); // invariant: ci->func + 1 == ci->base for non-variadic frames

        if (intOp(inst.b) == 0)
        {
            build.mov(w2, 0);
            build.b(helpers.return_);
        }
        else if (intOp(inst.b) == 1 && !function.variadic)
        {
            // fast path: minimizes x1 adjustments
            // note that we skipped x1 computation for this specific case above
            build.ldr(q0, mem(rBase, vmRegOp(inst.a) * sizeof(TValue)));
            build.str(q0, mem(rBase, -int(sizeof(TValue))));
            build.mov(x1, rBase);
            build.mov(w2, 1);
            build.b(helpers.return_);
        }
        else if (intOp(inst.b) >= 1 && intOp(inst.b) <= 3)
        {
            for (int r = 0; r < intOp(inst.b); ++r)
            {
                build.ldr(q0, mem(rBase, (vmRegOp(inst.a) + r) * sizeof(TValue)));
                build.str(q0, mem(x1, sizeof(TValue), AddressKindA64::post));
            }
            build.mov(w2, intOp(inst.b));
            build.b(helpers.return_);
        }
        else
        {
            build.mov(w2, 0);

            // vali = ra
            build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
            // valend = (n == LUA_MULTRET) ? L->top : ra + n
            if (intOp(inst.b) == LUA_MULTRET)
                build.ldr(x4, mem(rState, offsetof(lua_State, top)));
            else
                build.add(x4, rBase, uint16_t((vmRegOp(inst.a) + intOp(inst.b)) * sizeof(TValue)));

            Label repeatValueLoop, exitValueLoop;

            if (intOp(inst.b) == LUA_MULTRET)
            {
                build.cmp(x3, x4);
                build.b(ConditionA64::CarrySet, exitValueLoop); // CarrySet == UnsignedGreaterEqual
            }

            build.setLabel(repeatValueLoop);
            build.ldr(q0, mem(x3, sizeof(TValue), AddressKindA64::post));
            build.str(q0, mem(x1, sizeof(TValue), AddressKindA64::post));
            build.add(w2, w2, 1);
            build.cmp(x3, x4);
            build.b(ConditionA64::CarryClear, repeatValueLoop); // CarryClear == UnsignedLess

            build.setLabel(exitValueLoop);
            build.b(helpers.return_);
        }
        break;
    case IrCmd::FORGLOOP:
        // register layout: ra + 1 = table, ra + 2 = internal index, ra + 3 .. ra + aux = iteration variables
        regs.spill(build, index);

        // clear extra variables since we might have more than two
        if (intOp(inst.b) > 2)
        {
            build.mov(w0, LUA_TNIL);
            for (int i = 2; i < intOp(inst.b); ++i)
                build.str(w0, mem(rBase, (vmRegOp(inst.a) + 3 + i) * sizeof(TValue) + offsetof(TValue, tt)));
        }

        // we use full iter fallback for now; in the future it could be worthwhile to accelerate array iteration here
        build.mov(x0, rState);
        build.ldr(x1, mem(rBase, (vmRegOp(inst.a) + 1) * sizeof(TValue) + offsetof(TValue, value.gc)));
        build.ldr(w2, mem(rBase, (vmRegOp(inst.a) + 2) * sizeof(TValue) + offsetof(TValue, value.p)));
        build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
        build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, forgLoopTableIter)));
        build.blr(x4);
        // note: no emitUpdateBase necessary because forgLoopTableIter does not reallocate stack

        build.cbnz(w0, labelOp(inst.c));
        jumpOrFallthrough(blockOp(inst.d), next);
        break;
    case IrCmd::FORGLOOP_FALLBACK:
        regs.spill(build, index);
        build.mov(x0, rState);
        build.mov(w1, vmRegOp(inst.a));
        build.mov(w2, intOp(inst.b));
        build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, forgLoopNonTableFallback)));
        build.blr(x3);

        emitUpdateBase(build);

        build.cbnz(w0, labelOp(inst.c));
        jumpOrFallthrough(blockOp(inst.d), next);
        break;
    case IrCmd::FORGPREP_XNEXT_FALLBACK:
        regs.spill(build, index);
        build.mov(x0, rState);
        build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
        build.mov(w2, uintOp(inst.a) + 1);
        build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, forgPrepXnextFallback)));
        build.blr(x3);
        // note: no emitUpdateBase necessary because forgPrepXnextFallback does not reallocate stack

        jumpOrFallthrough(blockOp(inst.c), next);
        break;
    case IrCmd::COVERAGE:
    {
        RegisterA64 temp1 = regs.allocTemp(KindA64::x);
        RegisterA64 temp2 = regs.allocTemp(KindA64::w);
        RegisterA64 temp3 = regs.allocTemp(KindA64::w);

        build.mov(temp1, uintOp(inst.a) * sizeof(Instruction));
        build.ldr(temp2, mem(rCode, temp1));

        // increments E (high 24 bits); if the result overflows a 23-bit counter, high bit becomes 1
        // note: cmp can be eliminated with adds but we aren't concerned with code size for coverage
        build.add(temp3, temp2, 256);
        build.cmp(temp3, 0);
        build.csel(temp2, temp2, temp3, ConditionA64::Less);

        build.str(temp2, mem(rCode, temp1));
        break;
    }
    // Full instruction fallbacks
    case IrCmd::FALLBACK_GETGLOBAL:
        LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg);
        LUAU_ASSERT(inst.c.kind == IrOpKind::VmConst);

        regs.spill(build, index);
        emitFallback(build, offsetof(NativeContext, executeGETGLOBAL), uintOp(inst.a));
        break;
    case IrCmd::FALLBACK_SETGLOBAL:
        LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg);
        LUAU_ASSERT(inst.c.kind == IrOpKind::VmConst);

        regs.spill(build, index);
        emitFallback(build, offsetof(NativeContext, executeSETGLOBAL), uintOp(inst.a));
        break;
    case IrCmd::FALLBACK_GETTABLEKS:
        LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg);
        LUAU_ASSERT(inst.c.kind == IrOpKind::VmReg);
        LUAU_ASSERT(inst.d.kind == IrOpKind::VmConst);

        regs.spill(build, index);
        emitFallback(build, offsetof(NativeContext, executeGETTABLEKS), uintOp(inst.a));
        break;
    case IrCmd::FALLBACK_SETTABLEKS:
        LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg);
        LUAU_ASSERT(inst.c.kind == IrOpKind::VmReg);
        LUAU_ASSERT(inst.d.kind == IrOpKind::VmConst);

        regs.spill(build, index);
        emitFallback(build, offsetof(NativeContext, executeSETTABLEKS), uintOp(inst.a));
        break;
    case IrCmd::FALLBACK_NAMECALL:
        LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg);
        LUAU_ASSERT(inst.c.kind == IrOpKind::VmReg);
        LUAU_ASSERT(inst.d.kind == IrOpKind::VmConst);

        regs.spill(build, index);
        emitFallback(build, offsetof(NativeContext, executeNAMECALL), uintOp(inst.a));
        break;
    case IrCmd::FALLBACK_PREPVARARGS:
        LUAU_ASSERT(inst.b.kind == IrOpKind::Constant);

        regs.spill(build, index);
        emitFallback(build, offsetof(NativeContext, executePREPVARARGS), uintOp(inst.a));
        break;
    case IrCmd::FALLBACK_GETVARARGS:
        LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg);
        LUAU_ASSERT(inst.c.kind == IrOpKind::Constant);

        regs.spill(build, index);
        emitFallback(build, offsetof(NativeContext, executeGETVARARGS), uintOp(inst.a));
        break;
    case IrCmd::FALLBACK_NEWCLOSURE:
        LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg);
        LUAU_ASSERT(inst.c.kind == IrOpKind::Constant);

        regs.spill(build, index);
        emitFallback(build, offsetof(NativeContext, executeNEWCLOSURE), uintOp(inst.a));
        break;
    case IrCmd::FALLBACK_DUPCLOSURE:
        LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg);
        LUAU_ASSERT(inst.c.kind == IrOpKind::VmConst);

        regs.spill(build, index);
        emitFallback(build, offsetof(NativeContext, executeDUPCLOSURE), uintOp(inst.a));
        break;
    case IrCmd::FALLBACK_FORGPREP:
        regs.spill(build, index);
        emitFallback(build, offsetof(NativeContext, executeFORGPREP), uintOp(inst.a));
        jumpOrFallthrough(blockOp(inst.c), next);
        break;
    // Pseudo instructions
    case IrCmd::NOP:
    case IrCmd::SUBSTITUTE:
        LUAU_ASSERT(!"Pseudo instructions should not be lowered");
        break;
    case IrCmd::BITAND_UINT:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});

        if (inst.b.kind == IrOpKind::Constant && AssemblyBuilderA64::isMaskSupported(unsigned(intOp(inst.b))))
            build.and_(inst.regA64, regOp(inst.a), unsigned(intOp(inst.b)));
        else
        {
            RegisterA64 temp1 = tempUint(inst.a);
            RegisterA64 temp2 = tempUint(inst.b);
            build.and_(inst.regA64, temp1, temp2);
        }
        break;
    }
    case IrCmd::BITXOR_UINT:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});

        if (inst.b.kind == IrOpKind::Constant && AssemblyBuilderA64::isMaskSupported(unsigned(intOp(inst.b))))
            build.eor(inst.regA64, regOp(inst.a), unsigned(intOp(inst.b)));
        else
        {
            RegisterA64 temp1 = tempUint(inst.a);
            RegisterA64 temp2 = tempUint(inst.b);
            build.eor(inst.regA64, temp1, temp2);
        }
        break;
    }
    case IrCmd::BITOR_UINT:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});

        if (inst.b.kind == IrOpKind::Constant && AssemblyBuilderA64::isMaskSupported(unsigned(intOp(inst.b))))
            build.orr(inst.regA64, regOp(inst.a), unsigned(intOp(inst.b)));
        else
        {
            RegisterA64 temp1 = tempUint(inst.a);
            RegisterA64 temp2 = tempUint(inst.b);
            build.orr(inst.regA64, temp1, temp2);
        }
        break;
    }
    case IrCmd::BITNOT_UINT:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a});
        RegisterA64 temp = tempUint(inst.a);
        build.mvn_(inst.regA64, temp);
        break;
    }
    case IrCmd::BITLSHIFT_UINT:
    {
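        // constant shift counts are masked to 5 bits, matching bit32 semantics for 32-bit operands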
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});

        if (inst.b.kind == IrOpKind::Constant)
            build.lsl(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31));
        else
        {
            RegisterA64 temp1 = tempUint(inst.a);
            RegisterA64 temp2 = tempUint(inst.b);
            build.lsl(inst.regA64, temp1, temp2);
        }
        break;
    }
    case IrCmd::BITRSHIFT_UINT:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});

        if (inst.b.kind == IrOpKind::Constant)
            build.lsr(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31));
        else
        {
            RegisterA64 temp1 = tempUint(inst.a);
            RegisterA64 temp2 = tempUint(inst.b);
            build.lsr(inst.regA64, temp1, temp2);
        }
        break;
    }
    case IrCmd::BITARSHIFT_UINT:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});

        if (inst.b.kind == IrOpKind::Constant)
            build.asr(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31));
        else
        {
            RegisterA64 temp1 = tempUint(inst.a);
            RegisterA64 temp2 = tempUint(inst.b);
            build.asr(inst.regA64, temp1, temp2);
        }
        break;
    }
    case IrCmd::BITLROTATE_UINT:
    {
        if (inst.b.kind == IrOpKind::Constant)
        {
            inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a});
            build.ror(inst.regA64, regOp(inst.a), uint8_t((32 - unsigned(intOp(inst.b))) & 31));
        }
        else
        {
            inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b}); // can't reuse a because it would be clobbered by neg
            RegisterA64 temp1 = tempUint(inst.a);
            RegisterA64 temp2 = tempUint(inst.b);
            build.neg(inst.regA64, temp2);
            build.ror(inst.regA64, temp1, inst.regA64);
        }
        break;
    }
    case IrCmd::BITRROTATE_UINT:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});

        if (inst.b.kind == IrOpKind::Constant)
            build.ror(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31));
        else
        {
            RegisterA64 temp1 = tempUint(inst.a);
            RegisterA64 temp2 = tempUint(inst.b);
            build.ror(inst.regA64, temp1, temp2);
        }
        break;
    }
    case IrCmd::BITCOUNTLZ_UINT:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a});
        RegisterA64 temp = tempUint(inst.a);
        build.clz(inst.regA64, temp);
        break;
    }
    case IrCmd::BITCOUNTRZ_UINT:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a});
        RegisterA64 temp = tempUint(inst.a);
        build.rbit(inst.regA64, temp);
        build.clz(inst.regA64, inst.regA64);
        break;
    }
    case IrCmd::INVOKE_LIBM:
    {
        if (inst.c.kind != IrOpKind::None)
        {
            bool isInt = (inst.c.kind == IrOpKind::Constant) ? constOp(inst.c).kind == IrConstKind::Int
                                                             : getCmdValueKind(function.instOp(inst.c).cmd) == IrValueKind::Int;

            RegisterA64 temp1 = tempDouble(inst.b);
            RegisterA64 temp2 = isInt ? tempInt(inst.c) : tempDouble(inst.c);
            RegisterA64 temp3 = isInt ? noreg : regs.allocTemp(KindA64::d); // note: spill() frees all registers so we need to avoid alloc after spill

            regs.spill(build, index, {temp1, temp2});

            if (isInt)
            {
                build.fmov(d0, temp1);
                build.mov(w0, temp2);
            }
            else if (d0 != temp2)
            {
                build.fmov(d0, temp1);
                build.fmov(d1, temp2);
            }
            else
            {
                build.fmov(temp3, d0);
                build.fmov(d0, temp1);
                build.fmov(d1, temp3);
            }
        }
        else
        {
            RegisterA64 temp1 = tempDouble(inst.b);
            regs.spill(build, index, {temp1});
            build.fmov(d0, temp1);
        }

        build.ldr(x1, mem(rNativeContext, getNativeContextOffset(uintOp(inst.a))));
        build.blr(x1);
        inst.regA64 = regs.takeReg(d0, index);
        break;
    }

        // To handle unsupported instructions, add "case IrCmd::OP" and make sure to set error = true!
    }

    valueTracker.afterInstLowering(inst, index);

    regs.freeLastUseRegs(inst, index);
    regs.freeTempRegs();
}

void IrLoweringA64::finishBlock()
{
    regs.assertNoSpills();
}

void IrLoweringA64::finishFunction()
{
    if (build.logText)
        build.logAppend("; interrupt handlers\n");

    for (InterruptHandler& handler : interruptHandlers)
    {
        build.setLabel(handler.self);
        build.mov(x0, (handler.pcpos + 1) * sizeof(Instruction));
        build.adr(x1, handler.next);
        build.b(helpers.interrupt);
    }
}

bool IrLoweringA64::hasError() const
{
    return error;
}

bool IrLoweringA64::isFallthroughBlock(IrBlock target, IrBlock next)
{
    return target.start == next.start;
}

void IrLoweringA64::jumpOrFallthrough(IrBlock& target, IrBlock& next)
{
    if (!isFallthroughBlock(target, next))
        build.b(target.label);
}

RegisterA64 IrLoweringA64::tempDouble(IrOp op)
{
    if (op.kind == IrOpKind::Inst)
        return regOp(op);
    else if (op.kind == IrOpKind::Constant)
    {
        double val = doubleOp(op);

        if (AssemblyBuilderA64::isFmovSupported(val))
        {
            RegisterA64 temp = regs.allocTemp(KindA64::d);
            build.fmov(temp, val);
            return temp;
        }
        else
        {
            RegisterA64 temp1 = regs.allocTemp(KindA64::x);
            RegisterA64 temp2 = regs.allocTemp(KindA64::d);

            uint64_t vali;
            static_assert(sizeof(vali) == sizeof(val), "Expecting double to be 64-bit");
            memcpy(&vali, &val, sizeof(val));

            if ((vali << 16) == 0)
            {
                build.movz(temp1, uint16_t(vali >> 48), 48);
                build.fmov(temp2, temp1);
            }
            else if ((vali << 32) == 0)
            {
                build.movz(temp1, uint16_t(vali >> 48), 48);
                build.movk(temp1, uint16_t(vali >> 32), 32);
                build.fmov(temp2, temp1);
            }
            else
            {
                build.adr(temp1, val);
                build.ldr(temp2, temp1);
            }

            return temp2;
        }
    }
    else
    {
        LUAU_ASSERT(!"Unsupported instruction form");
        return noreg;
    }
}

RegisterA64 IrLoweringA64::tempInt(IrOp op)
{
    if (op.kind == IrOpKind::Inst)
        return regOp(op);
    else if (op.kind == IrOpKind::Constant)
    {
        RegisterA64 temp = regs.allocTemp(KindA64::w);
        build.mov(temp, intOp(op));
        return temp;
    }
    else
    {
        LUAU_ASSERT(!"Unsupported instruction form");
        return noreg;
    }
}

RegisterA64 IrLoweringA64::tempUint(IrOp op)
{
    if (op.kind == IrOpKind::Inst)
        return regOp(op);
    else if (op.kind == IrOpKind::Constant)
    {
        RegisterA64 temp = regs.allocTemp(KindA64::w);
        build.mov(temp, unsigned(intOp(op)));
        return temp;
    }
    else
    {
        LUAU_ASSERT(!"Unsupported instruction form");
        return noreg;
    }
}

AddressA64 IrLoweringA64::tempAddr(IrOp op, int offset)
{
    // This is needed to tighten the bounds checks in the VmConst case below
    LUAU_ASSERT(offset % 4 == 0);

    if (op.kind == IrOpKind::VmReg)
        return mem(rBase, vmRegOp(op) * sizeof(TValue) + offset);
    else if (op.kind == IrOpKind::VmConst)
    {
        size_t constantOffset = vmConstOp(op) * sizeof(TValue) + offset;

        // Note: cumulative offset is guaranteed to be divisible by 4; we can use that to expand the useful range that doesn't require temporaries
        if (constantOffset / 4 <= AddressA64::kMaxOffset)
            return mem(rConstants, int(constantOffset));

        RegisterA64 temp = regs.allocTemp(KindA64::x);
        emitAddOffset(build, temp, rConstants, constantOffset);
        return temp;
    }
    // If we have a register, we assume it's a pointer to TValue
    // We might introduce explicit operand types in the future to make this more robust
    else if (op.kind == IrOpKind::Inst)
        return mem(regOp(op), offset);
    else
    {
        LUAU_ASSERT(!"Unsupported instruction form");
        return noreg;
    }
}

RegisterA64 IrLoweringA64::regOp(IrOp op)
{
    IrInst& inst = function.instOp(op);

    if (inst.spilled || inst.needsReload)
        regs.restoreReg(build, inst);

    LUAU_ASSERT(inst.regA64 != noreg);
    return inst.regA64;
}

IrConst IrLoweringA64::constOp(IrOp op) const
{
    return function.constOp(op);
}
uint8_t IrLoweringA64::tagOp(IrOp op) const
{
    return function.tagOp(op);
}

int IrLoweringA64::intOp(IrOp op) const
{
    return function.intOp(op);
}

unsigned IrLoweringA64::uintOp(IrOp op) const
{
    return function.uintOp(op);
}

double IrLoweringA64::doubleOp(IrOp op) const
{
    return function.doubleOp(op);
}

IrBlock& IrLoweringA64::blockOp(IrOp op) const
{
    return function.blockOp(op);
}

Label& IrLoweringA64::labelOp(IrOp op) const
{
    return blockOp(op).label;
}

} // namespace A64
} // namespace CodeGen
} // namespace Luau