Add Luau CodeGen (JIT implementation, currently experimental)

Alex Orlenko 2023-05-20 22:49:37 +01:00
parent deb042b940
commit 3bfe1afb96
75 changed files with 22454 additions and 4 deletions


@@ -0,0 +1,61 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/RegisterA64.h"
#include <stddef.h>
namespace Luau
{
namespace CodeGen
{
namespace A64
{
enum class AddressKindA64 : uint8_t
{
imm, // reg + imm
reg, // reg + reg
// TODO:
// reg + reg << shift
// reg + sext(reg) << shift
// reg + uext(reg) << shift
};
struct AddressA64
{
// This is a little misleading since AddressA64 can encode offsets up to 1023*size where size depends on the load/store size
// For example, ldr x0, [reg+imm] is limited to 8 KB offsets assuming imm is divisible by 8, but loading into w0 reduces the range to 4 KB
static constexpr size_t kMaxOffset = 1023;
constexpr AddressA64(RegisterA64 base, int off = 0)
: kind(AddressKindA64::imm)
, base(base)
, offset(xzr)
, data(off)
{
LUAU_ASSERT(base.kind == KindA64::x || base == sp);
}
constexpr AddressA64(RegisterA64 base, RegisterA64 offset)
: kind(AddressKindA64::reg)
, base(base)
, offset(offset)
, data(0)
{
LUAU_ASSERT(base.kind == KindA64::x);
LUAU_ASSERT(offset.kind == KindA64::x);
}
AddressKindA64 kind;
RegisterA64 base;
RegisterA64 offset;
int data;
};
using mem = AddressA64;
} // namespace A64
} // namespace CodeGen
} // namespace Luau
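
A usage sketch (not part of the diff): the two constructors map directly onto the two addressing kinds, with 'mem' as the alias declared above; the 'ldr' overload and register names come from the other headers in this commit.

// Sketch only; assumes an AssemblyBuilderA64 instance named 'build'
build.ldr(x0, mem(x1, 16)); // AddressKindA64::imm, loads from [x1 + 16]
build.ldr(x0, mem(x1, x2)); // AddressKindA64::reg, loads from [x1 + x2]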


@@ -0,0 +1,280 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/RegisterA64.h"
#include "Luau/AddressA64.h"
#include "Luau/ConditionA64.h"
#include "Luau/Label.h"
#include <string>
#include <vector>
namespace Luau
{
namespace CodeGen
{
namespace A64
{
enum FeaturesA64
{
Feature_JSCVT = 1 << 0,
};
class AssemblyBuilderA64
{
public:
explicit AssemblyBuilderA64(bool logText, unsigned int features = 0);
~AssemblyBuilderA64();
// Moves
void mov(RegisterA64 dst, RegisterA64 src);
void mov(RegisterA64 dst, int src); // macro
// Moves of 32-bit immediates get decomposed into one or more of these
void movz(RegisterA64 dst, uint16_t src, int shift = 0);
void movn(RegisterA64 dst, uint16_t src, int shift = 0);
void movk(RegisterA64 dst, uint16_t src, int shift = 0);
// Arithmetics
void add(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
void add(RegisterA64 dst, RegisterA64 src1, uint16_t src2);
void sub(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
void sub(RegisterA64 dst, RegisterA64 src1, uint16_t src2);
void neg(RegisterA64 dst, RegisterA64 src);
// Comparisons
// Note: some arithmetic instructions also have versions that update flags (ADDS etc) but we aren't using them atm
void cmp(RegisterA64 src1, RegisterA64 src2);
void cmp(RegisterA64 src1, uint16_t src2);
void csel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond);
void cset(RegisterA64 dst, ConditionA64 cond);
// Bitwise
void and_(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
void orr(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
void eor(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
void bic(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
void tst(RegisterA64 src1, RegisterA64 src2, int shift = 0);
void mvn_(RegisterA64 dst, RegisterA64 src);
// Bitwise with immediate
// Note: immediate must have a single contiguous sequence of 1 bits set of length 1..31
void and_(RegisterA64 dst, RegisterA64 src1, uint32_t src2);
void orr(RegisterA64 dst, RegisterA64 src1, uint32_t src2);
void eor(RegisterA64 dst, RegisterA64 src1, uint32_t src2);
void tst(RegisterA64 src1, uint32_t src2);
// Shifts
void lsl(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void lsr(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void asr(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void ror(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void clz(RegisterA64 dst, RegisterA64 src);
void rbit(RegisterA64 dst, RegisterA64 src);
// Shifts with immediates
// Note: immediate value must be in [0, 31] or [0, 63] range based on register type
void lsl(RegisterA64 dst, RegisterA64 src1, uint8_t src2);
void lsr(RegisterA64 dst, RegisterA64 src1, uint8_t src2);
void asr(RegisterA64 dst, RegisterA64 src1, uint8_t src2);
void ror(RegisterA64 dst, RegisterA64 src1, uint8_t src2);
// Bitfields
void ubfiz(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);
void ubfx(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);
void sbfiz(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);
void sbfx(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);
// Load
// Note: paired loads are currently omitted for simplicity
void ldr(RegisterA64 dst, AddressA64 src);
void ldrb(RegisterA64 dst, AddressA64 src);
void ldrh(RegisterA64 dst, AddressA64 src);
void ldrsb(RegisterA64 dst, AddressA64 src);
void ldrsh(RegisterA64 dst, AddressA64 src);
void ldrsw(RegisterA64 dst, AddressA64 src);
void ldp(RegisterA64 dst1, RegisterA64 dst2, AddressA64 src);
// Store
void str(RegisterA64 src, AddressA64 dst);
void strb(RegisterA64 src, AddressA64 dst);
void strh(RegisterA64 src, AddressA64 dst);
void stp(RegisterA64 src1, RegisterA64 src2, AddressA64 dst);
// Control flow
void b(Label& label);
void bl(Label& label);
void br(RegisterA64 src);
void blr(RegisterA64 src);
void ret();
// Conditional control flow
void b(ConditionA64 cond, Label& label);
void cbz(RegisterA64 src, Label& label);
void cbnz(RegisterA64 src, Label& label);
void tbz(RegisterA64 src, uint8_t bit, Label& label);
void tbnz(RegisterA64 src, uint8_t bit, Label& label);
// Address of embedded data
void adr(RegisterA64 dst, const void* ptr, size_t size);
void adr(RegisterA64 dst, uint64_t value);
void adr(RegisterA64 dst, double value);
// Address of code (label)
void adr(RegisterA64 dst, Label& label);
// Floating-point scalar moves
// Note: constant must be compatible with immediate floating point moves (see isFmovSupported)
void fmov(RegisterA64 dst, RegisterA64 src);
void fmov(RegisterA64 dst, double src);
// Floating-point scalar math
void fabs(RegisterA64 dst, RegisterA64 src);
void fadd(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void fdiv(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void fmul(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void fneg(RegisterA64 dst, RegisterA64 src);
void fsqrt(RegisterA64 dst, RegisterA64 src);
void fsub(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
// Floating-point rounding and conversions
void frinta(RegisterA64 dst, RegisterA64 src);
void frintm(RegisterA64 dst, RegisterA64 src);
void frintp(RegisterA64 dst, RegisterA64 src);
void fcvt(RegisterA64 dst, RegisterA64 src);
void fcvtzs(RegisterA64 dst, RegisterA64 src);
void fcvtzu(RegisterA64 dst, RegisterA64 src);
void scvtf(RegisterA64 dst, RegisterA64 src);
void ucvtf(RegisterA64 dst, RegisterA64 src);
// Floating-point conversion to integer using JS rules (wrap around 2^32) and set Z flag
// Note: this is part of ARMv8.3 (JSCVT feature); support for this instruction needs to be checked at runtime
void fjcvtzs(RegisterA64 dst, RegisterA64 src);
// Floating-point comparisons
void fcmp(RegisterA64 src1, RegisterA64 src2);
void fcmpz(RegisterA64 src);
void fcsel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond);
// Run final checks
bool finalize();
// Places a label at current location and returns it
Label setLabel();
// Assigns label position to the current location
void setLabel(Label& label);
// Extracts code offset (in bytes) from label
uint32_t getLabelOffset(const Label& label)
{
LUAU_ASSERT(label.location != ~0u);
return label.location * 4;
}
void logAppend(const char* fmt, ...) LUAU_PRINTF_ATTR(2, 3);
uint32_t getCodeSize() const;
// Resulting data and code that need to be copied over one after the other
// The *end* of 'data' has to be aligned to 16 bytes, this will also align 'code'
std::vector<uint8_t> data;
std::vector<uint32_t> code;
std::string text;
const bool logText = false;
const unsigned int features = 0;
// Maximum immediate argument to functions like add/sub/cmp
static constexpr size_t kMaxImmediate = (1 << 12) - 1;
// Check if immediate mode mask is supported for bitwise operations (and/or/xor)
static bool isMaskSupported(uint32_t mask);
// Check if fmov can be used to synthesize a constant
static bool isFmovSupported(double value);
private:
// Instruction archetypes
void place0(const char* name, uint32_t word);
void placeSR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, int shift = 0, int N = 0);
void placeSR2(const char* name, RegisterA64 dst, RegisterA64 src, uint8_t op, uint8_t op2 = 0);
void placeR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, uint8_t op2);
void placeR1(const char* name, RegisterA64 dst, RegisterA64 src, uint32_t op);
void placeI12(const char* name, RegisterA64 dst, RegisterA64 src1, int src2, uint8_t op);
void placeI16(const char* name, RegisterA64 dst, int src, uint8_t op, int shift = 0);
void placeA(const char* name, RegisterA64 dst, AddressA64 src, uint16_t opsize, int sizelog);
void placeB(const char* name, Label& label, uint8_t op);
void placeBC(const char* name, Label& label, uint8_t op, uint8_t cond);
void placeBCR(const char* name, Label& label, uint8_t op, RegisterA64 cond);
void placeBR(const char* name, RegisterA64 src, uint32_t op);
void placeBTR(const char* name, Label& label, uint8_t op, RegisterA64 cond, uint8_t bit);
void placeADR(const char* name, RegisterA64 src, uint8_t op);
void placeADR(const char* name, RegisterA64 src, uint8_t op, Label& label);
void placeP(const char* name, RegisterA64 dst1, RegisterA64 dst2, AddressA64 src, uint8_t op, uint8_t opc, int sizelog);
void placeCS(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond, uint8_t op, uint8_t opc, int invert = 0);
void placeFCMP(const char* name, RegisterA64 src1, RegisterA64 src2, uint8_t op, uint8_t opc);
void placeFMOV(const char* name, RegisterA64 dst, double src, uint32_t op);
void placeBM(const char* name, RegisterA64 dst, RegisterA64 src1, uint32_t src2, uint8_t op);
void placeBFM(const char* name, RegisterA64 dst, RegisterA64 src1, int src2, uint8_t op, int immr, int imms);
void place(uint32_t word);
struct Patch
{
enum Kind
{
Imm26,
Imm19,
Imm14,
};
Kind kind : 2;
uint32_t label : 30;
uint32_t location;
};
void patchLabel(Label& label, Patch::Kind kind);
void patchOffset(uint32_t location, int value, Patch::Kind kind);
void commit();
LUAU_NOINLINE void extend();
// Data
size_t allocateData(size_t size, size_t align);
// Logging of assembly in text form
LUAU_NOINLINE void log(const char* opcode);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src1, int src2);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, int src, int shift = 0);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, double src);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, AddressA64 src);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst1, RegisterA64 dst2, AddressA64 src);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 src, Label label, int imm = -1);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 src);
LUAU_NOINLINE void log(const char* opcode, Label label);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond);
LUAU_NOINLINE void log(Label label);
LUAU_NOINLINE void log(RegisterA64 reg);
LUAU_NOINLINE void log(AddressA64 addr);
uint32_t nextLabel = 1;
std::vector<Patch> pendingLabels;
std::vector<uint32_t> labelLocations;
bool finalized = false;
bool overflowed = false;
size_t dataPos = 0;
uint32_t* codePos = nullptr;
uint32_t* codeEnd = nullptr;
};
} // namespace A64
} // namespace CodeGen
} // namespace Luau
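
A minimal end-to-end sketch of the builder workflow, assuming the AAPCS64 argument registers already hold the inputs:

// Sketch: emit 'w0 = w0 + w1; return' and run the final checks
AssemblyBuilderA64 build(/* logText= */ false);
build.add(w0, w0, w1);
build.ret();
bool ok = build.finalize(); // on success, 'data' and 'code' hold the result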


@@ -0,0 +1,266 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Common.h"
#include "Luau/DenseHash.h"
#include "Luau/Label.h"
#include "Luau/ConditionX64.h"
#include "Luau/OperandX64.h"
#include "Luau/RegisterX64.h"
#include <string>
#include <vector>
namespace Luau
{
namespace CodeGen
{
namespace X64
{
enum class RoundingModeX64
{
RoundToNearestEven = 0b00,
RoundToNegativeInfinity = 0b01,
RoundToPositiveInfinity = 0b10,
RoundToZero = 0b11,
};
enum class AlignmentDataX64
{
Nop,
Int3,
Ud2, // int3 will be used as a fall-back if it doesn't fit
};
enum class ABIX64
{
Windows,
SystemV,
};
class AssemblyBuilderX64
{
public:
explicit AssemblyBuilderX64(bool logText, ABIX64 abi);
explicit AssemblyBuilderX64(bool logText);
~AssemblyBuilderX64();
// Base two operand instructions with 9 opcode selection
void add(OperandX64 lhs, OperandX64 rhs);
void sub(OperandX64 lhs, OperandX64 rhs);
void cmp(OperandX64 lhs, OperandX64 rhs);
void and_(OperandX64 lhs, OperandX64 rhs);
void or_(OperandX64 lhs, OperandX64 rhs);
void xor_(OperandX64 lhs, OperandX64 rhs);
// Binary shift instructions with special rhs handling
void sal(OperandX64 lhs, OperandX64 rhs);
void sar(OperandX64 lhs, OperandX64 rhs);
void shl(OperandX64 lhs, OperandX64 rhs);
void shr(OperandX64 lhs, OperandX64 rhs);
void rol(OperandX64 lhs, OperandX64 rhs);
void ror(OperandX64 lhs, OperandX64 rhs);
// Two operand mov instruction has additional specialized encodings
void mov(OperandX64 lhs, OperandX64 rhs);
void mov64(RegisterX64 lhs, int64_t imm);
void movsx(RegisterX64 lhs, OperandX64 rhs);
void movzx(RegisterX64 lhs, OperandX64 rhs);
// Base one operand instruction with 2 opcode selection
void div(OperandX64 op);
void idiv(OperandX64 op);
void mul(OperandX64 op);
void imul(OperandX64 op);
void neg(OperandX64 op);
void not_(OperandX64 op);
void dec(OperandX64 op);
void inc(OperandX64 op);
// Additional forms of imul
void imul(OperandX64 lhs, OperandX64 rhs);
void imul(OperandX64 dst, OperandX64 lhs, int32_t rhs);
void test(OperandX64 lhs, OperandX64 rhs);
void lea(OperandX64 lhs, OperandX64 rhs);
void setcc(ConditionX64 cond, OperandX64 op);
void push(OperandX64 op);
void pop(OperandX64 op);
void ret();
// Control flow
void jcc(ConditionX64 cond, Label& label);
void jmp(Label& label);
void jmp(OperandX64 op);
void call(Label& label);
void call(OperandX64 op);
void int3();
void bsr(RegisterX64 dst, OperandX64 src);
void bsf(RegisterX64 dst, OperandX64 src);
// Code alignment
void nop(uint32_t length = 1);
void align(uint32_t alignment, AlignmentDataX64 data = AlignmentDataX64::Nop);
// AVX
void vaddpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vaddps(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vaddsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vaddss(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vsubsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vmulsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vdivsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vandpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vandnpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vxorpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vorpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vucomisd(OperandX64 src1, OperandX64 src2);
void vcvttsd2si(OperandX64 dst, OperandX64 src);
void vcvtsi2sd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vcvtsd2ss(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vroundsd(OperandX64 dst, OperandX64 src1, OperandX64 src2, RoundingModeX64 roundingMode); // inexact
void vsqrtpd(OperandX64 dst, OperandX64 src);
void vsqrtps(OperandX64 dst, OperandX64 src);
void vsqrtsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vsqrtss(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vmovsd(OperandX64 dst, OperandX64 src);
void vmovsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vmovss(OperandX64 dst, OperandX64 src);
void vmovss(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vmovapd(OperandX64 dst, OperandX64 src);
void vmovaps(OperandX64 dst, OperandX64 src);
void vmovupd(OperandX64 dst, OperandX64 src);
void vmovups(OperandX64 dst, OperandX64 src);
void vmovq(OperandX64 lhs, OperandX64 rhs);
void vmaxsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vminsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vcmpltsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vblendvpd(RegisterX64 dst, RegisterX64 src1, OperandX64 mask, RegisterX64 src3);
// Run final checks
bool finalize();
// Places a label at current location and returns it
Label setLabel();
// Assigns label position to the current location
void setLabel(Label& label);
// Extracts code offset (in bytes) from label
uint32_t getLabelOffset(const Label& label)
{
LUAU_ASSERT(label.location != ~0u);
return label.location;
}
// Constant allocation (uses rip-relative addressing)
OperandX64 i64(int64_t value);
OperandX64 f32(float value);
OperandX64 f64(double value);
OperandX64 f32x4(float x, float y, float z, float w);
OperandX64 f64x2(double x, double y);
OperandX64 bytes(const void* ptr, size_t size, size_t align = 8);
void logAppend(const char* fmt, ...) LUAU_PRINTF_ATTR(2, 3);
uint32_t getCodeSize() const;
// Resulting data and code that need to be copied over one after the other
// The *end* of 'data' has to be aligned to 16 bytes, this will also align 'code'
std::vector<uint8_t> data;
std::vector<uint8_t> code;
std::string text;
const bool logText = false;
const ABIX64 abi;
private:
// Instruction archetypes
void placeBinary(const char* name, OperandX64 lhs, OperandX64 rhs, uint8_t codeimm8, uint8_t codeimm, uint8_t codeimmImm8, uint8_t code8rev,
uint8_t coderev, uint8_t code8, uint8_t code, uint8_t opreg);
void placeBinaryRegMemAndImm(OperandX64 lhs, OperandX64 rhs, uint8_t code8, uint8_t code, uint8_t codeImm8, uint8_t opreg);
void placeBinaryRegAndRegMem(OperandX64 lhs, OperandX64 rhs, uint8_t code8, uint8_t code);
void placeBinaryRegMemAndReg(OperandX64 lhs, OperandX64 rhs, uint8_t code8, uint8_t code);
void placeUnaryModRegMem(const char* name, OperandX64 op, uint8_t code8, uint8_t code, uint8_t opreg);
void placeShift(const char* name, OperandX64 lhs, OperandX64 rhs, uint8_t opreg);
void placeJcc(const char* name, Label& label, uint8_t cc);
void placeAvx(const char* name, OperandX64 dst, OperandX64 src, uint8_t code, bool setW, uint8_t mode, uint8_t prefix);
void placeAvx(const char* name, OperandX64 dst, OperandX64 src, uint8_t code, uint8_t coderev, bool setW, uint8_t mode, uint8_t prefix);
void placeAvx(const char* name, OperandX64 dst, OperandX64 src1, OperandX64 src2, uint8_t code, bool setW, uint8_t mode, uint8_t prefix);
void placeAvx(
const char* name, OperandX64 dst, OperandX64 src1, OperandX64 src2, uint8_t imm8, uint8_t code, bool setW, uint8_t mode, uint8_t prefix);
// Instruction components
void placeRegAndModRegMem(OperandX64 lhs, OperandX64 rhs, int32_t extraCodeBytes = 0);
void placeModRegMem(OperandX64 rhs, uint8_t regop, int32_t extraCodeBytes = 0);
void placeRex(RegisterX64 op);
void placeRex(OperandX64 op);
void placeRexNoW(OperandX64 op);
void placeRex(RegisterX64 lhs, OperandX64 rhs);
void placeVex(OperandX64 dst, OperandX64 src1, OperandX64 src2, bool setW, uint8_t mode, uint8_t prefix);
void placeImm8Or32(int32_t imm);
void placeImm8(int32_t imm);
void placeImm32(int32_t imm);
void placeImm64(int64_t imm);
void placeLabel(Label& label);
void place(uint8_t byte);
void commit();
LUAU_NOINLINE void extend();
// Data
size_t allocateData(size_t size, size_t align);
// Logging of assembly in text form (Intel asm with VS disassembly formatting)
LUAU_NOINLINE void log(const char* opcode);
LUAU_NOINLINE void log(const char* opcode, OperandX64 op);
LUAU_NOINLINE void log(const char* opcode, OperandX64 op1, OperandX64 op2);
LUAU_NOINLINE void log(const char* opcode, OperandX64 op1, OperandX64 op2, OperandX64 op3);
LUAU_NOINLINE void log(const char* opcode, OperandX64 op1, OperandX64 op2, OperandX64 op3, OperandX64 op4);
LUAU_NOINLINE void log(Label label);
LUAU_NOINLINE void log(const char* opcode, Label label);
void log(OperandX64 op);
const char* getSizeName(SizeX64 size) const;
const char* getRegisterName(RegisterX64 reg) const;
uint32_t nextLabel = 1;
std::vector<Label> pendingLabels;
std::vector<uint32_t> labelLocations;
DenseHashMap<uint64_t, int32_t> constCache64;
bool finalized = false;
size_t dataPos = 0;
uint8_t* codePos = nullptr;
uint8_t* codeEnd = nullptr;
};
} // namespace X64
} // namespace CodeGen
} // namespace Luau
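
A minimal sketch of the same workflow on x64, using the operand DSL declared in OperandX64.h later in this commit:

// Sketch: emit 'lea rax, [rdx+rcx*2]; ret' and run the final checks
AssemblyBuilderX64 build(/* logText= */ false);
build.lea(rax, addr[rdx + rcx * 2]);
build.ret();
bool ok = build.finalize();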


@@ -0,0 +1,56 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <vector>
#include <stddef.h>
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
constexpr uint32_t kCodeAlignment = 32;
struct CodeAllocator
{
CodeAllocator(size_t blockSize, size_t maxTotalSize);
~CodeAllocator();
// Places data and code into the executable page area
// To allow allocation while previously allocated code is already running, allocation has page granularity
// It's important to group functions together so that page alignment won't result in a lot of wasted space
bool allocate(
const uint8_t* data, size_t dataSize, const uint8_t* code, size_t codeSize, uint8_t*& result, size_t& resultSize, uint8_t*& resultCodeStart);
// Provided to callbacks
void* context = nullptr;
// Called when new block is created to create and setup the unwinding information for all the code in the block
// 'startOffset' reserves space for data at the beginning of the page
void* (*createBlockUnwindInfo)(void* context, uint8_t* block, size_t blockSize, size_t& startOffset) = nullptr;
// Called to destroy unwinding information returned by 'createBlockUnwindInfo'
void (*destroyBlockUnwindInfo)(void* context, void* unwindData) = nullptr;
// Unwind information can be placed inside the block with some implementation-specific reservations at the beginning
// But to simplify block space checks, we limit the max size of all that data
static const size_t kMaxReservedDataSize = 256;
bool allocateNewBlock(size_t& unwindInfoSize);
// Current block we use for allocations
uint8_t* blockPos = nullptr;
uint8_t* blockEnd = nullptr;
// All allocated blocks
std::vector<uint8_t*> blocks;
std::vector<void*> unwindInfos;
size_t blockSize = 0;
size_t maxTotalSize = 0;
};
} // namespace CodeGen
} // namespace Luau
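
A hedged sketch of handing a finalized builder's output to the allocator ('build' is an AssemblyBuilderX64 as above; the block and total sizes are illustrative):

// Sketch: copy data+code into an executable page range
CodeAllocator allocator(/* blockSize= */ 1 << 20, /* maxTotalSize= */ 1 << 24);
uint8_t* nativeData = nullptr;
size_t sizeNativeData = 0;
uint8_t* codeStart = nullptr;
bool ok = allocator.allocate(build.data.data(), build.data.size(), build.code.data(), build.code.size(),
    nativeData, sizeNativeData, codeStart); // codeStart points at the copied, executable code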


@@ -0,0 +1,19 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <stddef.h>
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
// context must be an UnwindBuilder
void* createBlockUnwindInfo(void* context, uint8_t* block, size_t blockSize, size_t& startOffset);
void destroyBlockUnwindInfo(void* context, void* unwindData);
bool isUnwindSupported();
} // namespace CodeGen
} // namespace Luau


@@ -0,0 +1,45 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <string>
#include <stdint.h>
struct lua_State;
namespace Luau
{
namespace CodeGen
{
bool isSupported();
void create(lua_State* L);
// Builds target function and all inner functions
void compile(lua_State* L, int idx);
using AnnotatorFn = void (*)(void* context, std::string& result, int fid, int instpos);
struct AssemblyOptions
{
bool outputBinary = false;
bool includeAssembly = false;
bool includeIr = false;
bool includeOutlinedCode = false;
// An optional annotator function can be provided to describe each instruction; it takes a function id and a sequential instruction id
AnnotatorFn annotator = nullptr;
void* annotatorContext = nullptr;
};
// Generates assembly for target function and all inner functions
std::string getAssembly(lua_State* L, int idx, AssemblyOptions options = {});
using PerfLogFn = void (*)(void* context, uintptr_t addr, unsigned size, const char* symbol);
void setPerfLog(void* context, PerfLogFn logFn);
} // namespace CodeGen
} // namespace Luau
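
The intended call sequence, sketched (luau_load is the standard Luau C API for loading bytecode):

// Sketch: JIT-compile the function on top of the stack when supported
if (Luau::CodeGen::isSupported())
{
    Luau::CodeGen::create(L); // attach codegen state to this lua_State
    // ... luau_load(L, ...) pushes a function ...
    Luau::CodeGen::compile(L, -1); // compile it and all inner functions
}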


@@ -0,0 +1,57 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
namespace Luau
{
namespace CodeGen
{
namespace A64
{
// See Table C1-1 on page C1-229 of Arm ARM for A-profile architecture
enum class ConditionA64
{
// EQ: integer (equal), floating-point (equal)
Equal,
// NE: integer (not equal), floating-point (not equal or unordered)
NotEqual,
// CS: integer (carry set), unsigned integer (greater than, equal), floating-point (greater than, equal or unordered)
CarrySet,
// CC: integer (carry clear), unsigned integer (less than), floating-point (less than)
CarryClear,
// MI: integer (negative), floating-point (less than)
Minus,
// PL: integer (positive or zero), floating-point (greater than, equal or unordered)
Plus,
// VS: integer (overflow), floating-point (unordered)
Overflow,
// VC: integer (no overflow), floating-point (ordered)
NoOverflow,
// HI: integer (unsigned higher), floating-point (greater than, or unordered)
UnsignedGreater,
// LS: integer (unsigned lower or same), floating-point (less than or equal)
UnsignedLessEqual,
// GE: integer (signed greater than or equal), floating-point (greater than or equal)
GreaterEqual,
// LT: integer (signed less than), floating-point (less than, or unordered)
Less,
// GT: integer (signed greater than), floating-point (greater than)
Greater,
// LE: integer (signed less than or equal), floating-point (less than, equal or unordered)
LessEqual,
// AL: always
Always,
Count
};
} // namespace A64
} // namespace CodeGen
} // namespace Luau


@@ -0,0 +1,47 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
enum class ConditionX64 : uint8_t
{
Overflow,
NoOverflow,
Carry,
NoCarry,
Below,
BelowEqual,
Above,
AboveEqual,
Equal,
Less,
LessEqual,
Greater,
GreaterEqual,
NotBelow,
NotBelowEqual,
NotAbove,
NotAboveEqual,
NotEqual,
NotLess,
NotLessEqual,
NotGreater,
NotGreaterEqual,
Zero,
NotZero,
Parity,
NotParity,
Count
};
} // namespace CodeGen
} // namespace Luau


@@ -0,0 +1,99 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <bitset>
#include <utility>
#include <vector>
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
struct IrBlock;
struct IrFunction;
void updateUseCounts(IrFunction& function);
void updateLastUseLocations(IrFunction& function);
uint32_t getNextInstUse(IrFunction& function, uint32_t targetInstIdx, uint32_t startInstIdx);
// Returns how many values are coming into the block (live in) and how many are coming out of the block (live out)
std::pair<uint32_t, uint32_t> getLiveInOutValueCount(IrFunction& function, IrBlock& block);
uint32_t getLiveInValueCount(IrFunction& function, IrBlock& block);
uint32_t getLiveOutValueCount(IrFunction& function, IrBlock& block);
struct RegisterSet
{
std::bitset<256> regs;
// If variadic sequence is active, we track register from which it starts
bool varargSeq = false;
uint8_t varargStart = 0;
};
void requireVariadicSequence(RegisterSet& sourceRs, const RegisterSet& defRs, uint8_t varargStart);
struct CfgInfo
{
std::vector<uint32_t> predecessors;
std::vector<uint32_t> predecessorsOffsets;
std::vector<uint32_t> successors;
std::vector<uint32_t> successorsOffsets;
// VM registers that are live when the block is entered
// Additionally, an active variadic sequence can exist at the entry of the block
std::vector<RegisterSet> in;
// VM registers that are defined inside the block
// It can also contain a variadic sequence definition if that hasn't been consumed inside the block
// Note that this means that checking 'def' set might not be enough to say that register has not been written to
std::vector<RegisterSet> def;
// VM registers that are coming out from the block
// These might be registers that are defined inside the block or have been defined at the entry of the block
// Additionally, an active variadic sequence can exist at the exit of the block
std::vector<RegisterSet> out;
// VM registers captured by nested closures
// This set can never have an active variadic sequence
RegisterSet captured;
};
void computeCfgInfo(IrFunction& function);
struct BlockIteratorWrapper
{
const uint32_t* itBegin = nullptr;
const uint32_t* itEnd = nullptr;
bool empty() const
{
return itBegin == itEnd;
}
size_t size() const
{
return size_t(itEnd - itBegin);
}
const uint32_t* begin() const
{
return itBegin;
}
const uint32_t* end() const
{
return itEnd;
}
};
BlockIteratorWrapper predecessors(const CfgInfo& cfg, uint32_t blockIdx);
BlockIteratorWrapper successors(const CfgInfo& cfg, uint32_t blockIdx);
} // namespace CodeGen
} // namespace Luau
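
A sketch of consuming the CFG; IrFunction is declared in IrData.h (whose diff is suppressed below), so the 'cfg' and 'blocks' member names here are assumptions based on this API:

// Sketch: visit all predecessors of block 'blockIdx'
computeCfgInfo(function);
for (uint32_t predIdx : predecessors(function.cfg, blockIdx))
    visit(function.blocks[predIdx]); // 'visit' is a placeholder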


@@ -0,0 +1,117 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Bytecode.h"
#include "Luau/Common.h"
#include "Luau/DenseHash.h"
#include "Luau/IrData.h"
#include <vector>
struct Proto;
typedef uint32_t Instruction;
namespace Luau
{
namespace CodeGen
{
struct AssemblyOptions;
struct IrBuilder
{
IrBuilder();
void buildFunctionIr(Proto* proto);
void rebuildBytecodeBasicBlocks(Proto* proto);
void translateInst(LuauOpcode op, const Instruction* pc, int i);
bool isInternalBlock(IrOp block);
void beginBlock(IrOp block);
void loadAndCheckTag(IrOp loc, uint8_t tag, IrOp fallback);
// Clones all instructions into the current block
// Source block that is cloned cannot use values coming in from a predecessor
void clone(const IrBlock& source, bool removeCurrentTerminator);
IrOp undef();
IrOp constBool(bool value);
IrOp constInt(int value);
IrOp constUint(unsigned value);
IrOp constDouble(double value);
IrOp constTag(uint8_t value);
IrOp constAny(IrConst constant, uint64_t asCommonKey);
IrOp cond(IrCondition cond);
IrOp inst(IrCmd cmd);
IrOp inst(IrCmd cmd, IrOp a);
IrOp inst(IrCmd cmd, IrOp a, IrOp b);
IrOp inst(IrCmd cmd, IrOp a, IrOp b, IrOp c);
IrOp inst(IrCmd cmd, IrOp a, IrOp b, IrOp c, IrOp d);
IrOp inst(IrCmd cmd, IrOp a, IrOp b, IrOp c, IrOp d, IrOp e);
IrOp inst(IrCmd cmd, IrOp a, IrOp b, IrOp c, IrOp d, IrOp e, IrOp f);
IrOp block(IrBlockKind kind); // Requested kind can be ignored if we are in an outlined sequence
IrOp blockAtInst(uint32_t index);
IrOp vmReg(uint8_t index);
IrOp vmConst(uint32_t index);
IrOp vmUpvalue(uint8_t index);
bool inTerminatedBlock = false;
bool activeFastcallFallback = false;
IrOp fastcallFallbackReturn;
IrFunction function;
uint32_t activeBlockIdx = ~0u;
std::vector<uint32_t> instIndexToBlock; // Block index at the bytecode instruction
// Similar to BytecodeBuilder, duplicate constants are removed using the same method
struct ConstantKey
{
IrConstKind kind;
// Note: this stores value* from IrConst; when kind is Double, this stores the same bits as double does but in uint64_t.
uint64_t value;
bool operator==(const ConstantKey& key) const
{
return kind == key.kind && value == key.value;
}
};
struct ConstantKeyHash
{
size_t operator()(const ConstantKey& key) const
{
// finalizer from MurmurHash64B
const uint32_t m = 0x5bd1e995;
uint32_t h1 = uint32_t(key.value);
uint32_t h2 = uint32_t(key.value >> 32) ^ (int(key.kind) * m);
h1 ^= h2 >> 18;
h1 *= m;
h2 ^= h1 >> 22;
h2 *= m;
h1 ^= h2 >> 17;
h1 *= m;
h2 ^= h1 >> 19;
h2 *= m;
// ... truncated to 32-bit output (normally hash is equal to (uint64_t(h1) << 32) | h2, but we only really need the lower 32-bit half)
return size_t(h2);
}
};
DenseHashMap<ConstantKey, uint32_t, ConstantKeyHash> constantMap;
};
} // namespace CodeGen
} // namespace Luau
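
A sketch of the deduplication this enables (IrCmd::ADD_NUM comes from IrData.h):

// Sketch: identical constants should map to the same operand via constantMap
IrBuilder build;
IrOp a = build.constDouble(1.0);
IrOp b = build.constDouble(1.0); // expected to be the same operand as 'a'
IrOp sum = build.inst(IrCmd::ADD_NUM, a, b);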


@@ -0,0 +1,84 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/IrData.h"
#include "Luau/OperandX64.h"
#include "Luau/RegisterX64.h"
#include <array>
// TODO: call wrapper can be used to suggest target registers for ScopedRegX64 to compute data into argument registers directly
namespace Luau
{
namespace CodeGen
{
namespace X64
{
struct IrRegAllocX64;
struct ScopedRegX64;
struct CallArgument
{
SizeX64 targetSize = SizeX64::none;
OperandX64 source = noreg;
IrOp sourceOp;
OperandX64 target = noreg;
bool candidate = true;
};
class IrCallWrapperX64
{
public:
IrCallWrapperX64(IrRegAllocX64& regs, AssemblyBuilderX64& build, uint32_t instIdx = kInvalidInstIdx);
void addArgument(SizeX64 targetSize, OperandX64 source, IrOp sourceOp = {});
void addArgument(SizeX64 targetSize, ScopedRegX64& scopedReg);
void call(const OperandX64& func);
RegisterX64 suggestNextArgumentRegister(SizeX64 size) const;
IrRegAllocX64& regs;
AssemblyBuilderX64& build;
uint32_t instIdx = ~0u;
private:
OperandX64 getNextArgumentTarget(SizeX64 size) const;
void countRegisterUses();
CallArgument* findNonInterferingArgument();
bool interferesWithOperand(const OperandX64& op, RegisterX64 reg) const;
bool interferesWithActiveSources(const CallArgument& targetArg, int targetArgIndex) const;
bool interferesWithActiveTarget(RegisterX64 sourceReg) const;
void moveToTarget(CallArgument& arg);
void freeSourceRegisters(CallArgument& arg);
void renameRegister(RegisterX64& target, RegisterX64 reg, RegisterX64 replacement);
void renameSourceRegisters(RegisterX64 reg, RegisterX64 replacement);
RegisterX64 findConflictingTarget() const;
void renameConflictingRegister(RegisterX64 conflict);
int getRegisterUses(RegisterX64 reg) const;
void addRegisterUse(RegisterX64 reg);
void removeRegisterUse(RegisterX64 reg);
static const int kMaxCallArguments = 6;
std::array<CallArgument, kMaxCallArguments> args;
int argCount = 0;
int gprPos = 0;
int xmmPos = 0;
OperandX64 funcOp;
// Internal counters for remaining register use counts
std::array<uint8_t, 16> gprUses;
std::array<uint8_t, 16> xmmUses;
};
} // namespace X64
} // namespace CodeGen
} // namespace Luau
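
A hedged sketch of the call-marshalling flow; 'regs', 'build', 'instIdx', and 'helperAddress' are placeholders, not names from this commit:

// Sketch: stage two arguments into ABI registers, then call
IrCallWrapperX64 callWrap(regs, build, instIdx);
callWrap.addArgument(SizeX64::qword, rdi);         // placeholder source register
callWrap.addArgument(SizeX64::dword, int32_t(42)); // immediate argument
callWrap.call(helperAddress);                      // OperandX64 naming the target function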

File diff suppressed because it is too large


@@ -0,0 +1,45 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/IrData.h"
#include <string>
#include <vector>
namespace Luau
{
namespace CodeGen
{
struct CfgInfo;
const char* getCmdName(IrCmd cmd);
const char* getBlockKindName(IrBlockKind kind);
struct IrToStringContext
{
std::string& result;
const std::vector<IrBlock>& blocks;
const std::vector<IrConst>& constants;
const CfgInfo& cfg;
};
void toString(IrToStringContext& ctx, const IrInst& inst, uint32_t index);
void toString(IrToStringContext& ctx, const IrBlock& block, uint32_t index); // Block title
void toString(IrToStringContext& ctx, IrOp op);
void toString(std::string& result, IrConst constant);
void toStringDetailed(IrToStringContext& ctx, const IrBlock& block, uint32_t blockIdx, const IrInst& inst, uint32_t instIdx, bool includeUseInfo);
void toStringDetailed(IrToStringContext& ctx, const IrBlock& block, uint32_t index, bool includeUseInfo); // Block title
std::string toString(const IrFunction& function, bool includeUseInfo);
std::string dump(const IrFunction& function);
std::string toDot(const IrFunction& function, bool includeInst);
std::string dumpDot(const IrFunction& function, bool includeInst);
} // namespace CodeGen
} // namespace Luau


@@ -0,0 +1,121 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/IrData.h"
#include "Luau/RegisterX64.h"
#include <array>
#include <initializer_list>
namespace Luau
{
namespace CodeGen
{
namespace X64
{
constexpr uint8_t kNoStackSlot = 0xff;
struct IrSpillX64
{
uint32_t instIdx = 0;
IrValueKind valueKind = IrValueKind::Unknown;
unsigned spillId = 0;
// Spill location can be a stack location or be empty
// When it's empty, it means that instruction value can be rematerialized
uint8_t stackSlot = kNoStackSlot;
RegisterX64 originalLoc = noreg;
};
struct IrRegAllocX64
{
IrRegAllocX64(AssemblyBuilderX64& build, IrFunction& function);
RegisterX64 allocReg(SizeX64 size, uint32_t instIdx);
RegisterX64 allocRegOrReuse(SizeX64 size, uint32_t instIdx, std::initializer_list<IrOp> oprefs);
RegisterX64 takeReg(RegisterX64 reg, uint32_t instIdx);
void freeReg(RegisterX64 reg);
void freeLastUseReg(IrInst& target, uint32_t instIdx);
void freeLastUseRegs(const IrInst& inst, uint32_t instIdx);
bool isLastUseReg(const IrInst& target, uint32_t instIdx) const;
bool shouldFreeGpr(RegisterX64 reg) const;
unsigned findSpillStackSlot(IrValueKind valueKind);
IrOp getRestoreOp(const IrInst& inst) const;
bool hasRestoreOp(const IrInst& inst) const;
OperandX64 getRestoreAddress(const IrInst& inst, IrOp restoreOp);
// Register used by instruction is about to be freed, have to find a way to restore value later
void preserve(IrInst& inst);
void restore(IrInst& inst, bool intoOriginalLocation);
void preserveAndFreeInstValues();
uint32_t findInstructionWithFurthestNextUse(const std::array<uint32_t, 16>& regInstUsers) const;
void assertFree(RegisterX64 reg) const;
void assertAllFree() const;
void assertNoSpills() const;
AssemblyBuilderX64& build;
IrFunction& function;
uint32_t currInstIdx = ~0u;
std::array<bool, 16> freeGprMap;
std::array<uint32_t, 16> gprInstUsers;
std::array<bool, 16> freeXmmMap;
std::array<uint32_t, 16> xmmInstUsers;
std::bitset<256> usedSpillSlots;
unsigned maxUsedSlot = 0;
unsigned nextSpillId = 1;
std::vector<IrSpillX64> spills;
};
struct ScopedRegX64
{
explicit ScopedRegX64(IrRegAllocX64& owner);
ScopedRegX64(IrRegAllocX64& owner, SizeX64 size);
ScopedRegX64(IrRegAllocX64& owner, RegisterX64 reg);
~ScopedRegX64();
ScopedRegX64(const ScopedRegX64&) = delete;
ScopedRegX64& operator=(const ScopedRegX64&) = delete;
void alloc(SizeX64 size);
void free();
RegisterX64 release();
IrRegAllocX64& owner;
RegisterX64 reg;
};
// When IR instruction makes a call under a condition that's not reflected as a real branch in IR,
// spilled values have to be restored to their exact original locations, so that both after a call
// and after the skip, values are found in the same place
struct ScopedSpills
{
explicit ScopedSpills(IrRegAllocX64& owner);
~ScopedSpills();
ScopedSpills(const ScopedSpills&) = delete;
ScopedSpills& operator=(const ScopedSpills&) = delete;
IrRegAllocX64& owner;
unsigned startSpillId = 0;
};
} // namespace X64
} // namespace CodeGen
} // namespace Luau
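
A sketch of the RAII usage ScopedRegX64 enables ('regs', 'build', and 'rBase' are placeholders):

// Sketch: borrow a temporary qword register for one computation
{
    ScopedRegX64 tmp{regs, SizeX64::qword};
    build.mov(tmp.reg, qword[rBase + 8]); // 'rBase' is a placeholder base register
} // destructor frees tmp.reg back to the allocator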


@@ -0,0 +1,258 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Bytecode.h"
#include "Luau/Common.h"
#include "Luau/IrData.h"
namespace Luau
{
namespace CodeGen
{
struct IrBuilder;
inline bool isJumpD(LuauOpcode op)
{
switch (op)
{
case LOP_JUMP:
case LOP_JUMPIF:
case LOP_JUMPIFNOT:
case LOP_JUMPIFEQ:
case LOP_JUMPIFLE:
case LOP_JUMPIFLT:
case LOP_JUMPIFNOTEQ:
case LOP_JUMPIFNOTLE:
case LOP_JUMPIFNOTLT:
case LOP_FORNPREP:
case LOP_FORNLOOP:
case LOP_FORGPREP:
case LOP_FORGLOOP:
case LOP_FORGPREP_INEXT:
case LOP_FORGPREP_NEXT:
case LOP_JUMPBACK:
case LOP_JUMPXEQKNIL:
case LOP_JUMPXEQKB:
case LOP_JUMPXEQKN:
case LOP_JUMPXEQKS:
return true;
default:
return false;
}
}
inline bool isSkipC(LuauOpcode op)
{
switch (op)
{
case LOP_LOADB:
return true;
default:
return false;
}
}
inline bool isFastCall(LuauOpcode op)
{
switch (op)
{
case LOP_FASTCALL:
case LOP_FASTCALL1:
case LOP_FASTCALL2:
case LOP_FASTCALL2K:
return true;
default:
return false;
}
}
inline int getJumpTarget(uint32_t insn, uint32_t pc)
{
LuauOpcode op = LuauOpcode(LUAU_INSN_OP(insn));
if (isJumpD(op))
return int(pc + LUAU_INSN_D(insn) + 1);
else if (isFastCall(op))
return int(pc + LUAU_INSN_C(insn) + 2);
else if (isSkipC(op) && LUAU_INSN_C(insn))
return int(pc + LUAU_INSN_C(insn) + 1);
else if (op == LOP_JUMPX)
return int(pc + LUAU_INSN_E(insn) + 1);
else
return -1;
}
inline bool isBlockTerminator(IrCmd cmd)
{
switch (cmd)
{
case IrCmd::JUMP:
case IrCmd::JUMP_IF_TRUTHY:
case IrCmd::JUMP_IF_FALSY:
case IrCmd::JUMP_EQ_TAG:
case IrCmd::JUMP_EQ_INT:
case IrCmd::JUMP_LT_INT:
case IrCmd::JUMP_GE_UINT:
case IrCmd::JUMP_EQ_POINTER:
case IrCmd::JUMP_CMP_NUM:
case IrCmd::JUMP_CMP_ANY:
case IrCmd::JUMP_SLOT_MATCH:
case IrCmd::RETURN:
case IrCmd::FORGLOOP:
case IrCmd::FORGLOOP_FALLBACK:
case IrCmd::FORGPREP_XNEXT_FALLBACK:
case IrCmd::FALLBACK_FORGPREP:
return true;
default:
break;
}
return false;
}
inline bool isNonTerminatingJump(IrCmd cmd)
{
switch (cmd)
{
case IrCmd::TRY_NUM_TO_INDEX:
case IrCmd::TRY_CALL_FASTGETTM:
case IrCmd::CHECK_FASTCALL_RES:
case IrCmd::CHECK_TAG:
case IrCmd::CHECK_READONLY:
case IrCmd::CHECK_NO_METATABLE:
case IrCmd::CHECK_SAFE_ENV:
case IrCmd::CHECK_ARRAY_SIZE:
case IrCmd::CHECK_SLOT_MATCH:
case IrCmd::CHECK_NODE_NO_NEXT:
return true;
default:
break;
}
return false;
}
inline bool hasResult(IrCmd cmd)
{
switch (cmd)
{
case IrCmd::LOAD_TAG:
case IrCmd::LOAD_POINTER:
case IrCmd::LOAD_DOUBLE:
case IrCmd::LOAD_INT:
case IrCmd::LOAD_TVALUE:
case IrCmd::LOAD_NODE_VALUE_TV:
case IrCmd::LOAD_ENV:
case IrCmd::GET_ARR_ADDR:
case IrCmd::GET_SLOT_NODE_ADDR:
case IrCmd::GET_HASH_NODE_ADDR:
case IrCmd::ADD_INT:
case IrCmd::SUB_INT:
case IrCmd::ADD_NUM:
case IrCmd::SUB_NUM:
case IrCmd::MUL_NUM:
case IrCmd::DIV_NUM:
case IrCmd::MOD_NUM:
case IrCmd::MIN_NUM:
case IrCmd::MAX_NUM:
case IrCmd::UNM_NUM:
case IrCmd::FLOOR_NUM:
case IrCmd::CEIL_NUM:
case IrCmd::ROUND_NUM:
case IrCmd::SQRT_NUM:
case IrCmd::ABS_NUM:
case IrCmd::NOT_ANY:
case IrCmd::TABLE_LEN:
case IrCmd::NEW_TABLE:
case IrCmd::DUP_TABLE:
case IrCmd::TRY_NUM_TO_INDEX:
case IrCmd::TRY_CALL_FASTGETTM:
case IrCmd::INT_TO_NUM:
case IrCmd::UINT_TO_NUM:
case IrCmd::NUM_TO_INT:
case IrCmd::NUM_TO_UINT:
case IrCmd::SUBSTITUTE:
case IrCmd::INVOKE_FASTCALL:
case IrCmd::BITAND_UINT:
case IrCmd::BITXOR_UINT:
case IrCmd::BITOR_UINT:
case IrCmd::BITNOT_UINT:
case IrCmd::BITLSHIFT_UINT:
case IrCmd::BITRSHIFT_UINT:
case IrCmd::BITARSHIFT_UINT:
case IrCmd::BITLROTATE_UINT:
case IrCmd::BITRROTATE_UINT:
case IrCmd::BITCOUNTLZ_UINT:
case IrCmd::BITCOUNTRZ_UINT:
case IrCmd::INVOKE_LIBM:
return true;
default:
break;
}
return false;
}
inline bool hasSideEffects(IrCmd cmd)
{
if (cmd == IrCmd::INVOKE_FASTCALL)
return true;
// Instructions that don't produce a result most likely have other side-effects to make them useful
// Right now, a full switch would mirror the 'hasResult' function, so we use this simple condition
return !hasResult(cmd);
}
inline bool isPseudo(IrCmd cmd)
{
// Instructions that are used for internal needs and are not a part of final lowering
return cmd == IrCmd::NOP || cmd == IrCmd::SUBSTITUTE;
}
IrValueKind getCmdValueKind(IrCmd cmd);
bool isGCO(uint8_t tag);
// Manually add or remove use of an operand
void addUse(IrFunction& function, IrOp op);
void removeUse(IrFunction& function, IrOp op);
// Remove a single instruction
void kill(IrFunction& function, IrInst& inst);
// Remove a range of instructions
void kill(IrFunction& function, uint32_t start, uint32_t end);
// Remove a block, including all instructions inside
void kill(IrFunction& function, IrBlock& block);
// Replace a single operand and update use counts (can cause chain removal of dead code)
void replace(IrFunction& function, IrOp& original, IrOp replacement);
// Replace a single instruction
// Target instruction index instead of reference is used to handle introduction of a new block terminator
void replace(IrFunction& function, IrBlock& block, uint32_t instIdx, IrInst replacement);
// Replace instruction with a different value (using IrCmd::SUBSTITUTE)
void substitute(IrFunction& function, IrInst& inst, IrOp replacement);
// Replace instruction arguments that point to substitutions with target values
void applySubstitutions(IrFunction& function, IrOp& op);
void applySubstitutions(IrFunction& function, IrInst& inst);
// Compare numbers using IR condition value
bool compare(double a, double b, IrCondition cond);
// Perform constant folding on instruction at index
// For most instructions, successful folding results in a IrCmd::SUBSTITUTE
// But it can also be successful on conditional control-flow, replacing it with an unconditional IrCmd::JUMP
void foldConstants(IrBuilder& build, IrFunction& function, IrBlock& block, uint32_t instIdx);
uint32_t getNativeContextOffset(int bfid);
} // namespace CodeGen
} // namespace Luau


@@ -0,0 +1,18 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
struct Label
{
uint32_t id = 0;
uint32_t location = ~0u;
};
} // namespace CodeGen
} // namespace Luau


@@ -0,0 +1,145 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Common.h"
#include "Luau/RegisterX64.h"
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
namespace X64
{
enum class CategoryX64 : uint8_t
{
reg,
mem,
imm,
};
struct OperandX64
{
constexpr OperandX64(RegisterX64 reg)
: cat(CategoryX64::reg)
, index(noreg)
, base(reg)
, memSize(SizeX64::none)
, scale(1)
, imm(0)
{
}
constexpr OperandX64(int32_t imm)
: cat(CategoryX64::imm)
, index(noreg)
, base(noreg)
, memSize(SizeX64::none)
, scale(1)
, imm(imm)
{
}
constexpr explicit OperandX64(SizeX64 size, RegisterX64 index, uint8_t scale, RegisterX64 base, int32_t disp)
: cat(CategoryX64::mem)
, index(index)
, base(base)
, memSize(size)
, scale(scale)
, imm(disp)
{
}
// Fields are carefully placed to make this struct fit into an 8 byte register
CategoryX64 cat;
RegisterX64 index;
RegisterX64 base;
SizeX64 memSize : 4;
uint8_t scale : 4;
int32_t imm;
constexpr OperandX64 operator[](OperandX64&& addr) const
{
LUAU_ASSERT(cat == CategoryX64::mem);
LUAU_ASSERT(index == noreg && scale == 1 && base == noreg && imm == 0);
LUAU_ASSERT(addr.memSize == SizeX64::none);
addr.cat = CategoryX64::mem;
addr.memSize = memSize;
return addr;
}
};
constexpr OperandX64 addr{SizeX64::none, noreg, 1, noreg, 0};
constexpr OperandX64 byte{SizeX64::byte, noreg, 1, noreg, 0};
constexpr OperandX64 word{SizeX64::word, noreg, 1, noreg, 0};
constexpr OperandX64 dword{SizeX64::dword, noreg, 1, noreg, 0};
constexpr OperandX64 qword{SizeX64::qword, noreg, 1, noreg, 0};
constexpr OperandX64 xmmword{SizeX64::xmmword, noreg, 1, noreg, 0};
constexpr OperandX64 ymmword{SizeX64::ymmword, noreg, 1, noreg, 0};
constexpr OperandX64 operator*(RegisterX64 reg, uint8_t scale)
{
if (scale == 1)
return OperandX64(reg);
LUAU_ASSERT(scale == 1 || scale == 2 || scale == 4 || scale == 8);
LUAU_ASSERT(reg.index != 0b100 && "can't scale SP");
return OperandX64(SizeX64::none, reg, scale, noreg, 0);
}
constexpr OperandX64 operator+(RegisterX64 reg, int32_t disp)
{
return OperandX64(SizeX64::none, noreg, 1, reg, disp);
}
constexpr OperandX64 operator-(RegisterX64 reg, int32_t disp)
{
return OperandX64(SizeX64::none, noreg, 1, reg, -disp);
}
constexpr OperandX64 operator+(RegisterX64 base, RegisterX64 index)
{
LUAU_ASSERT(index.index != 4 && "sp cannot be used as index");
LUAU_ASSERT(base.size == index.size);
return OperandX64(SizeX64::none, index, 1, base, 0);
}
constexpr OperandX64 operator+(OperandX64 op, int32_t disp)
{
LUAU_ASSERT(op.cat == CategoryX64::mem);
LUAU_ASSERT(op.memSize == SizeX64::none);
op.imm += disp;
return op;
}
constexpr OperandX64 operator+(OperandX64 op, RegisterX64 base)
{
LUAU_ASSERT(op.cat == CategoryX64::mem);
LUAU_ASSERT(op.memSize == SizeX64::none);
LUAU_ASSERT(op.base == noreg);
LUAU_ASSERT(op.index == noreg || op.index.size == base.size);
op.base = base;
return op;
}
constexpr OperandX64 operator+(RegisterX64 base, OperandX64 op)
{
LUAU_ASSERT(op.cat == CategoryX64::mem);
LUAU_ASSERT(op.memSize == SizeX64::none);
LUAU_ASSERT(op.base == noreg);
LUAU_ASSERT(op.index == noreg || op.index.size == base.size);
op.base = base;
return op;
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau
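
A sketch of how these overloads compose addressing modes (register constants come from RegisterX64.h):

// Sketch: build '[rdx + rcx*8 + 16]' as a qword-sized memory operand
OperandX64 slot = qword[rdx + rcx * 8 + 16];
// Sketch: rip-relative qword, as used by the constant allocation helpers
OperandX64 data = qword[rip + 0];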


@@ -0,0 +1,17 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/IrData.h"
namespace Luau
{
namespace CodeGen
{
struct IrBuilder;
void constPropInBlockChains(IrBuilder& build, bool useValueNumbering);
void createLinearBlocks(IrBuilder& build, bool useValueNumbering);
} // namespace CodeGen
} // namespace Luau


@@ -0,0 +1,14 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/IrData.h"
namespace Luau
{
namespace CodeGen
{
void optimizeMemoryOperandsX64(IrFunction& function);
} // namespace CodeGen
} // namespace Luau


@@ -0,0 +1,233 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Common.h"
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
namespace A64
{
enum class KindA64 : uint8_t
{
none,
w, // 32-bit GPR
x, // 64-bit GPR
s, // 32-bit SIMD&FP scalar
d, // 64-bit SIMD&FP scalar
q, // 128-bit SIMD&FP vector
};
struct RegisterA64
{
KindA64 kind : 3;
uint8_t index : 5;
constexpr bool operator==(RegisterA64 rhs) const
{
return kind == rhs.kind && index == rhs.index;
}
constexpr bool operator!=(RegisterA64 rhs) const
{
return !(*this == rhs);
}
};
constexpr RegisterA64 castReg(KindA64 kind, RegisterA64 reg)
{
LUAU_ASSERT(kind != reg.kind);
LUAU_ASSERT(kind != KindA64::none && reg.kind != KindA64::none);
LUAU_ASSERT((kind == KindA64::w || kind == KindA64::x) == (reg.kind == KindA64::w || reg.kind == KindA64::x));
return RegisterA64{kind, reg.index};
}
// This is equivalent to castReg(KindA64::x, reg), but is separate because it implies different semantics
// Specifically, there are cases when it's useful to treat a wN register as an xN register *after* it has been assigned a value
// Since all A64 instructions that write to wN implicitly zero the top half, this works when we need zero extension semantics
// Crucially, this is *not* safe on an ABI boundary - an int parameter in wN register may have anything in its top half in certain cases
// However, as long as our codegen doesn't use 32-bit truncation by using castReg x=>w, we can safely rely on this.
constexpr RegisterA64 zextReg(RegisterA64 reg)
{
LUAU_ASSERT(reg.kind == KindA64::w);
return RegisterA64{KindA64::x, reg.index};
}
constexpr RegisterA64 noreg{KindA64::none, 0};
constexpr RegisterA64 w0{KindA64::w, 0};
constexpr RegisterA64 w1{KindA64::w, 1};
constexpr RegisterA64 w2{KindA64::w, 2};
constexpr RegisterA64 w3{KindA64::w, 3};
constexpr RegisterA64 w4{KindA64::w, 4};
constexpr RegisterA64 w5{KindA64::w, 5};
constexpr RegisterA64 w6{KindA64::w, 6};
constexpr RegisterA64 w7{KindA64::w, 7};
constexpr RegisterA64 w8{KindA64::w, 8};
constexpr RegisterA64 w9{KindA64::w, 9};
constexpr RegisterA64 w10{KindA64::w, 10};
constexpr RegisterA64 w11{KindA64::w, 11};
constexpr RegisterA64 w12{KindA64::w, 12};
constexpr RegisterA64 w13{KindA64::w, 13};
constexpr RegisterA64 w14{KindA64::w, 14};
constexpr RegisterA64 w15{KindA64::w, 15};
constexpr RegisterA64 w16{KindA64::w, 16};
constexpr RegisterA64 w17{KindA64::w, 17};
constexpr RegisterA64 w18{KindA64::w, 18};
constexpr RegisterA64 w19{KindA64::w, 19};
constexpr RegisterA64 w20{KindA64::w, 20};
constexpr RegisterA64 w21{KindA64::w, 21};
constexpr RegisterA64 w22{KindA64::w, 22};
constexpr RegisterA64 w23{KindA64::w, 23};
constexpr RegisterA64 w24{KindA64::w, 24};
constexpr RegisterA64 w25{KindA64::w, 25};
constexpr RegisterA64 w26{KindA64::w, 26};
constexpr RegisterA64 w27{KindA64::w, 27};
constexpr RegisterA64 w28{KindA64::w, 28};
constexpr RegisterA64 w29{KindA64::w, 29};
constexpr RegisterA64 w30{KindA64::w, 30};
constexpr RegisterA64 wzr{KindA64::w, 31};
constexpr RegisterA64 x0{KindA64::x, 0};
constexpr RegisterA64 x1{KindA64::x, 1};
constexpr RegisterA64 x2{KindA64::x, 2};
constexpr RegisterA64 x3{KindA64::x, 3};
constexpr RegisterA64 x4{KindA64::x, 4};
constexpr RegisterA64 x5{KindA64::x, 5};
constexpr RegisterA64 x6{KindA64::x, 6};
constexpr RegisterA64 x7{KindA64::x, 7};
constexpr RegisterA64 x8{KindA64::x, 8};
constexpr RegisterA64 x9{KindA64::x, 9};
constexpr RegisterA64 x10{KindA64::x, 10};
constexpr RegisterA64 x11{KindA64::x, 11};
constexpr RegisterA64 x12{KindA64::x, 12};
constexpr RegisterA64 x13{KindA64::x, 13};
constexpr RegisterA64 x14{KindA64::x, 14};
constexpr RegisterA64 x15{KindA64::x, 15};
constexpr RegisterA64 x16{KindA64::x, 16};
constexpr RegisterA64 x17{KindA64::x, 17};
constexpr RegisterA64 x18{KindA64::x, 18};
constexpr RegisterA64 x19{KindA64::x, 19};
constexpr RegisterA64 x20{KindA64::x, 20};
constexpr RegisterA64 x21{KindA64::x, 21};
constexpr RegisterA64 x22{KindA64::x, 22};
constexpr RegisterA64 x23{KindA64::x, 23};
constexpr RegisterA64 x24{KindA64::x, 24};
constexpr RegisterA64 x25{KindA64::x, 25};
constexpr RegisterA64 x26{KindA64::x, 26};
constexpr RegisterA64 x27{KindA64::x, 27};
constexpr RegisterA64 x28{KindA64::x, 28};
constexpr RegisterA64 x29{KindA64::x, 29};
constexpr RegisterA64 x30{KindA64::x, 30};
constexpr RegisterA64 xzr{KindA64::x, 31};
constexpr RegisterA64 sp{KindA64::none, 31};
constexpr RegisterA64 s0{KindA64::s, 0};
constexpr RegisterA64 s1{KindA64::s, 1};
constexpr RegisterA64 s2{KindA64::s, 2};
constexpr RegisterA64 s3{KindA64::s, 3};
constexpr RegisterA64 s4{KindA64::s, 4};
constexpr RegisterA64 s5{KindA64::s, 5};
constexpr RegisterA64 s6{KindA64::s, 6};
constexpr RegisterA64 s7{KindA64::s, 7};
constexpr RegisterA64 s8{KindA64::s, 8};
constexpr RegisterA64 s9{KindA64::s, 9};
constexpr RegisterA64 s10{KindA64::s, 10};
constexpr RegisterA64 s11{KindA64::s, 11};
constexpr RegisterA64 s12{KindA64::s, 12};
constexpr RegisterA64 s13{KindA64::s, 13};
constexpr RegisterA64 s14{KindA64::s, 14};
constexpr RegisterA64 s15{KindA64::s, 15};
constexpr RegisterA64 s16{KindA64::s, 16};
constexpr RegisterA64 s17{KindA64::s, 17};
constexpr RegisterA64 s18{KindA64::s, 18};
constexpr RegisterA64 s19{KindA64::s, 19};
constexpr RegisterA64 s20{KindA64::s, 20};
constexpr RegisterA64 s21{KindA64::s, 21};
constexpr RegisterA64 s22{KindA64::s, 22};
constexpr RegisterA64 s23{KindA64::s, 23};
constexpr RegisterA64 s24{KindA64::s, 24};
constexpr RegisterA64 s25{KindA64::s, 25};
constexpr RegisterA64 s26{KindA64::s, 26};
constexpr RegisterA64 s27{KindA64::s, 27};
constexpr RegisterA64 s28{KindA64::s, 28};
constexpr RegisterA64 s29{KindA64::s, 29};
constexpr RegisterA64 s30{KindA64::s, 30};
constexpr RegisterA64 s31{KindA64::s, 31};
constexpr RegisterA64 d0{KindA64::d, 0};
constexpr RegisterA64 d1{KindA64::d, 1};
constexpr RegisterA64 d2{KindA64::d, 2};
constexpr RegisterA64 d3{KindA64::d, 3};
constexpr RegisterA64 d4{KindA64::d, 4};
constexpr RegisterA64 d5{KindA64::d, 5};
constexpr RegisterA64 d6{KindA64::d, 6};
constexpr RegisterA64 d7{KindA64::d, 7};
constexpr RegisterA64 d8{KindA64::d, 8};
constexpr RegisterA64 d9{KindA64::d, 9};
constexpr RegisterA64 d10{KindA64::d, 10};
constexpr RegisterA64 d11{KindA64::d, 11};
constexpr RegisterA64 d12{KindA64::d, 12};
constexpr RegisterA64 d13{KindA64::d, 13};
constexpr RegisterA64 d14{KindA64::d, 14};
constexpr RegisterA64 d15{KindA64::d, 15};
constexpr RegisterA64 d16{KindA64::d, 16};
constexpr RegisterA64 d17{KindA64::d, 17};
constexpr RegisterA64 d18{KindA64::d, 18};
constexpr RegisterA64 d19{KindA64::d, 19};
constexpr RegisterA64 d20{KindA64::d, 20};
constexpr RegisterA64 d21{KindA64::d, 21};
constexpr RegisterA64 d22{KindA64::d, 22};
constexpr RegisterA64 d23{KindA64::d, 23};
constexpr RegisterA64 d24{KindA64::d, 24};
constexpr RegisterA64 d25{KindA64::d, 25};
constexpr RegisterA64 d26{KindA64::d, 26};
constexpr RegisterA64 d27{KindA64::d, 27};
constexpr RegisterA64 d28{KindA64::d, 28};
constexpr RegisterA64 d29{KindA64::d, 29};
constexpr RegisterA64 d30{KindA64::d, 30};
constexpr RegisterA64 d31{KindA64::d, 31};
constexpr RegisterA64 q0{KindA64::q, 0};
constexpr RegisterA64 q1{KindA64::q, 1};
constexpr RegisterA64 q2{KindA64::q, 2};
constexpr RegisterA64 q3{KindA64::q, 3};
constexpr RegisterA64 q4{KindA64::q, 4};
constexpr RegisterA64 q5{KindA64::q, 5};
constexpr RegisterA64 q6{KindA64::q, 6};
constexpr RegisterA64 q7{KindA64::q, 7};
constexpr RegisterA64 q8{KindA64::q, 8};
constexpr RegisterA64 q9{KindA64::q, 9};
constexpr RegisterA64 q10{KindA64::q, 10};
constexpr RegisterA64 q11{KindA64::q, 11};
constexpr RegisterA64 q12{KindA64::q, 12};
constexpr RegisterA64 q13{KindA64::q, 13};
constexpr RegisterA64 q14{KindA64::q, 14};
constexpr RegisterA64 q15{KindA64::q, 15};
constexpr RegisterA64 q16{KindA64::q, 16};
constexpr RegisterA64 q17{KindA64::q, 17};
constexpr RegisterA64 q18{KindA64::q, 18};
constexpr RegisterA64 q19{KindA64::q, 19};
constexpr RegisterA64 q20{KindA64::q, 20};
constexpr RegisterA64 q21{KindA64::q, 21};
constexpr RegisterA64 q22{KindA64::q, 22};
constexpr RegisterA64 q23{KindA64::q, 23};
constexpr RegisterA64 q24{KindA64::q, 24};
constexpr RegisterA64 q25{KindA64::q, 25};
constexpr RegisterA64 q26{KindA64::q, 26};
constexpr RegisterA64 q27{KindA64::q, 27};
constexpr RegisterA64 q28{KindA64::q, 28};
constexpr RegisterA64 q29{KindA64::q, 29};
constexpr RegisterA64 q30{KindA64::q, 30};
constexpr RegisterA64 q31{KindA64::q, 31};
} // namespace A64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,152 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Common.h"
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
namespace X64
{
enum class SizeX64 : uint8_t
{
none,
byte,
word,
dword,
qword,
xmmword,
ymmword,
};
struct RegisterX64
{
SizeX64 size : 3;
uint8_t index : 5;
constexpr bool operator==(RegisterX64 rhs) const
{
return size == rhs.size && index == rhs.index;
}
constexpr bool operator!=(RegisterX64 rhs) const
{
return !(*this == rhs);
}
};
constexpr RegisterX64 noreg{SizeX64::none, 16};
constexpr RegisterX64 rip{SizeX64::none, 0};
constexpr RegisterX64 al{SizeX64::byte, 0};
constexpr RegisterX64 cl{SizeX64::byte, 1};
constexpr RegisterX64 dl{SizeX64::byte, 2};
constexpr RegisterX64 bl{SizeX64::byte, 3};
constexpr RegisterX64 spl{SizeX64::byte, 4};
constexpr RegisterX64 bpl{SizeX64::byte, 5};
constexpr RegisterX64 sil{SizeX64::byte, 6};
constexpr RegisterX64 dil{SizeX64::byte, 7};
constexpr RegisterX64 r8b{SizeX64::byte, 8};
constexpr RegisterX64 r9b{SizeX64::byte, 9};
constexpr RegisterX64 r10b{SizeX64::byte, 10};
constexpr RegisterX64 r11b{SizeX64::byte, 11};
constexpr RegisterX64 r12b{SizeX64::byte, 12};
constexpr RegisterX64 r13b{SizeX64::byte, 13};
constexpr RegisterX64 r14b{SizeX64::byte, 14};
constexpr RegisterX64 r15b{SizeX64::byte, 15};
constexpr RegisterX64 eax{SizeX64::dword, 0};
constexpr RegisterX64 ecx{SizeX64::dword, 1};
constexpr RegisterX64 edx{SizeX64::dword, 2};
constexpr RegisterX64 ebx{SizeX64::dword, 3};
constexpr RegisterX64 esp{SizeX64::dword, 4};
constexpr RegisterX64 ebp{SizeX64::dword, 5};
constexpr RegisterX64 esi{SizeX64::dword, 6};
constexpr RegisterX64 edi{SizeX64::dword, 7};
constexpr RegisterX64 r8d{SizeX64::dword, 8};
constexpr RegisterX64 r9d{SizeX64::dword, 9};
constexpr RegisterX64 r10d{SizeX64::dword, 10};
constexpr RegisterX64 r11d{SizeX64::dword, 11};
constexpr RegisterX64 r12d{SizeX64::dword, 12};
constexpr RegisterX64 r13d{SizeX64::dword, 13};
constexpr RegisterX64 r14d{SizeX64::dword, 14};
constexpr RegisterX64 r15d{SizeX64::dword, 15};
constexpr RegisterX64 rax{SizeX64::qword, 0};
constexpr RegisterX64 rcx{SizeX64::qword, 1};
constexpr RegisterX64 rdx{SizeX64::qword, 2};
constexpr RegisterX64 rbx{SizeX64::qword, 3};
constexpr RegisterX64 rsp{SizeX64::qword, 4};
constexpr RegisterX64 rbp{SizeX64::qword, 5};
constexpr RegisterX64 rsi{SizeX64::qword, 6};
constexpr RegisterX64 rdi{SizeX64::qword, 7};
constexpr RegisterX64 r8{SizeX64::qword, 8};
constexpr RegisterX64 r9{SizeX64::qword, 9};
constexpr RegisterX64 r10{SizeX64::qword, 10};
constexpr RegisterX64 r11{SizeX64::qword, 11};
constexpr RegisterX64 r12{SizeX64::qword, 12};
constexpr RegisterX64 r13{SizeX64::qword, 13};
constexpr RegisterX64 r14{SizeX64::qword, 14};
constexpr RegisterX64 r15{SizeX64::qword, 15};
constexpr RegisterX64 xmm0{SizeX64::xmmword, 0};
constexpr RegisterX64 xmm1{SizeX64::xmmword, 1};
constexpr RegisterX64 xmm2{SizeX64::xmmword, 2};
constexpr RegisterX64 xmm3{SizeX64::xmmword, 3};
constexpr RegisterX64 xmm4{SizeX64::xmmword, 4};
constexpr RegisterX64 xmm5{SizeX64::xmmword, 5};
constexpr RegisterX64 xmm6{SizeX64::xmmword, 6};
constexpr RegisterX64 xmm7{SizeX64::xmmword, 7};
constexpr RegisterX64 xmm8{SizeX64::xmmword, 8};
constexpr RegisterX64 xmm9{SizeX64::xmmword, 9};
constexpr RegisterX64 xmm10{SizeX64::xmmword, 10};
constexpr RegisterX64 xmm11{SizeX64::xmmword, 11};
constexpr RegisterX64 xmm12{SizeX64::xmmword, 12};
constexpr RegisterX64 xmm13{SizeX64::xmmword, 13};
constexpr RegisterX64 xmm14{SizeX64::xmmword, 14};
constexpr RegisterX64 xmm15{SizeX64::xmmword, 15};
constexpr RegisterX64 ymm0{SizeX64::ymmword, 0};
constexpr RegisterX64 ymm1{SizeX64::ymmword, 1};
constexpr RegisterX64 ymm2{SizeX64::ymmword, 2};
constexpr RegisterX64 ymm3{SizeX64::ymmword, 3};
constexpr RegisterX64 ymm4{SizeX64::ymmword, 4};
constexpr RegisterX64 ymm5{SizeX64::ymmword, 5};
constexpr RegisterX64 ymm6{SizeX64::ymmword, 6};
constexpr RegisterX64 ymm7{SizeX64::ymmword, 7};
constexpr RegisterX64 ymm8{SizeX64::ymmword, 8};
constexpr RegisterX64 ymm9{SizeX64::ymmword, 9};
constexpr RegisterX64 ymm10{SizeX64::ymmword, 10};
constexpr RegisterX64 ymm11{SizeX64::ymmword, 11};
constexpr RegisterX64 ymm12{SizeX64::ymmword, 12};
constexpr RegisterX64 ymm13{SizeX64::ymmword, 13};
constexpr RegisterX64 ymm14{SizeX64::ymmword, 14};
constexpr RegisterX64 ymm15{SizeX64::ymmword, 15};
constexpr RegisterX64 byteReg(RegisterX64 reg)
{
return RegisterX64{SizeX64::byte, reg.index};
}
constexpr RegisterX64 wordReg(RegisterX64 reg)
{
return RegisterX64{SizeX64::word, reg.index};
}
constexpr RegisterX64 dwordReg(RegisterX64 reg)
{
return RegisterX64{SizeX64::dword, reg.index};
}
constexpr RegisterX64 qwordReg(RegisterX64 reg)
{
return RegisterX64{SizeX64::qword, reg.index};
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau
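The byteReg/wordReg/dwordReg/qwordReg helpers above derive a differently sized alias of the same physical register by keeping the index and swapping the size. A minimal illustrative sketch (hypothetical usage, assuming Luau/RegisterX64.h is included; not part of this commit):
using namespace Luau::CodeGen::X64;
// same register index (0 or 1), different operand widths
static_assert(byteReg(rax) == al, "al is the 8-bit alias of rax");
static_assert(dwordReg(rax) == eax, "eax is the 32-bit alias of rax");
static_assert(qwordReg(ecx) == rcx, "rcx is the 64-bit alias of ecx");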

View File

@ -0,0 +1,61 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/RegisterA64.h"
#include "Luau/RegisterX64.h"
#include <initializer_list>
#include <stddef.h>
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
// This value is used in 'finishFunction' to mark the function that spans to the end of the whole code block
static uint32_t kFullBlockFunction = ~0u;
class UnwindBuilder
{
public:
enum Arch
{
X64,
A64
};
virtual ~UnwindBuilder() = default;
virtual void setBeginOffset(size_t beginOffset) = 0;
virtual size_t getBeginOffset() const = 0;
virtual void startInfo(Arch arch) = 0;
virtual void startFunction() = 0;
virtual void finishFunction(uint32_t beginOffset, uint32_t endOffset) = 0;
virtual void finishInfo() = 0;
// A64-specific; prologue must look like this:
// sub sp, sp, stackSize
// store sequence that saves regs to [sp..sp+regs.size*8) in the order specified in regs; regs should start with x29, x30 (fp, lr)
// mov x29, sp
virtual void prologueA64(uint32_t prologueSize, uint32_t stackSize, std::initializer_list<A64::RegisterA64> regs) = 0;
// X64-specific; prologue must look like this:
// optional, indicated by setupFrame:
// push rbp
// mov rbp, rsp
// push reg in the order specified in regs
// sub rsp, stackSize
virtual void prologueX64(uint32_t prologueSize, uint32_t stackSize, bool setupFrame, std::initializer_list<X64::RegisterX64> regs) = 0;
virtual size_t getSize() const = 0;
virtual size_t getFunctionCount() const = 0;
// This will place the unwinding data at the target address and might update values of some fields
virtual void finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const = 0;
};
} // namespace CodeGen
} // namespace Luau
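To illustrate the prologue contracts documented above, here is a hedged sketch of how an emitted x64 prologue would pair with its unwind registration (hypothetical register choices and sizes; 'unwind' and 'prologueSize' are assumed to exist in the surrounding code):
// emitted prologue, with setupFrame == true, two saved registers and 32 bytes of locals:
//   push rbp
//   mov rbp, rsp
//   push rbx
//   push r12
//   sub rsp, 32
// matching unwind description; prologueSize is the byte length of the code above:
unwind->prologueX64(prologueSize, /* stackSize */ 32, /* setupFrame */ true, {X64::rbx, X64::r12});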

View File

@ -0,0 +1,54 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/RegisterX64.h"
#include "UnwindBuilder.h"
#include <vector>
namespace Luau
{
namespace CodeGen
{
struct UnwindFunctionDwarf2
{
uint32_t beginOffset;
uint32_t endOffset;
uint32_t fdeEntryStartPos;
};
class UnwindBuilderDwarf2 : public UnwindBuilder
{
public:
void setBeginOffset(size_t beginOffset) override;
size_t getBeginOffset() const override;
void startInfo(Arch arch) override;
void startFunction() override;
void finishFunction(uint32_t beginOffset, uint32_t endOffset) override;
void finishInfo() override;
void prologueA64(uint32_t prologueSize, uint32_t stackSize, std::initializer_list<A64::RegisterA64> regs) override;
void prologueX64(uint32_t prologueSize, uint32_t stackSize, bool setupFrame, std::initializer_list<X64::RegisterX64> regs) override;
size_t getSize() const override;
size_t getFunctionCount() const override;
void finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const override;
private:
size_t beginOffset = 0;
std::vector<UnwindFunctionDwarf2> unwindFunctions;
static const unsigned kRawDataLimit = 1024;
uint8_t rawData[kRawDataLimit];
uint8_t* pos = rawData;
// We will remember the FDE location to write some of the fields like entry length, function start and size later
uint8_t* fdeEntryStart = nullptr;
};
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,78 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/RegisterX64.h"
#include "UnwindBuilder.h"
#include <vector>
namespace Luau
{
namespace CodeGen
{
// This struct matches the layout of x64 RUNTIME_FUNCTION from winnt.h
struct UnwindFunctionWin
{
uint32_t beginOffset;
uint32_t endOffset;
uint32_t unwindInfoOffset;
};
// This struct matches the layout of x64 UNWIND_INFO from ehdata.h
struct UnwindInfoWin
{
uint8_t version : 3;
uint8_t flags : 5;
uint8_t prologsize;
uint8_t unwindcodecount;
uint8_t framereg : 4;
uint8_t frameregoff : 4;
};
// This struct matches the layout of UNWIND_CODE from ehdata.h
struct UnwindCodeWin
{
uint8_t offset;
uint8_t opcode : 4;
uint8_t opinfo : 4;
};
class UnwindBuilderWin : public UnwindBuilder
{
public:
void setBeginOffset(size_t beginOffset) override;
size_t getBeginOffset() const override;
void startInfo(Arch arch) override;
void startFunction() override;
void finishFunction(uint32_t beginOffset, uint32_t endOffset) override;
void finishInfo() override;
void prologueA64(uint32_t prologueSize, uint32_t stackSize, std::initializer_list<A64::RegisterA64> regs) override;
void prologueX64(uint32_t prologueSize, uint32_t stackSize, bool setupFrame, std::initializer_list<X64::RegisterX64> regs) override;
size_t getSize() const override;
size_t getFunctionCount() const override;
void finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const override;
private:
size_t beginOffset = 0;
static const unsigned kRawDataLimit = 1024;
uint8_t rawData[kRawDataLimit];
uint8_t* rawDataPos = rawData;
std::vector<UnwindFunctionWin> unwindFunctions;
// Windows unwind codes are written in reverse, so we have to collect them all first
std::vector<UnwindCodeWin> unwindCodes;
uint8_t prologSize = 0;
X64::RegisterX64 frameReg = X64::noreg;
uint8_t frameRegOffset = 0;
};
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,18 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
// Can be used to reconfigure visibility/exports for public APIs
#ifndef LUACODEGEN_API
#define LUACODEGEN_API extern
#endif
struct lua_State;
// returns 1 if Luau code generator is supported, 0 otherwise
LUACODEGEN_API int luau_codegen_supported(void);
// create an instance of Luau code generator. you must check that this feature is supported using luau_codegen_supported().
LUACODEGEN_API void luau_codegen_create(lua_State* L);
// build target function and all inner functions
LUACODEGEN_API void luau_codegen_compile(lua_State* L, int idx);
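A minimal usage sketch of this C API (hypothetical host code; assumes a Luau function has already been pushed onto the stack):
if (luau_codegen_supported())
{
    luau_codegen_create(L);      // attach the code generator to this lua_State
    luau_codegen_compile(L, -1); // natively compile the function at the stack top and its inner functions
}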

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,56 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <stdint.h>
#ifdef _MSC_VER
#include <intrin.h>
#endif
namespace Luau
{
namespace CodeGen
{
inline int countlz(uint32_t n)
{
#ifdef _MSC_VER
unsigned long rl;
return _BitScanReverse(&rl, n) ? 31 - int(rl) : 32;
#else
return n == 0 ? 32 : __builtin_clz(n);
#endif
}
inline int countrz(uint32_t n)
{
#ifdef _MSC_VER
unsigned long rl;
return _BitScanForward(&rl, n) ? int(rl) : 32;
#else
return n == 0 ? 32 : __builtin_ctz(n);
#endif
}
inline uint32_t lrotate(uint32_t u, int s)
{
// MSVC doesn't recognize the rotate form that is UB-safe
#ifdef _MSC_VER
return _rotl(u, s);
#else
return (u << (s & 31)) | (u >> ((32 - s) & 31));
#endif
}
inline uint32_t rrotate(uint32_t u, int s)
{
// MSVC doesn't recognize the rotate form that is UB-safe
#ifdef _MSC_VER
return _rotr(u, s);
#else
return (u >> (s & 31)) | (u << ((32 - s) & 31));
#endif
}
} // namespace CodeGen
} // namespace Luau
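A few illustrative expectations for these helpers (a sanity-check sketch, not part of the original file; assumes <assert.h>):
static void bitUtilsSanityChecks()
{
    using namespace Luau::CodeGen;
    assert(countlz(0u) == 32 && countlz(1u) == 31 && countlz(0x80000000u) == 0);
    assert(countrz(0u) == 32 && countrz(8u) == 3 && countrz(0x80000000u) == 31);
    assert(lrotate(0x80000001u, 1) == 0x00000003u); // the top bit wraps around to bit 0
    assert(rrotate(0x00000003u, 1) == 0x80000001u); // bit 0 wraps around to the top bit
}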

View File

@ -0,0 +1,80 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Common.h"
#if defined(LUAU_BIG_ENDIAN)
#include <endian.h>
#endif
#include <string.h>
inline uint8_t* writeu8(uint8_t* target, uint8_t value)
{
*target = value;
return target + sizeof(value);
}
inline uint8_t* writeu32(uint8_t* target, uint32_t value)
{
#if defined(LUAU_BIG_ENDIAN)
value = htole32(value);
#endif
memcpy(target, &value, sizeof(value));
return target + sizeof(value);
}
inline uint8_t* writeu64(uint8_t* target, uint64_t value)
{
#if defined(LUAU_BIG_ENDIAN)
value = htole64(value);
#endif
memcpy(target, &value, sizeof(value));
return target + sizeof(value);
}
inline uint8_t* writeuleb128(uint8_t* target, uint64_t value)
{
do
{
uint8_t byte = value & 0x7f;
value >>= 7;
if (value)
byte |= 0x80;
*target++ = byte;
} while (value);
return target;
}
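writeuleb128 emits 7 bits per byte, least significant group first, with the high bit of each byte acting as a continuation flag. A worked example using the classic DWARF test value (an illustrative sketch, not part of the original file):
static void ulebExample()
{
    // 624485 == 0x98765; the 7-bit groups from the least significant end are
    // 0x65, 0x0E, 0x26, so the encoded bytes are 0xE5 0x8E 0x26
    uint8_t buf[10];
    uint8_t* end = writeuleb128(buf, 624485);
    LUAU_ASSERT(end - buf == 3 && buf[0] == 0xE5 && buf[1] == 0x8E && buf[2] == 0x26);
}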
inline uint8_t* writef32(uint8_t* target, float value)
{
#if defined(LUAU_BIG_ENDIAN)
static_assert(sizeof(float) == sizeof(uint32_t), "type size must match to reinterpret data");
uint32_t data;
memcpy(&data, &value, sizeof(value));
writeu32(target, data);
#else
memcpy(target, &value, sizeof(value));
#endif
return target + sizeof(value);
}
inline uint8_t* writef64(uint8_t* target, double value)
{
#if defined(LUAU_BIG_ENDIAN)
static_assert(sizeof(double) == sizeof(uint64_t), "type size must match to reinterpret data");
uint64_t data;
memcpy(&data, &value, sizeof(value));
writeu64(target, data);
#else
memcpy(target, &value, sizeof(value));
#endif
return target + sizeof(value);
}

View File

@ -0,0 +1,209 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/CodeAllocator.h"
#include "Luau/Common.h"
#include <string.h>
#if defined(_WIN32)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <Windows.h>
const size_t kPageSize = 4096;
#else
#include <sys/mman.h>
#include <unistd.h>
#if defined(__FreeBSD__) && !(_POSIX_C_SOURCE >= 200112L)
const size_t kPageSize = getpagesize();
#else
const size_t kPageSize = sysconf(_SC_PAGESIZE);
#endif
#endif
static size_t alignToPageSize(size_t size)
{
return (size + kPageSize - 1) & ~(kPageSize - 1);
}
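For instance, with 4 KiB pages the mask arithmetic above rounds any size up to the next page boundary (illustrative values):
// alignToPageSize(1) == 4096, alignToPageSize(4096) == 4096, alignToPageSize(4097) == 8192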
#if defined(_WIN32)
static uint8_t* allocatePages(size_t size)
{
return (uint8_t*)VirtualAlloc(nullptr, alignToPageSize(size), MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
}
static void freePages(uint8_t* mem, size_t size)
{
if (VirtualFree(mem, 0, MEM_RELEASE) == 0)
LUAU_ASSERT(!"failed to deallocate block memory");
}
static void makePagesExecutable(uint8_t* mem, size_t size)
{
LUAU_ASSERT((uintptr_t(mem) & (kPageSize - 1)) == 0);
LUAU_ASSERT(size == alignToPageSize(size));
DWORD oldProtect;
if (VirtualProtect(mem, size, PAGE_EXECUTE_READ, &oldProtect) == 0)
LUAU_ASSERT(!"failed to change page protection");
}
static void flushInstructionCache(uint8_t* mem, size_t size)
{
if (FlushInstructionCache(GetCurrentProcess(), mem, size) == 0)
LUAU_ASSERT(!"failed to flush instruction cache");
}
#else
static uint8_t* allocatePages(size_t size)
{
return (uint8_t*)mmap(nullptr, alignToPageSize(size), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
}
static void freePages(uint8_t* mem, size_t size)
{
if (munmap(mem, alignToPageSize(size)) != 0)
LUAU_ASSERT(!"failed to deallocate block memory");
}
static void makePagesExecutable(uint8_t* mem, size_t size)
{
LUAU_ASSERT((uintptr_t(mem) & (kPageSize - 1)) == 0);
LUAU_ASSERT(size == alignToPageSize(size));
if (mprotect(mem, size, PROT_READ | PROT_EXEC) != 0)
LUAU_ASSERT(!"failed to change page protection");
}
static void flushInstructionCache(uint8_t* mem, size_t size)
{
__builtin___clear_cache((char*)mem, (char*)mem + size);
}
#endif
namespace Luau
{
namespace CodeGen
{
CodeAllocator::CodeAllocator(size_t blockSize, size_t maxTotalSize)
: blockSize(blockSize)
, maxTotalSize(maxTotalSize)
{
LUAU_ASSERT(blockSize > kMaxReservedDataSize);
LUAU_ASSERT(maxTotalSize >= blockSize);
}
CodeAllocator::~CodeAllocator()
{
if (destroyBlockUnwindInfo)
{
for (void* unwindInfo : unwindInfos)
destroyBlockUnwindInfo(context, unwindInfo);
}
for (uint8_t* block : blocks)
freePages(block, blockSize);
}
bool CodeAllocator::allocate(
const uint8_t* data, size_t dataSize, const uint8_t* code, size_t codeSize, uint8_t*& result, size_t& resultSize, uint8_t*& resultCodeStart)
{
// 'Round up' to preserve code alignment
size_t alignedDataSize = (dataSize + (kCodeAlignment - 1)) & ~(kCodeAlignment - 1);
size_t totalSize = alignedDataSize + codeSize;
// Function has to fit into a single block with unwinding information
if (totalSize > blockSize - kMaxReservedDataSize)
return false;
size_t startOffset = 0;
// We might need a new block
if (totalSize > size_t(blockEnd - blockPos))
{
if (!allocateNewBlock(startOffset))
return false;
LUAU_ASSERT(totalSize <= size_t(blockEnd - blockPos));
}
LUAU_ASSERT((uintptr_t(blockPos) & (kPageSize - 1)) == 0); // Allocation starts on page boundary
size_t dataOffset = startOffset + alignedDataSize - dataSize;
size_t codeOffset = startOffset + alignedDataSize;
if (dataSize)
memcpy(blockPos + dataOffset, data, dataSize);
if (codeSize)
memcpy(blockPos + codeOffset, code, codeSize);
size_t pageAlignedSize = alignToPageSize(startOffset + totalSize);
makePagesExecutable(blockPos, pageAlignedSize);
flushInstructionCache(blockPos + codeOffset, codeSize);
result = blockPos + startOffset;
resultSize = totalSize;
resultCodeStart = blockPos + codeOffset;
// Ensure that future allocations from the block start from a page boundary.
// This is important since we use W^X, and writing to the previous page would require briefly removing
// executable bit from it, which may result in access violations if that code is being executed concurrently.
if (pageAlignedSize <= size_t(blockEnd - blockPos))
{
blockPos += pageAlignedSize;
LUAU_ASSERT((uintptr_t(blockPos) & (kPageSize - 1)) == 0);
LUAU_ASSERT(blockPos <= blockEnd);
}
else
{
// Future allocations will need to allocate fresh blocks
blockPos = blockEnd;
}
return true;
}
bool CodeAllocator::allocateNewBlock(size_t& unwindInfoSize)
{
// Stop allocating once we reach a global limit
if ((blocks.size() + 1) * blockSize > maxTotalSize)
return false;
uint8_t* block = allocatePages(blockSize);
if (!block)
return false;
blockPos = block;
blockEnd = block + blockSize;
blocks.push_back(block);
if (createBlockUnwindInfo)
{
void* unwindInfo = createBlockUnwindInfo(context, block, blockSize, unwindInfoSize);
// 'Round up' to preserve alignment of the following data and code
unwindInfoSize = (unwindInfoSize + (kCodeAlignment - 1)) & ~(kCodeAlignment - 1);
LUAU_ASSERT(unwindInfoSize <= kMaxReservedDataSize);
if (!unwindInfo)
return false;
unwindInfos.push_back(unwindInfo);
}
return true;
}
} // namespace CodeGen
} // namespace Luau
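A hedged usage sketch of the allocator (hypothetical 'data'/'code' buffers and block sizes; mirrors how CodeGen::compile drives it later in this commit):
void exampleAllocate(const std::vector<uint8_t>& data, const std::vector<uint8_t>& code)
{
    Luau::CodeGen::CodeAllocator allocator(/* blockSize */ 1 << 20, /* maxTotalSize */ 1 << 28);
    uint8_t* nativeData = nullptr;
    size_t sizeNativeData = 0;
    uint8_t* codeStart = nullptr;
    if (allocator.allocate(data.data(), data.size(), code.data(), code.size(), nativeData, sizeNativeData, codeStart))
    {
        // codeStart now points into a page-aligned executable block holding 'code',
        // with 'data' placed immediately before it at kCodeAlignment
    }
}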

View File

@ -0,0 +1,121 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/CodeBlockUnwind.h"
#include "Luau/CodeAllocator.h"
#include "Luau/UnwindBuilder.h"
#include <string.h>
#include <stdlib.h>
#if defined(_WIN32) && defined(_M_X64)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <Windows.h>
#elif defined(__linux__) || defined(__APPLE__)
// Defined in unwind.h which may not be easily discoverable on various platforms
extern "C" void __register_frame(const void*);
extern "C" void __deregister_frame(const void*);
extern "C" void __unw_add_dynamic_fde() __attribute__((weak));
#endif
#if defined(__APPLE__) && defined(__aarch64__)
#include <sys/sysctl.h>
#endif
namespace Luau
{
namespace CodeGen
{
#if defined(__linux__) || defined(__APPLE__)
static void visitFdeEntries(char* pos, void (*cb)(const void*))
{
// When using glibc++ unwinder, we need to call __register_frame/__deregister_frame on the entire .eh_frame data
// When using libc++ unwinder (libunwind), each FDE has to be handled separately
// libc++ unwinder is the macOS unwinder, but on Linux the unwinder depends on the library the executable is linked with
// __unw_add_dynamic_fde is specific to libc++ unwinder, as such we determine the library based on its existence
if (__unw_add_dynamic_fde == nullptr)
return cb(pos);
for (;;)
{
unsigned partLength;
memcpy(&partLength, pos, sizeof(partLength));
if (partLength == 0) // Zero-length section signals completion
break;
unsigned partId;
memcpy(&partId, pos + 4, sizeof(partId));
if (partId != 0) // Skip CIE part
cb(pos); // CIE is found using an offset in FDE
pos += partLength + 4;
}
}
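For reference, the .eh_frame layout that the loop above walks looks like this (schematic; entry sizes are illustrative):
// [u32 length][u32 id == 0][CIE data...]  <- id == 0 marks the CIE, skipped (FDEs reference it by offset)
// [u32 length][u32 id != 0][FDE data...]  <- cb() is invoked on the start of each FDE entry
// ...more FDEs...
// [u32 0]                                 <- zero-length terminator ends the walk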
#endif
void* createBlockUnwindInfo(void* context, uint8_t* block, size_t blockSize, size_t& beginOffset)
{
UnwindBuilder* unwind = (UnwindBuilder*)context;
// All unwinding related data is placed together at the start of the block
size_t unwindSize = unwind->getSize();
unwindSize = (unwindSize + (kCodeAlignment - 1)) & ~(kCodeAlignment - 1); // Match code allocator alignment
LUAU_ASSERT(blockSize >= unwindSize);
char* unwindData = (char*)block;
unwind->finalize(unwindData, unwindSize, block, blockSize);
#if defined(_WIN32) && defined(_M_X64)
if (!RtlAddFunctionTable((RUNTIME_FUNCTION*)block, uint32_t(unwind->getFunctionCount()), uintptr_t(block)))
{
LUAU_ASSERT(!"failed to allocate function table");
return nullptr;
}
#elif defined(__linux__) || defined(__APPLE__)
visitFdeEntries(unwindData, __register_frame);
#endif
beginOffset = unwindSize + unwind->getBeginOffset();
return block;
}
void destroyBlockUnwindInfo(void* context, void* unwindData)
{
#if defined(_WIN32) && defined(_M_X64)
if (!RtlDeleteFunctionTable((RUNTIME_FUNCTION*)unwindData))
LUAU_ASSERT(!"failed to deallocate function table");
#elif defined(__linux__) || defined(__APPLE__)
visitFdeEntries((char*)unwindData, __deregister_frame);
#endif
}
bool isUnwindSupported()
{
#if defined(_WIN32) && defined(_M_X64)
return true;
#elif defined(__APPLE__) && defined(__aarch64__)
char ver[256];
size_t verLength = sizeof(ver);
// libunwind on macOS 12 and earlier (which maps to osrelease 21) assumes JIT frames use pointer authentication without a way to override that
return sysctlbyname("kern.osrelease", ver, &verLength, NULL, 0) == 0 && atoi(ver) >= 22;
#elif defined(__linux__) || defined(__APPLE__)
return true;
#else
return false;
#endif
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,616 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/CodeGen.h"
#include "Luau/Common.h"
#include "Luau/CodeAllocator.h"
#include "Luau/CodeBlockUnwind.h"
#include "Luau/IrAnalysis.h"
#include "Luau/IrBuilder.h"
#include "Luau/IrDump.h"
#include "Luau/IrUtils.h"
#include "Luau/OptimizeConstProp.h"
#include "Luau/OptimizeFinalX64.h"
#include "Luau/UnwindBuilder.h"
#include "Luau/UnwindBuilderDwarf2.h"
#include "Luau/UnwindBuilderWin.h"
#include "Luau/AssemblyBuilderA64.h"
#include "Luau/AssemblyBuilderX64.h"
#include "CustomExecUtils.h"
#include "NativeState.h"
#include "CodeGenA64.h"
#include "EmitCommonA64.h"
#include "IrLoweringA64.h"
#include "CodeGenX64.h"
#include "EmitCommonX64.h"
#include "EmitInstructionX64.h"
#include "IrLoweringX64.h"
#include "lapi.h"
#include <algorithm>
#include <memory>
#include <optional>
#if defined(__x86_64__) || defined(_M_X64)
#ifdef _MSC_VER
#include <intrin.h> // __cpuid
#else
#include <cpuid.h> // __cpuid
#endif
#endif
#if defined(__aarch64__)
#ifdef __APPLE__
#include <sys/sysctl.h>
#endif
#endif
LUAU_FASTFLAGVARIABLE(DebugCodegenNoOpt, false)
LUAU_FASTFLAGVARIABLE(DebugCodegenOptSize, false)
LUAU_FASTFLAGVARIABLE(DebugCodegenSkipNumbering, false)
namespace Luau
{
namespace CodeGen
{
static void* gPerfLogContext = nullptr;
static PerfLogFn gPerfLogFn = nullptr;
struct NativeProto
{
Proto* p;
void* execdata;
uintptr_t exectarget;
};
static NativeProto createNativeProto(Proto* proto, const IrBuilder& ir)
{
int sizecode = proto->sizecode;
uint32_t* instOffsets = new uint32_t[sizecode];
uint32_t instTarget = ir.function.bcMapping[0].asmLocation;
for (int i = 0; i < sizecode; i++)
{
LUAU_ASSERT(ir.function.bcMapping[i].asmLocation >= instTarget);
instOffsets[i] = ir.function.bcMapping[i].asmLocation - instTarget;
}
// entry target will be relocated when assembly is finalized
return {proto, instOffsets, instTarget};
}
static void destroyExecData(void* execdata)
{
delete[] static_cast<uint32_t*>(execdata);
}
static void logPerfFunction(Proto* p, uintptr_t addr, unsigned size)
{
LUAU_ASSERT(p->source);
const char* source = getstr(p->source);
source = (source[0] == '=' || source[0] == '@') ? source + 1 : "[string]";
char name[256];
snprintf(name, sizeof(name), "<luau> %s:%d %s", source, p->linedefined, p->debugname ? getstr(p->debugname) : "");
if (gPerfLogFn)
gPerfLogFn(gPerfLogContext, addr, size, name);
}
template<typename AssemblyBuilder, typename IrLowering>
static bool lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction& function, int bytecodeid, AssemblyOptions options)
{
// While we will need a better block ordering in the future, right now we want to mostly preserve build order with fallbacks outlined
std::vector<uint32_t> sortedBlocks;
sortedBlocks.reserve(function.blocks.size());
for (uint32_t i = 0; i < function.blocks.size(); i++)
sortedBlocks.push_back(i);
std::sort(sortedBlocks.begin(), sortedBlocks.end(), [&](uint32_t idxA, uint32_t idxB) {
const IrBlock& a = function.blocks[idxA];
const IrBlock& b = function.blocks[idxB];
// Place fallback blocks at the end
if ((a.kind == IrBlockKind::Fallback) != (b.kind == IrBlockKind::Fallback))
return (a.kind == IrBlockKind::Fallback) < (b.kind == IrBlockKind::Fallback);
// Try to order by instruction order
return a.start < b.start;
});
// For each IR instruction that begins a bytecode instruction, which bytecode instruction is it?
std::vector<uint32_t> bcLocations(function.instructions.size() + 1, ~0u);
for (size_t i = 0; i < function.bcMapping.size(); ++i)
{
uint32_t irLocation = function.bcMapping[i].irLocation;
if (irLocation != ~0u)
bcLocations[irLocation] = uint32_t(i);
}
bool outputEnabled = options.includeAssembly || options.includeIr;
IrToStringContext ctx{build.text, function.blocks, function.constants, function.cfg};
// We use this to skip outlined fallback blocks from IR/asm text output
size_t textSize = build.text.length();
uint32_t codeSize = build.getCodeSize();
bool seenFallback = false;
IrBlock dummy;
dummy.start = ~0u;
for (size_t i = 0; i < sortedBlocks.size(); ++i)
{
uint32_t blockIndex = sortedBlocks[i];
IrBlock& block = function.blocks[blockIndex];
if (block.kind == IrBlockKind::Dead)
continue;
LUAU_ASSERT(block.start != ~0u);
LUAU_ASSERT(block.finish != ~0u);
// If we want to skip fallback code IR/asm, we'll record when those blocks start once we see them
if (block.kind == IrBlockKind::Fallback && !seenFallback)
{
textSize = build.text.length();
codeSize = build.getCodeSize();
seenFallback = true;
}
if (options.includeIr)
{
build.logAppend("# ");
toStringDetailed(ctx, block, blockIndex, /* includeUseInfo */ true);
}
// Values can only reference restore operands in the current block
function.validRestoreOpBlockIdx = blockIndex;
build.setLabel(block.label);
for (uint32_t index = block.start; index <= block.finish; index++)
{
LUAU_ASSERT(index < function.instructions.size());
uint32_t bcLocation = bcLocations[index];
// If IR instruction is the first one for the original bytecode, we can annotate it with source code text
if (outputEnabled && options.annotator && bcLocation != ~0u)
{
options.annotator(options.annotatorContext, build.text, bytecodeid, bcLocation);
}
// If bytecode needs the location of this instruction for jumps, record it
if (bcLocation != ~0u)
{
Label label = (index == block.start) ? block.label : build.setLabel();
function.bcMapping[bcLocation].asmLocation = build.getLabelOffset(label);
}
IrInst& inst = function.instructions[index];
// Skip pseudo instructions, but make sure they are not used at this stage
// This also prevents them from getting into text output when that's enabled
if (isPseudo(inst.cmd))
{
LUAU_ASSERT(inst.useCount == 0);
continue;
}
// Either instruction result value is not referenced or the use count is not zero
LUAU_ASSERT(inst.lastUse == 0 || inst.useCount != 0);
if (options.includeIr)
{
build.logAppend("# ");
toStringDetailed(ctx, block, blockIndex, inst, index, /* includeUseInfo */ true);
}
IrBlock& next = i + 1 < sortedBlocks.size() ? function.blocks[sortedBlocks[i + 1]] : dummy;
lowering.lowerInst(inst, index, next);
if (lowering.hasError())
{
// Place labels for all blocks that we're skipping
// This is needed to avoid AssemblyBuilder assertions about jumps in earlier blocks with unplaced labels
for (size_t j = i + 1; j < sortedBlocks.size(); ++j)
{
IrBlock& abandoned = function.blocks[sortedBlocks[j]];
build.setLabel(abandoned.label);
}
return false;
}
}
lowering.finishBlock();
if (options.includeIr)
build.logAppend("#\n");
}
if (outputEnabled && !options.includeOutlinedCode && seenFallback)
{
build.text.resize(textSize);
if (options.includeAssembly)
build.logAppend("; skipping %u bytes of outlined code\n", unsigned((build.getCodeSize() - codeSize) * sizeof(build.code[0])));
}
return true;
}
[[maybe_unused]] static bool lowerIr(
X64::AssemblyBuilderX64& build, IrBuilder& ir, NativeState& data, ModuleHelpers& helpers, Proto* proto, AssemblyOptions options)
{
optimizeMemoryOperandsX64(ir.function);
X64::IrLoweringX64 lowering(build, helpers, data, ir.function);
return lowerImpl(build, lowering, ir.function, proto->bytecodeid, options);
}
[[maybe_unused]] static bool lowerIr(
A64::AssemblyBuilderA64& build, IrBuilder& ir, NativeState& data, ModuleHelpers& helpers, Proto* proto, AssemblyOptions options)
{
A64::IrLoweringA64 lowering(build, helpers, data, proto, ir.function);
return lowerImpl(build, lowering, ir.function, proto->bytecodeid, options);
}
template<typename AssemblyBuilder>
static std::optional<NativeProto> assembleFunction(AssemblyBuilder& build, NativeState& data, ModuleHelpers& helpers, Proto* proto, AssemblyOptions options)
{
if (options.includeAssembly || options.includeIr)
{
if (proto->debugname)
build.logAppend("; function %s(", getstr(proto->debugname));
else
build.logAppend("; function(");
for (int i = 0; i < proto->numparams; i++)
{
LocVar* var = proto->locvars ? &proto->locvars[proto->sizelocvars - proto->numparams + i] : nullptr;
if (var && var->varname)
build.logAppend("%s%s", i == 0 ? "" : ", ", getstr(var->varname));
else
build.logAppend("%s$arg%d", i == 0 ? "" : ", ", i);
}
if (proto->numparams != 0 && proto->is_vararg)
build.logAppend(", ...)");
else
build.logAppend(")");
if (proto->linedefined >= 0)
build.logAppend(" line %d\n", proto->linedefined);
else
build.logAppend("\n");
}
IrBuilder ir;
ir.buildFunctionIr(proto);
computeCfgInfo(ir.function);
if (!FFlag::DebugCodegenNoOpt)
{
bool useValueNumbering = !FFlag::DebugCodegenSkipNumbering;
constPropInBlockChains(ir, useValueNumbering);
if (!FFlag::DebugCodegenOptSize)
createLinearBlocks(ir, useValueNumbering);
}
if (!lowerIr(build, ir, data, helpers, proto, options))
{
if (build.logText)
build.logAppend("; skipping (can't lower)\n\n");
return std::nullopt;
}
if (build.logText)
build.logAppend("\n");
return createNativeProto(proto, ir);
}
static void onCloseState(lua_State* L)
{
destroyNativeState(L);
}
static void onDestroyFunction(lua_State* L, Proto* proto)
{
destroyExecData(proto->execdata);
proto->execdata = nullptr;
proto->exectarget = 0;
}
static int onEnter(lua_State* L, Proto* proto)
{
NativeState* data = getNativeState(L);
LUAU_ASSERT(proto->execdata);
LUAU_ASSERT(L->ci->savedpc >= proto->code && L->ci->savedpc < proto->code + proto->sizecode);
uintptr_t target = proto->exectarget + static_cast<uint32_t*>(proto->execdata)[L->ci->savedpc - proto->code];
// Returns 1 to finish the function in the VM
return GateFn(data->context.gateEntry)(L, proto, target, &data->context);
}
static void onSetBreakpoint(lua_State* L, Proto* proto, int instruction)
{
if (!proto->execdata)
return;
LUAU_ASSERT(!"native breakpoints are not implemented");
}
#if defined(__aarch64__)
static unsigned int getCpuFeaturesA64()
{
unsigned int result = 0;
#ifdef __APPLE__
int jscvt = 0;
size_t jscvtLen = sizeof(jscvt);
if (sysctlbyname("hw.optional.arm.FEAT_JSCVT", &jscvt, &jscvtLen, nullptr, 0) == 0 && jscvt == 1)
result |= A64::Feature_JSCVT;
#endif
return result;
}
#endif
bool isSupported()
{
if (!LUA_CUSTOM_EXECUTION)
return false;
if (LUA_EXTRA_SIZE != 1)
return false;
if (sizeof(TValue) != 16)
return false;
if (sizeof(LuaNode) != 32)
return false;
// Windows CRT uses stack unwinding in longjmp so we have to use unwind data; on other platforms, it's only necessary for C++ EH.
#if defined(_WIN32)
if (!isUnwindSupported())
return false;
#else
if (!LUA_USE_LONGJMP && !isUnwindSupported())
return false;
#endif
#if defined(__x86_64__) || defined(_M_X64)
int cpuinfo[4] = {};
#ifdef _MSC_VER
__cpuid(cpuinfo, 1);
#else
__cpuid(1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);
#endif
// We require AVX1 support for VEX encoded XMM operations
// We also require SSE4.1 support for ROUNDSD, but the AVX check below covers it
// https://en.wikipedia.org/wiki/CPUID#EAX=1:_Processor_Info_and_Feature_Bits
if ((cpuinfo[2] & (1 << 28)) == 0)
return false;
return true;
#elif defined(__aarch64__)
return true;
#else
return false;
#endif
}
void create(lua_State* L)
{
LUAU_ASSERT(isSupported());
NativeState& data = *createNativeState(L);
#if defined(_WIN32)
data.unwindBuilder = std::make_unique<UnwindBuilderWin>();
#else
data.unwindBuilder = std::make_unique<UnwindBuilderDwarf2>();
#endif
data.codeAllocator.context = data.unwindBuilder.get();
data.codeAllocator.createBlockUnwindInfo = createBlockUnwindInfo;
data.codeAllocator.destroyBlockUnwindInfo = destroyBlockUnwindInfo;
initFunctions(data);
#if defined(__x86_64__) || defined(_M_X64)
if (!X64::initHeaderFunctions(data))
{
destroyNativeState(L);
return;
}
#elif defined(__aarch64__)
if (!A64::initHeaderFunctions(data))
{
destroyNativeState(L);
return;
}
#endif
if (gPerfLogFn)
gPerfLogFn(gPerfLogContext, uintptr_t(data.context.gateEntry), 4096, "<luau gate>");
lua_ExecutionCallbacks* ecb = getExecutionCallbacks(L);
ecb->close = onCloseState;
ecb->destroy = onDestroyFunction;
ecb->enter = onEnter;
ecb->setbreakpoint = onSetBreakpoint;
}
static void gatherFunctions(std::vector<Proto*>& results, Proto* proto)
{
if (results.size() <= size_t(proto->bytecodeid))
results.resize(proto->bytecodeid + 1);
// Skip protos that we've already compiled in this run: this happens because at -O2, inlined functions get their protos reused
if (results[proto->bytecodeid])
return;
results[proto->bytecodeid] = proto;
for (int i = 0; i < proto->sizep; i++)
gatherFunctions(results, proto->p[i]);
}
void compile(lua_State* L, int idx)
{
LUAU_ASSERT(lua_isLfunction(L, idx));
const TValue* func = luaA_toobject(L, idx);
// If initialization has failed, do not compile any functions
if (!getNativeState(L))
return;
#if defined(__aarch64__)
A64::AssemblyBuilderA64 build(/* logText= */ false, getCpuFeaturesA64());
#else
X64::AssemblyBuilderX64 build(/* logText= */ false);
#endif
NativeState* data = getNativeState(L);
std::vector<Proto*> protos;
gatherFunctions(protos, clvalue(func)->l.p);
ModuleHelpers helpers;
#if defined(__aarch64__)
A64::assembleHelpers(build, helpers);
#else
X64::assembleHelpers(build, helpers);
#endif
std::vector<NativeProto> results;
results.reserve(protos.size());
// Skip protos that have been compiled during previous invocations of CodeGen::compile
for (Proto* p : protos)
if (p && p->execdata == nullptr)
if (std::optional<NativeProto> np = assembleFunction(build, *data, helpers, p, {}))
results.push_back(*np);
// Very large modules might result in overflowing a jump offset; in this case we currently abandon the entire module
if (!build.finalize())
{
for (NativeProto result : results)
destroyExecData(result.execdata);
return;
}
// If no functions were assembled, we don't need to allocate/copy executable pages for helpers
if (results.empty())
return;
uint8_t* nativeData = nullptr;
size_t sizeNativeData = 0;
uint8_t* codeStart = nullptr;
if (!data->codeAllocator.allocate(build.data.data(), int(build.data.size()), reinterpret_cast<const uint8_t*>(build.code.data()),
int(build.code.size() * sizeof(build.code[0])), nativeData, sizeNativeData, codeStart))
{
for (NativeProto result : results)
destroyExecData(result.execdata);
return;
}
if (gPerfLogFn && results.size() > 0)
{
gPerfLogFn(gPerfLogContext, uintptr_t(codeStart), uint32_t(results[0].exectarget), "<luau helpers>");
for (size_t i = 0; i < results.size(); ++i)
{
uint32_t begin = uint32_t(results[i].exectarget);
uint32_t end = i + 1 < results.size() ? uint32_t(results[i + 1].exectarget) : uint32_t(build.code.size() * sizeof(build.code[0]));
LUAU_ASSERT(begin < end);
logPerfFunction(results[i].p, uintptr_t(codeStart) + begin, end - begin);
}
}
for (NativeProto result : results)
{
// the memory is now managed by VM and will be freed via onDestroyFunction
result.p->execdata = result.execdata;
result.p->exectarget = uintptr_t(codeStart) + result.exectarget;
}
}
std::string getAssembly(lua_State* L, int idx, AssemblyOptions options)
{
LUAU_ASSERT(lua_isLfunction(L, idx));
const TValue* func = luaA_toobject(L, idx);
#if defined(__aarch64__)
A64::AssemblyBuilderA64 build(/* logText= */ options.includeAssembly, getCpuFeaturesA64());
#else
X64::AssemblyBuilderX64 build(/* logText= */ options.includeAssembly);
#endif
NativeState data;
initFunctions(data);
std::vector<Proto*> protos;
gatherFunctions(protos, clvalue(func)->l.p);
ModuleHelpers helpers;
#if defined(__aarch64__)
A64::assembleHelpers(build, helpers);
#else
X64::assembleHelpers(build, helpers);
#endif
for (Proto* p : protos)
if (p)
if (std::optional<NativeProto> np = assembleFunction(build, data, helpers, p, options))
destroyExecData(np->execdata);
if (!build.finalize())
return std::string();
if (options.outputBinary)
return std::string(reinterpret_cast<const char*>(build.code.data()), reinterpret_cast<const char*>(build.code.data() + build.code.size())) +
std::string(build.data.begin(), build.data.end());
else
return build.text;
}
void setPerfLog(void* context, PerfLogFn logFn)
{
gPerfLogContext = context;
gPerfLogFn = logFn;
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,236 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "CodeGenA64.h"
#include "Luau/AssemblyBuilderA64.h"
#include "Luau/UnwindBuilder.h"
#include "BitUtils.h"
#include "CustomExecUtils.h"
#include "NativeState.h"
#include "EmitCommonA64.h"
#include "lstate.h"
namespace Luau
{
namespace CodeGen
{
namespace A64
{
struct EntryLocations
{
Label start;
Label prologueEnd;
Label epilogueStart;
};
static void emitExit(AssemblyBuilderA64& build, bool continueInVm)
{
build.mov(x0, continueInVm);
build.ldr(x1, mem(rNativeContext, offsetof(NativeContext, gateExit)));
build.br(x1);
}
static void emitInterrupt(AssemblyBuilderA64& build)
{
// x0 = pc offset
// x1 = return address in native code
Label skip;
// Stash return address in rBase; we need to reload rBase anyway
build.mov(rBase, x1);
// Load interrupt handler; it may be nullptr in case the update raced with the check before we got here
build.ldr(x2, mem(rState, offsetof(lua_State, global)));
build.ldr(x2, mem(x2, offsetof(global_State, cb.interrupt)));
build.cbz(x2, skip);
// Update savedpc; required in case interrupt errors
build.add(x0, rCode, x0);
build.ldr(x1, mem(rState, offsetof(lua_State, ci)));
build.str(x0, mem(x1, offsetof(CallInfo, savedpc)));
// Call interrupt
build.mov(x0, rState);
build.mov(w1, -1);
build.blr(x2);
// Check if we need to exit
build.ldrb(w0, mem(rState, offsetof(lua_State, status)));
build.cbz(w0, skip);
// L->ci->savedpc--
// note: recomputing this avoids having to stash x0
build.ldr(x1, mem(rState, offsetof(lua_State, ci)));
build.ldr(x0, mem(x1, offsetof(CallInfo, savedpc)));
build.sub(x0, x0, sizeof(Instruction));
build.str(x0, mem(x1, offsetof(CallInfo, savedpc)));
emitExit(build, /* continueInVm */ false);
build.setLabel(skip);
// Return back to caller; rBase has stashed return address
build.mov(x0, rBase);
emitUpdateBase(build); // interrupt may have reallocated stack
build.br(x0);
}
static void emitReentry(AssemblyBuilderA64& build, ModuleHelpers& helpers)
{
// x0 = closure object to re-enter (equal to clvalue(L->ci->func))
// If the fallback requested an exit, we need to do this right away
build.cbz(x0, helpers.exitNoContinueVm);
emitUpdateBase(build);
// Need to update state of the current function before we jump away
build.ldr(x1, mem(x0, offsetof(Closure, l.p))); // cl->l.p aka proto
build.ldr(x2, mem(rState, offsetof(lua_State, ci))); // L->ci
// We need to check if the new frame can be executed natively
// TODO: .flags and .savedpc loads below can be fused with ldp
build.ldr(w3, mem(x2, offsetof(CallInfo, flags)));
build.tbz(x3, countrz(LUA_CALLINFO_CUSTOM), helpers.exitContinueVm);
build.mov(rClosure, x0);
build.ldr(rConstants, mem(x1, offsetof(Proto, k))); // proto->k
build.ldr(rCode, mem(x1, offsetof(Proto, code))); // proto->code
// Get instruction index from instruction pointer
// To get instruction index from instruction pointer, we need to divide byte offset by 4
// But we will actually need to scale instruction index by 4 back to byte offset later so it cancels out
build.ldr(x2, mem(x2, offsetof(CallInfo, savedpc))); // L->ci->savedpc
build.sub(x2, x2, rCode);
// Get new instruction location and jump to it
LUAU_ASSERT(offsetof(Proto, exectarget) == offsetof(Proto, execdata) + 8);
build.ldp(x3, x4, mem(x1, offsetof(Proto, execdata)));
build.ldr(w2, mem(x3, x2));
build.add(x4, x4, x2);
build.br(x4);
}
static EntryLocations buildEntryFunction(AssemblyBuilderA64& build, UnwindBuilder& unwind)
{
EntryLocations locations;
// Arguments: x0 = lua_State*, x1 = Proto*, x2 = native code pointer to jump to, x3 = NativeContext*
locations.start = build.setLabel();
// prologue
build.sub(sp, sp, kStackSize);
build.stp(x29, x30, mem(sp)); // fp, lr
// stash non-volatile registers used for execution environment
build.stp(x19, x20, mem(sp, 16));
build.stp(x21, x22, mem(sp, 32));
build.stp(x23, x24, mem(sp, 48));
build.mov(x29, sp); // this is only necessary if we maintain frame pointers, which we do in the JIT for now
locations.prologueEnd = build.setLabel();
uint32_t prologueSize = build.getLabelOffset(locations.prologueEnd) - build.getLabelOffset(locations.start);
// Setup native execution environment
build.mov(rState, x0);
build.mov(rNativeContext, x3);
build.ldr(rBase, mem(x0, offsetof(lua_State, base))); // L->base
build.ldr(rConstants, mem(x1, offsetof(Proto, k))); // proto->k
build.ldr(rCode, mem(x1, offsetof(Proto, code))); // proto->code
build.ldr(x9, mem(x0, offsetof(lua_State, ci))); // L->ci
build.ldr(x9, mem(x9, offsetof(CallInfo, func))); // L->ci->func
build.ldr(rClosure, mem(x9, offsetof(TValue, value.gc))); // L->ci->func->value.gc aka cl
// Jump to the specified instruction; further control flow will be handled with custom ABI with register setup from EmitCommonA64.h
build.br(x2);
// Even though we jumped away, we will return here in the end
locations.epilogueStart = build.setLabel();
// Cleanup and exit
build.ldp(x23, x24, mem(sp, 48));
build.ldp(x21, x22, mem(sp, 32));
build.ldp(x19, x20, mem(sp, 16));
build.ldp(x29, x30, mem(sp)); // fp, lr
build.add(sp, sp, kStackSize);
build.ret();
// Our entry function is special: it spans the whole remaining code area
unwind.startFunction();
unwind.prologueA64(prologueSize, kStackSize, {x29, x30, x19, x20, x21, x22, x23, x24});
unwind.finishFunction(build.getLabelOffset(locations.start), kFullBlockFunction);
return locations;
}
bool initHeaderFunctions(NativeState& data)
{
AssemblyBuilderA64 build(/* logText= */ false);
UnwindBuilder& unwind = *data.unwindBuilder.get();
unwind.startInfo(UnwindBuilder::A64);
EntryLocations entryLocations = buildEntryFunction(build, unwind);
build.finalize();
unwind.finishInfo();
LUAU_ASSERT(build.data.empty());
uint8_t* codeStart = nullptr;
if (!data.codeAllocator.allocate(build.data.data(), int(build.data.size()), reinterpret_cast<const uint8_t*>(build.code.data()),
int(build.code.size() * sizeof(build.code[0])), data.gateData, data.gateDataSize, codeStart))
{
LUAU_ASSERT(!"failed to create entry function");
return false;
}
// Set the offset at the beginning so that functions in new blocks will not overlap the locations
// specified by the unwind information of the entry function
unwind.setBeginOffset(build.getLabelOffset(entryLocations.prologueEnd));
data.context.gateEntry = codeStart + build.getLabelOffset(entryLocations.start);
data.context.gateExit = codeStart + build.getLabelOffset(entryLocations.epilogueStart);
return true;
}
void assembleHelpers(AssemblyBuilderA64& build, ModuleHelpers& helpers)
{
if (build.logText)
build.logAppend("; exitContinueVm\n");
helpers.exitContinueVm = build.setLabel();
emitExit(build, /* continueInVm */ true);
if (build.logText)
build.logAppend("; exitNoContinueVm\n");
helpers.exitNoContinueVm = build.setLabel();
emitExit(build, /* continueInVm */ false);
if (build.logText)
build.logAppend("; reentry\n");
helpers.reentry = build.setLabel();
emitReentry(build, helpers);
if (build.logText)
build.logAppend("; interrupt\n");
helpers.interrupt = build.setLabel();
emitInterrupt(build);
}
} // namespace A64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,22 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
namespace Luau
{
namespace CodeGen
{
struct NativeState;
struct ModuleHelpers;
namespace A64
{
class AssemblyBuilderA64;
bool initHeaderFunctions(NativeState& data);
void assembleHelpers(AssemblyBuilderA64& build, ModuleHelpers& helpers);
} // namespace A64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,959 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "CodeGenUtils.h"
#include "CustomExecUtils.h"
#include "lvm.h"
#include "lbuiltins.h"
#include "lbytecode.h"
#include "ldebug.h"
#include "ldo.h"
#include "lfunc.h"
#include "lgc.h"
#include "lmem.h"
#include "lnumutils.h"
#include "lstate.h"
#include "lstring.h"
#include "ltable.h"
#include <string.h>
LUAU_FASTFLAG(LuauUniformTopHandling)
// All external function calls that can cause stack realloc or Lua calls have to be wrapped in VM_PROTECT
// This makes sure that we save the pc (in case the Lua call needs to generate a backtrace) before the call,
// and restores the stack pointer after in case stack gets reallocated
// Should only be used on the slow paths.
#define VM_PROTECT(x) \
{ \
L->ci->savedpc = pc; \
{ \
x; \
}; \
base = L->base; \
}
// Some external functions can cause an error, but never reallocate the stack; for these, VM_PROTECT_PC() is
// a cheaper version of VM_PROTECT that can be called before the external call.
#define VM_PROTECT_PC() L->ci->savedpc = pc
#define VM_REG(i) (LUAU_ASSERT(unsigned(i) < unsigned(L->top - base)), &base[i])
#define VM_KV(i) (LUAU_ASSERT(unsigned(i) < unsigned(cl->l.p->sizek)), &k[i])
#define VM_UV(i) (LUAU_ASSERT(unsigned(i) < unsigned(cl->nupvalues)), &cl->l.uprefs[i])
#define VM_PATCH_C(pc, slot) *const_cast<Instruction*>(pc) = ((uint8_t(slot) << 24) | (0x00ffffffu & *(pc)))
#define VM_PATCH_E(pc, slot) *const_cast<Instruction*>(pc) = ((uint32_t(slot) << 8) | (0x000000ffu & *(pc)))
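VM_PATCH_C overwrites the C operand stored in the top byte of the 32-bit instruction word, while VM_PATCH_E overwrites the upper 24 bits holding the E operand; a worked example with illustrative values:
// given *pc == 0x12345678:
//   VM_PATCH_C(pc, 0xAB) leaves *pc == 0xAB345678
//   VM_PATCH_E(pc, 0xABCDEF) leaves *pc == 0xABCDEF78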
#define VM_INTERRUPT() \
{ \
void (*interrupt)(lua_State*, int) = L->global->cb.interrupt; \
if (LUAU_UNLIKELY(!!interrupt)) \
{ /* the interrupt hook is called right before we advance pc */ \
VM_PROTECT(L->ci->savedpc++; interrupt(L, -1)); \
if (L->status != 0) \
{ \
L->ci->savedpc--; \
return NULL; \
} \
} \
}
namespace Luau
{
namespace CodeGen
{
bool forgLoopTableIter(lua_State* L, Table* h, int index, TValue* ra)
{
int sizearray = h->sizearray;
// first we advance index through the array portion
while (unsigned(index) < unsigned(sizearray))
{
TValue* e = &h->array[index];
if (!ttisnil(e))
{
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
setnvalue(ra + 3, double(index + 1));
setobj2s(L, ra + 4, e);
return true;
}
index++;
}
int sizenode = 1 << h->lsizenode;
// then we advance index through the hash portion
while (unsigned(index - h->sizearray) < unsigned(sizenode))
{
LuaNode* n = &h->node[index - sizearray];
if (!ttisnil(gval(n)))
{
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
getnodekey(L, ra + 3, n);
setobj(L, ra + 4, gval(n));
return true;
}
index++;
}
return false;
}
bool forgLoopNodeIter(lua_State* L, Table* h, int index, TValue* ra)
{
int sizearray = h->sizearray;
int sizenode = 1 << h->lsizenode;
// we advance index through the hash portion
while (unsigned(index - sizearray) < unsigned(sizenode))
{
LuaNode* n = &h->node[index - sizearray];
if (!ttisnil(gval(n)))
{
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
getnodekey(L, ra + 3, n);
setobj(L, ra + 4, gval(n));
return true;
}
index++;
}
return false;
}
bool forgLoopNonTableFallback(lua_State* L, int insnA, int aux)
{
TValue* base = L->base;
TValue* ra = VM_REG(insnA);
// note: it's safe to push arguments past top for complicated reasons (see lvmexecute.cpp)
setobj2s(L, ra + 3 + 2, ra + 2);
setobj2s(L, ra + 3 + 1, ra + 1);
setobj2s(L, ra + 3, ra);
L->top = ra + 3 + 3; // func + 2 args (state and index)
LUAU_ASSERT(L->top <= L->stack_last);
luaD_call(L, ra + 3, uint8_t(aux));
L->top = L->ci->top;
// recompute ra since stack might have been reallocated
base = L->base;
ra = VM_REG(insnA);
// copy first variable back into the iteration index
setobj2s(L, ra + 2, ra + 3);
return !ttisnil(ra + 3);
}
void forgPrepXnextFallback(lua_State* L, TValue* ra, int pc)
{
if (!ttisfunction(ra))
{
Closure* cl = clvalue(L->ci->func);
L->ci->savedpc = cl->l.p->code + pc;
luaG_typeerror(L, ra, "iterate over");
}
}
Closure* callProlog(lua_State* L, TValue* ra, StkId argtop, int nresults)
{
// slow-path: not a function call
if (LUAU_UNLIKELY(!ttisfunction(ra)))
{
luaV_tryfuncTM(L, ra);
argtop++; // __call adds an extra self
}
Closure* ccl = clvalue(ra);
CallInfo* ci = incr_ci(L);
ci->func = ra;
ci->base = ra + 1;
ci->top = argtop + ccl->stacksize; // note: technically UB since we haven't reallocated the stack yet
ci->savedpc = NULL;
ci->flags = 0;
ci->nresults = nresults;
L->base = ci->base;
L->top = argtop;
// note: this reallocs stack, but we don't need to VM_PROTECT this
// this is because we're going to modify base/savedpc manually anyhow
// crucially, we can't use ra/argtop after this line
luaD_checkstack(L, ccl->stacksize);
return ccl;
}
void callEpilogC(lua_State* L, int nresults, int n)
{
// ci is our callinfo, cip is our parent
CallInfo* ci = L->ci;
CallInfo* cip = ci - 1;
// copy return values into parent stack (but only up to nresults!), fill the rest with nil
// note: in MULTRET context nresults starts as -1 so i != 0 condition never activates intentionally
StkId res = ci->func;
StkId vali = L->top - n;
StkId valend = L->top;
int i;
for (i = nresults; i != 0 && vali < valend; i--)
setobj2s(L, res++, vali++);
while (i-- > 0)
setnilvalue(res++);
// pop the stack frame
L->ci = cip;
L->base = cip->base;
L->top = (nresults == LUA_MULTRET) ? res : cip->top;
}
// Extracted as-is from lvmexecute.cpp with the exception of control flow (reentry) and removed interrupts/savedpc
Closure* callFallback(lua_State* L, StkId ra, StkId argtop, int nresults)
{
// slow-path: not a function call
if (LUAU_UNLIKELY(!ttisfunction(ra)))
{
luaV_tryfuncTM(L, ra);
argtop++; // __call adds an extra self
}
Closure* ccl = clvalue(ra);
CallInfo* ci = incr_ci(L);
ci->func = ra;
ci->base = ra + 1;
ci->top = argtop + ccl->stacksize; // note: technically UB since we haven't reallocated the stack yet
ci->savedpc = NULL;
ci->flags = 0;
ci->nresults = nresults;
L->base = ci->base;
L->top = argtop;
// note: this reallocs stack, but we don't need to VM_PROTECT this
// this is because we're going to modify base/savedpc manually anyhow
// crucially, we can't use ra/argtop after this line
luaD_checkstack(L, ccl->stacksize);
LUAU_ASSERT(ci->top <= L->stack_last);
if (!ccl->isC)
{
Proto* p = ccl->l.p;
// fill unused parameters with nil
StkId argi = L->top;
StkId argend = L->base + p->numparams;
while (argi < argend)
setnilvalue(argi++); // complete missing arguments
L->top = p->is_vararg ? argi : ci->top;
// keep executing new function
ci->savedpc = p->code;
if (LUAU_LIKELY(p->execdata != NULL))
ci->flags = LUA_CALLINFO_CUSTOM;
return ccl;
}
else
{
lua_CFunction func = ccl->c.f;
int n = func(L);
// yield
if (n < 0)
return NULL;
// ci is our callinfo, cip is our parent
CallInfo* ci = L->ci;
CallInfo* cip = ci - 1;
// copy return values into parent stack (but only up to nresults!), fill the rest with nil
// note: in MULTRET context nresults starts as -1 so i != 0 condition never activates intentionally
StkId res = ci->func;
StkId vali = L->top - n;
StkId valend = L->top;
int i;
for (i = nresults; i != 0 && vali < valend; i--)
setobj2s(L, res++, vali++);
while (i-- > 0)
setnilvalue(res++);
// pop the stack frame
L->ci = cip;
L->base = cip->base;
L->top = (nresults == LUA_MULTRET) ? res : cip->top;
// keep executing current function
LUAU_ASSERT(isLua(cip));
return clvalue(cip->func);
}
}
// Extracted as-is from lvmexecute.cpp with the exception of control flow (reentry) and removed interrupts
Closure* returnFallback(lua_State* L, StkId ra, StkId valend)
{
// ci is our callinfo, cip is our parent
CallInfo* ci = L->ci;
CallInfo* cip = ci - 1;
StkId res = ci->func; // note: we assume CALL always puts func+args and expects results to start at func
StkId vali = ra;
int nresults = ci->nresults;
// copy return values into parent stack (but only up to nresults!), fill the rest with nil
// note: in MULTRET context nresults starts as -1 so i != 0 condition never activates intentionally
int i;
for (i = nresults; i != 0 && vali < valend; i--)
setobj2s(L, res++, vali++);
while (i-- > 0)
setnilvalue(res++);
// pop the stack frame
L->ci = cip;
L->base = cip->base;
L->top = (nresults == LUA_MULTRET) ? res : cip->top;
// we're done!
if (LUAU_UNLIKELY(ci->flags & LUA_CALLINFO_RETURN))
{
if (!FFlag::LuauUniformTopHandling)
L->top = res;
return NULL;
}
// keep executing new function
LUAU_ASSERT(isLua(cip));
return clvalue(cip->func);
}
const Instruction* executeGETGLOBAL(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
uint32_t aux = *pc++;
TValue* kv = VM_KV(aux);
LUAU_ASSERT(ttisstring(kv));
// fast-path should already have been checked, so we skip checking for it here
Table* h = cl->env;
int slot = LUAU_INSN_C(insn) & h->nodemask8;
// slow-path, may invoke Lua calls via __index metamethod
TValue g;
sethvalue(L, &g, h);
L->cachedslot = slot;
VM_PROTECT(luaV_gettable(L, &g, kv, ra));
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, L->cachedslot);
return pc;
}
const Instruction* executeSETGLOBAL(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
uint32_t aux = *pc++;
TValue* kv = VM_KV(aux);
LUAU_ASSERT(ttisstring(kv));
// fast-path should already have been checked, so we skip checking for it here
Table* h = cl->env;
int slot = LUAU_INSN_C(insn) & h->nodemask8;
// slow-path, may invoke Lua calls via __newindex metamethod
TValue g;
sethvalue(L, &g, h);
L->cachedslot = slot;
VM_PROTECT(luaV_settable(L, &g, kv, ra));
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, L->cachedslot);
return pc;
}
const Instruction* executeGETTABLEKS(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
StkId rb = VM_REG(LUAU_INSN_B(insn));
uint32_t aux = *pc++;
TValue* kv = VM_KV(aux);
LUAU_ASSERT(ttisstring(kv));
// fast-path: built-in table
if (ttistable(rb))
{
Table* h = hvalue(rb);
int slot = LUAU_INSN_C(insn) & h->nodemask8;
LuaNode* n = &h->node[slot];
// fast-path: value is in expected slot
if (LUAU_LIKELY(ttisstring(gkey(n)) && tsvalue(gkey(n)) == tsvalue(kv) && !ttisnil(gval(n))))
{
setobj2s(L, ra, gval(n));
return pc;
}
else if (!h->metatable)
{
// fast-path: value is not in expected slot, but the table lookup doesn't involve metatable
const TValue* res = luaH_getstr(h, tsvalue(kv));
if (res != luaO_nilobject)
{
int cachedslot = gval2slot(h, res);
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, cachedslot);
}
setobj2s(L, ra, res);
return pc;
}
else
{
// slow-path, may invoke Lua calls via __index metamethod
L->cachedslot = slot;
VM_PROTECT(luaV_gettable(L, rb, kv, ra));
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, L->cachedslot);
return pc;
}
}
else
{
// fast-path: user data with C __index TM
const TValue* fn = 0;
if (ttisuserdata(rb) && (fn = fasttm(L, uvalue(rb)->metatable, TM_INDEX)) && ttisfunction(fn) && clvalue(fn)->isC)
{
// note: it's safe to push arguments past top for complicated reasons (see top of the file)
LUAU_ASSERT(L->top + 3 < L->stack + L->stacksize);
StkId top = L->top;
setobj2s(L, top + 0, fn);
setobj2s(L, top + 1, rb);
setobj2s(L, top + 2, kv);
L->top = top + 3;
L->cachedslot = LUAU_INSN_C(insn);
VM_PROTECT(luaV_callTM(L, 2, LUAU_INSN_A(insn)));
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, L->cachedslot);
return pc;
}
else if (ttisvector(rb))
{
// fast-path: quick case-insensitive comparison with "X"/"Y"/"Z"
const char* name = getstr(tsvalue(kv));
int ic = (name[0] | ' ') - 'x';
#if LUA_VECTOR_SIZE == 4
// 'w' is before 'x' in ASCII, so ic is -1 when indexing with 'w'
if (ic == -1)
ic = 3;
#endif
if (unsigned(ic) < LUA_VECTOR_SIZE && name[1] == '\0')
{
const float* v = rb->value.v; // silences ubsan when indexing v[]
setnvalue(ra, v[ic]);
return pc;
}
fn = fasttm(L, L->global->mt[LUA_TVECTOR], TM_INDEX);
if (fn && ttisfunction(fn) && clvalue(fn)->isC)
{
// note: it's safe to push arguments past top for complicated reasons (see top of the file)
LUAU_ASSERT(L->top + 3 < L->stack + L->stacksize);
StkId top = L->top;
setobj2s(L, top + 0, fn);
setobj2s(L, top + 1, rb);
setobj2s(L, top + 2, kv);
L->top = top + 3;
L->cachedslot = LUAU_INSN_C(insn);
VM_PROTECT(luaV_callTM(L, 2, LUAU_INSN_A(insn)));
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, L->cachedslot);
return pc;
}
// fall through to slow path
}
// fall through to slow path
}
// slow-path, may invoke Lua calls via __index metamethod
VM_PROTECT(luaV_gettable(L, rb, kv, ra));
return pc;
}
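// Illustrative sketch (not part of the original commit): the component-index trick above,
// extracted. (c | ' ') lowercases an ASCII letter, so 'X' and 'x' both map to 0; 'w' sits
// just below 'x' and yields -1, which is remapped to 3 when vectors are 4-wide.
#if 0
static int vectorComponentIndexSketch(char c)
{
    int ic = (c | ' ') - 'x'; // 'x'/'X' -> 0, 'y'/'Y' -> 1, 'z'/'Z' -> 2, 'w'/'W' -> -1
#if LUA_VECTOR_SIZE == 4
    if (ic == -1)
        ic = 3;
#endif
    return ic; // the caller still validates unsigned(ic) < LUA_VECTOR_SIZE and name[1] == '\0'
}
#endif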
const Instruction* executeSETTABLEKS(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
StkId rb = VM_REG(LUAU_INSN_B(insn));
uint32_t aux = *pc++;
TValue* kv = VM_KV(aux);
LUAU_ASSERT(ttisstring(kv));
// fast-path: built-in table
if (ttistable(rb))
{
Table* h = hvalue(rb);
int slot = LUAU_INSN_C(insn) & h->nodemask8;
LuaNode* n = &h->node[slot];
// fast-path: value is in expected slot
if (LUAU_LIKELY(ttisstring(gkey(n)) && tsvalue(gkey(n)) == tsvalue(kv) && !ttisnil(gval(n)) && !h->readonly))
{
setobj2t(L, gval(n), ra);
luaC_barriert(L, h, ra);
return pc;
}
else if (fastnotm(h->metatable, TM_NEWINDEX) && !h->readonly)
{
VM_PROTECT_PC(); // set may fail
TValue* res = luaH_setstr(L, h, tsvalue(kv));
int cachedslot = gval2slot(h, res);
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, cachedslot);
setobj2t(L, res, ra);
luaC_barriert(L, h, ra);
return pc;
}
else
{
// slow-path, may invoke Lua calls via __newindex metamethod
L->cachedslot = slot;
VM_PROTECT(luaV_settable(L, rb, kv, ra));
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, L->cachedslot);
return pc;
}
}
else
{
// fast-path: user data with C __newindex TM
const TValue* fn = 0;
if (ttisuserdata(rb) && (fn = fasttm(L, uvalue(rb)->metatable, TM_NEWINDEX)) && ttisfunction(fn) && clvalue(fn)->isC)
{
// note: it's safe to push arguments past top for complicated reasons (see top of the file)
LUAU_ASSERT(L->top + 4 < L->stack + L->stacksize);
StkId top = L->top;
setobj2s(L, top + 0, fn);
setobj2s(L, top + 1, rb);
setobj2s(L, top + 2, kv);
setobj2s(L, top + 3, ra);
L->top = top + 4;
L->cachedslot = LUAU_INSN_C(insn);
VM_PROTECT(luaV_callTM(L, 3, -1));
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, L->cachedslot);
return pc;
}
else
{
// slow-path, may invoke Lua calls via __newindex metamethod
VM_PROTECT(luaV_settable(L, rb, kv, ra));
return pc;
}
}
}
const Instruction* executeNEWCLOSURE(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
Proto* pv = cl->l.p->p[LUAU_INSN_D(insn)];
LUAU_ASSERT(unsigned(LUAU_INSN_D(insn)) < unsigned(cl->l.p->sizep));
VM_PROTECT_PC(); // luaF_newLclosure may fail due to OOM
// note: we save closure to stack early in case the code below wants to capture it by value
Closure* ncl = luaF_newLclosure(L, pv->nups, cl->env, pv);
setclvalue(L, ra, ncl);
for (int ui = 0; ui < pv->nups; ++ui)
{
Instruction uinsn = *pc++;
LUAU_ASSERT(LUAU_INSN_OP(uinsn) == LOP_CAPTURE);
switch (LUAU_INSN_A(uinsn))
{
case LCT_VAL:
setobj(L, &ncl->l.uprefs[ui], VM_REG(LUAU_INSN_B(uinsn)));
break;
case LCT_REF:
setupvalue(L, &ncl->l.uprefs[ui], luaF_findupval(L, VM_REG(LUAU_INSN_B(uinsn))));
break;
case LCT_UPVAL:
setobj(L, &ncl->l.uprefs[ui], VM_UV(LUAU_INSN_B(uinsn)));
break;
default:
LUAU_ASSERT(!"Unknown upvalue capture type");
LUAU_UNREACHABLE(); // improves switch() codegen by eliding opcode bounds checks
}
}
VM_PROTECT(luaC_checkGC(L));
return pc;
}
const Instruction* executeNAMECALL(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
StkId rb = VM_REG(LUAU_INSN_B(insn));
uint32_t aux = *pc++;
TValue* kv = VM_KV(aux);
LUAU_ASSERT(ttisstring(kv));
if (ttistable(rb))
{
Table* h = hvalue(rb);
// note: we can't use nodemask8 here because we need to query the main position of the table, and 8-bit nodemask8 only works
// for predictive lookups
LuaNode* n = &h->node[tsvalue(kv)->hash & (sizenode(h) - 1)];
const TValue* mt = 0;
const LuaNode* mtn = 0;
// fast-path: key is in the table in expected slot
if (ttisstring(gkey(n)) && tsvalue(gkey(n)) == tsvalue(kv) && !ttisnil(gval(n)))
{
// note: order of copies allows rb to alias ra+1 or ra
setobj2s(L, ra + 1, rb);
setobj2s(L, ra, gval(n));
}
// fast-path: key is absent from the base, table has an __index table, and it has the result in the expected slot
else if (gnext(n) == 0 && (mt = fasttm(L, hvalue(rb)->metatable, TM_INDEX)) && ttistable(mt) &&
(mtn = &hvalue(mt)->node[LUAU_INSN_C(insn) & hvalue(mt)->nodemask8]) && ttisstring(gkey(mtn)) && tsvalue(gkey(mtn)) == tsvalue(kv) &&
!ttisnil(gval(mtn)))
{
// note: order of copies allows rb to alias ra+1 or ra
setobj2s(L, ra + 1, rb);
setobj2s(L, ra, gval(mtn));
}
else
{
// slow-path: handles full table lookup
setobj2s(L, ra + 1, rb);
L->cachedslot = LUAU_INSN_C(insn);
VM_PROTECT(luaV_gettable(L, rb, kv, ra));
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, L->cachedslot);
// recompute ra since stack might have been reallocated
ra = VM_REG(LUAU_INSN_A(insn));
if (ttisnil(ra))
luaG_methoderror(L, ra + 1, tsvalue(kv));
}
}
else
{
Table* mt = ttisuserdata(rb) ? uvalue(rb)->metatable : L->global->mt[ttype(rb)];
const TValue* tmi = 0;
// fast-path: metatable with __namecall
if (const TValue* fn = fasttm(L, mt, TM_NAMECALL))
{
// note: order of copies allows rb to alias ra+1 or ra
setobj2s(L, ra + 1, rb);
setobj2s(L, ra, fn);
L->namecall = tsvalue(kv);
}
else if ((tmi = fasttm(L, mt, TM_INDEX)) && ttistable(tmi))
{
Table* h = hvalue(tmi);
int slot = LUAU_INSN_C(insn) & h->nodemask8;
LuaNode* n = &h->node[slot];
// fast-path: metatable with __index that has method in expected slot
if (LUAU_LIKELY(ttisstring(gkey(n)) && tsvalue(gkey(n)) == tsvalue(kv) && !ttisnil(gval(n))))
{
// note: order of copies allows rb to alias ra+1 or ra
setobj2s(L, ra + 1, rb);
setobj2s(L, ra, gval(n));
}
else
{
// slow-path: handles slot mismatch
setobj2s(L, ra + 1, rb);
L->cachedslot = slot;
VM_PROTECT(luaV_gettable(L, rb, kv, ra));
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, L->cachedslot);
// recompute ra since stack might have been reallocated
ra = VM_REG(LUAU_INSN_A(insn));
if (ttisnil(ra))
luaG_methoderror(L, ra + 1, tsvalue(kv));
}
}
else
{
// slow-path: handles non-table __index
setobj2s(L, ra + 1, rb);
VM_PROTECT(luaV_gettable(L, rb, kv, ra));
// recompute ra since stack might have been reallocated
ra = VM_REG(LUAU_INSN_A(insn));
if (ttisnil(ra))
luaG_methoderror(L, ra + 1, tsvalue(kv));
}
}
// intentional fallthrough to CALL
LUAU_ASSERT(LUAU_INSN_OP(*pc) == LOP_CALL);
return pc;
}
const Instruction* executeSETLIST(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
StkId rb = &base[LUAU_INSN_B(insn)]; // note: this can point to L->top if c == LUA_MULTRET making VM_REG unsafe to use
int c = LUAU_INSN_C(insn) - 1;
uint32_t index = *pc++;
if (c == LUA_MULTRET)
{
c = int(L->top - rb);
L->top = L->ci->top;
}
Table* h = hvalue(ra);
// TODO: we really don't need this anymore
if (!ttistable(ra))
return NULL; // temporary workaround to weaken a rather powerful exploitation primitive in case of a MITM attack on bytecode
int last = index + c - 1;
if (last > h->sizearray)
{
VM_PROTECT_PC(); // luaH_resizearray may fail due to OOM
luaH_resizearray(L, h, last);
}
TValue* array = h->array;
for (int i = 0; i < c; ++i)
setobj2t(L, &array[index + i - 1], rb + i);
luaC_barrierfast(L, h);
return pc;
}
const Instruction* executeFORGPREP(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
if (ttisfunction(ra))
{
// will be called during FORGLOOP
}
else
{
Table* mt = ttistable(ra) ? hvalue(ra)->metatable : ttisuserdata(ra) ? uvalue(ra)->metatable : cast_to(Table*, NULL);
if (const TValue* fn = fasttm(L, mt, TM_ITER))
{
setobj2s(L, ra + 1, ra);
setobj2s(L, ra, fn);
L->top = ra + 2; // func + self arg
LUAU_ASSERT(L->top <= L->stack_last);
VM_PROTECT(luaD_call(L, ra, 3));
L->top = L->ci->top;
// recompute ra since stack might have been reallocated
ra = VM_REG(LUAU_INSN_A(insn));
// protect against __iter returning nil, since nil is used as a marker for builtin iteration in FORGLOOP
if (ttisnil(ra))
{
VM_PROTECT_PC(); // next call always errors
luaG_typeerror(L, ra, "call");
}
}
else if (fasttm(L, mt, TM_CALL))
{
// table or userdata with __call, will be called during FORGLOOP
// TODO: we might be able to stop supporting this depending on whether it's used in practice
}
else if (ttistable(ra))
{
// set up registers for builtin iteration
setobj2s(L, ra + 1, ra);
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(0)));
setnilvalue(ra);
}
else
{
VM_PROTECT_PC(); // next call always errors
luaG_typeerror(L, ra, "iterate over");
}
}
pc += LUAU_INSN_D(insn);
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
return pc;
}
const Instruction* executeGETVARARGS(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
int b = LUAU_INSN_B(insn) - 1;
int n = cast_int(base - L->ci->func) - cl->l.p->numparams - 1;
if (b == LUA_MULTRET)
{
VM_PROTECT(luaD_checkstack(L, n));
StkId ra = VM_REG(LUAU_INSN_A(insn)); // previous call may change the stack
for (int j = 0; j < n; j++)
setobj2s(L, ra + j, base - n + j);
L->top = ra + n;
return pc;
}
else
{
StkId ra = VM_REG(LUAU_INSN_A(insn));
for (int j = 0; j < b && j < n; j++)
setobj2s(L, ra + j, base - n + j);
for (int j = n; j < b; j++)
setnilvalue(ra + j);
return pc;
}
}
const Instruction* executeDUPCLOSURE(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
TValue* kv = VM_KV(LUAU_INSN_D(insn));
Closure* kcl = clvalue(kv);
VM_PROTECT_PC(); // luaF_newLclosure may fail due to OOM
// clone closure if the environment is not shared
// note: we save closure to stack early in case the code below wants to capture it by value
Closure* ncl = (kcl->env == cl->env) ? kcl : luaF_newLclosure(L, kcl->nupvalues, cl->env, kcl->l.p);
setclvalue(L, ra, ncl);
// this loop does three things:
// - if the closure was created anew, it just fills it with upvalues
// - if the closure from the constant table is used, it fills it with upvalues so that it can be shared in the future
// - if the closure is reused, it checks if the reuse is safe via rawequal, and falls back to duplicating the closure
// normally this would use two separate loops, for reuse check and upvalue setup, but MSVC codegen goes crazy if you do that
for (int ui = 0; ui < kcl->nupvalues; ++ui)
{
Instruction uinsn = pc[ui];
LUAU_ASSERT(LUAU_INSN_OP(uinsn) == LOP_CAPTURE);
LUAU_ASSERT(LUAU_INSN_A(uinsn) == LCT_VAL || LUAU_INSN_A(uinsn) == LCT_UPVAL);
TValue* uv = (LUAU_INSN_A(uinsn) == LCT_VAL) ? VM_REG(LUAU_INSN_B(uinsn)) : VM_UV(LUAU_INSN_B(uinsn));
// check if the existing closure is safe to reuse
if (ncl == kcl && luaO_rawequalObj(&ncl->l.uprefs[ui], uv))
continue;
// lazily clone the closure and update the upvalues
if (ncl == kcl && kcl->preload == 0)
{
ncl = luaF_newLclosure(L, kcl->nupvalues, cl->env, kcl->l.p);
setclvalue(L, ra, ncl);
ui = -1; // restart the loop to fill all upvalues
continue;
}
// this updates a newly created closure, or an existing closure created during preload, in which case we need a barrier
setobj(L, &ncl->l.uprefs[ui], uv);
luaC_barrier(L, ncl, uv);
}
// this is a noop if ncl is newly created or shared successfully, but it has to run after the closure is preloaded for the first time
ncl->preload = 0;
if (kcl != ncl)
VM_PROTECT(luaC_checkGC(L));
pc += kcl->nupvalues;
return pc;
}
const Instruction* executePREPVARARGS(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
int numparams = LUAU_INSN_A(insn);
// all fixed parameters are copied after the top so we need more stack space
VM_PROTECT(luaD_checkstack(L, cl->stacksize + numparams));
// the caller must have filled extra fixed arguments with nil
LUAU_ASSERT(cast_int(L->top - base) >= numparams);
// move fixed parameters to final position
StkId fixed = base; // first fixed argument
base = L->top; // final position of first argument
for (int i = 0; i < numparams; ++i)
{
setobj2s(L, base + i, fixed + i);
setnilvalue(fixed + i);
}
// rewire our stack frame to point to the new base
L->ci->base = base;
L->ci->top = base + cl->stacksize;
L->base = base;
L->top = L->ci->top;
return pc;
}
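// Illustrative example (not part of the original commit): for p->numparams == 2 and a call
// f(1, 2, 3), the frame goes from
//   base -> [1][2][3]                  (top = base + 3)
// to
//   [nil][nil][3]  newbase -> [1][2]   (fixed args copied above the old top, old slots nil'd)
// leaving the vararg '3' directly below the new base, where GETVARARGS expects to find it.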
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,36 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "lobject.h"
namespace Luau
{
namespace CodeGen
{
bool forgLoopTableIter(lua_State* L, Table* h, int index, TValue* ra);
bool forgLoopNodeIter(lua_State* L, Table* h, int index, TValue* ra);
bool forgLoopNonTableFallback(lua_State* L, int insnA, int aux);
void forgPrepXnextFallback(lua_State* L, TValue* ra, int pc);
Closure* callProlog(lua_State* L, TValue* ra, StkId argtop, int nresults);
void callEpilogC(lua_State* L, int nresults, int n);
Closure* callFallback(lua_State* L, StkId ra, StkId argtop, int nresults);
Closure* returnFallback(lua_State* L, StkId ra, StkId valend);
const Instruction* executeGETGLOBAL(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executeSETGLOBAL(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executeGETTABLEKS(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executeSETTABLEKS(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executeNEWCLOSURE(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executeNAMECALL(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executeSETLIST(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executeFORGPREP(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executeGETVARARGS(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executeDUPCLOSURE(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executePREPVARARGS(lua_State* L, const Instruction* pc, StkId base, TValue* k);
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,197 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "CodeGenX64.h"
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/UnwindBuilder.h"
#include "CustomExecUtils.h"
#include "NativeState.h"
#include "EmitCommonX64.h"
#include "lstate.h"
/* An overview of the native environment stack setup that the entry function performs:
* Each line is 8 bytes, stack grows downwards.
*
* | ... previous frames ...
* | rdx home space | (unused)
* | rcx home space | (unused)
* | return address |
* | ... saved non-volatile registers ... <-- rsp + kStackSize + kLocalsSize
* | unused | for 16 byte alignment of the stack
* | sCode |
* | sClosure | <-- rsp + kStackSize
* | argument 6 | <-- rsp + 40
* | argument 5 | <-- rsp + 32
* | r9 home space |
* | r8 home space |
* | rdx home space |
* | rcx home space | <-- rsp points here
*
* Arguments to our entry function are saved to home space only on Windows.
 * Space for arguments to the function we call is always reserved, but it is used only on Windows.
*
 * Right now we use a frame pointer, but because the layout is fixed we can omit it in the future
*/
namespace Luau
{
namespace CodeGen
{
namespace X64
{
struct EntryLocations
{
Label start;
Label prologueEnd;
Label epilogueStart;
};
static EntryLocations buildEntryFunction(AssemblyBuilderX64& build, UnwindBuilder& unwind)
{
EntryLocations locations;
build.align(kFunctionAlignment, X64::AlignmentDataX64::Ud2);
locations.start = build.setLabel();
unwind.startFunction();
// Save common non-volatile registers
if (build.abi == ABIX64::SystemV)
{
// We need to use a standard rbp-based frame setup for debuggers to work with JIT code
build.push(rbp);
build.mov(rbp, rsp);
}
build.push(rbx);
build.push(r12);
build.push(r13);
build.push(r14);
build.push(r15);
if (build.abi == ABIX64::Windows)
{
// Save non-volatile registers that are specific to Windows x64 ABI
build.push(rdi);
build.push(rsi);
// On Windows, rbp is available as a general-purpose non-volatile register; we currently don't use it, but we need to push an even number
// of registers for stack alignment...
build.push(rbp);
// TODO: once we start using non-volatile SIMD registers on Windows, we will save those here
}
// Allocate stack space (reg home area + local data)
build.sub(rsp, kStackSize + kLocalsSize);
locations.prologueEnd = build.setLabel();
uint32_t prologueSize = build.getLabelOffset(locations.prologueEnd) - build.getLabelOffset(locations.start);
if (build.abi == ABIX64::SystemV)
unwind.prologueX64(prologueSize, kStackSize + kLocalsSize, /* setupFrame= */ true, {rbx, r12, r13, r14, r15});
else if (build.abi == ABIX64::Windows)
unwind.prologueX64(prologueSize, kStackSize + kLocalsSize, /* setupFrame= */ false, {rbx, r12, r13, r14, r15, rdi, rsi, rbp});
// Setup native execution environment
build.mov(rState, rArg1);
build.mov(rNativeContext, rArg4);
build.mov(rBase, qword[rState + offsetof(lua_State, base)]); // L->base
build.mov(rax, qword[rState + offsetof(lua_State, ci)]); // L->ci
build.mov(rax, qword[rax + offsetof(CallInfo, func)]); // L->ci->func
build.mov(rax, qword[rax + offsetof(TValue, value.gc)]); // L->ci->func->value.gc aka cl
build.mov(sClosure, rax);
build.mov(rConstants, qword[rArg2 + offsetof(Proto, k)]); // proto->k
build.mov(rax, qword[rArg2 + offsetof(Proto, code)]); // proto->code
build.mov(sCode, rax);
// Jump to the specified instruction; further control flow will be handled with custom ABI with register setup from EmitCommonX64.h
build.jmp(rArg3);
// Even though we jumped away, we will return here in the end
locations.epilogueStart = build.setLabel();
// Cleanup and exit
build.add(rsp, kStackSize + kLocalsSize);
if (build.abi == ABIX64::Windows)
{
build.pop(rbp);
build.pop(rsi);
build.pop(rdi);
}
build.pop(r15);
build.pop(r14);
build.pop(r13);
build.pop(r12);
build.pop(rbx);
if (build.abi == ABIX64::SystemV)
build.pop(rbp);
build.ret();
// Our entry function is special, it spans the whole remaining code area
unwind.finishFunction(build.getLabelOffset(locations.start), kFullBlockFuncton);
return locations;
}
bool initHeaderFunctions(NativeState& data)
{
AssemblyBuilderX64 build(/* logText= */ false);
UnwindBuilder& unwind = *data.unwindBuilder.get();
unwind.startInfo(UnwindBuilder::X64);
EntryLocations entryLocations = buildEntryFunction(build, unwind);
build.finalize();
unwind.finishInfo();
LUAU_ASSERT(build.data.empty());
uint8_t* codeStart = nullptr;
if (!data.codeAllocator.allocate(
build.data.data(), int(build.data.size()), build.code.data(), int(build.code.size()), data.gateData, data.gateDataSize, codeStart))
{
LUAU_ASSERT(!"failed to create entry function");
return false;
}
// Set the offset at the beginning so that functions in new blocks will not overlay the locations
// specified by the unwind information of the entry function
unwind.setBeginOffset(build.getLabelOffset(entryLocations.prologueEnd));
data.context.gateEntry = codeStart + build.getLabelOffset(entryLocations.start);
data.context.gateExit = codeStart + build.getLabelOffset(entryLocations.epilogueStart);
return true;
}
void assembleHelpers(X64::AssemblyBuilderX64& build, ModuleHelpers& helpers)
{
if (build.logText)
build.logAppend("; exitContinueVm\n");
helpers.exitContinueVm = build.setLabel();
emitExit(build, /* continueInVm */ true);
if (build.logText)
build.logAppend("; exitNoContinueVm\n");
helpers.exitNoContinueVm = build.setLabel();
emitExit(build, /* continueInVm */ false);
if (build.logText)
build.logAppend("; continueCallInVm\n");
helpers.continueCallInVm = build.setLabel();
emitContinueCallInVm(build);
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,22 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
namespace Luau
{
namespace CodeGen
{
struct NativeState;
struct ModuleHelpers;
namespace X64
{
class AssemblyBuilderX64;
bool initHeaderFunctions(NativeState& data);
void assembleHelpers(AssemblyBuilderX64& build, ModuleHelpers& helpers);
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,106 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "NativeState.h"
#include "lobject.h"
#include "lstate.h"
namespace Luau
{
namespace CodeGen
{
// Here we define helper functions to wrap interaction with the Luau custom execution API so that it works with or without LUA_CUSTOM_EXECUTION
#if LUA_CUSTOM_EXECUTION
inline lua_ExecutionCallbacks* getExecutionCallbacks(lua_State* L)
{
return &L->global->ecb;
}
inline NativeState* getNativeState(lua_State* L)
{
lua_ExecutionCallbacks* ecb = getExecutionCallbacks(L);
return (NativeState*)ecb->context;
}
inline void setNativeState(lua_State* L, NativeState* nativeState)
{
lua_ExecutionCallbacks* ecb = getExecutionCallbacks(L);
ecb->context = nativeState;
}
inline NativeState* createNativeState(lua_State* L)
{
NativeState* state = new NativeState();
setNativeState(L, state);
return state;
}
inline void destroyNativeState(lua_State* L)
{
NativeState* state = getNativeState(L);
setNativeState(L, nullptr);
delete state;
}
#else
inline lua_ExecutionCallbacks* getExecutionCallbacks(lua_State* L)
{
return nullptr;
}
inline NativeState* getNativeState(lua_State* L)
{
return nullptr;
}
inline void setNativeState(lua_State* L, NativeState* nativeState) {}
inline NativeState* createNativeState(lua_State* L)
{
return nullptr;
}
inline void destroyNativeState(lua_State* L) {}
#endif
inline int getOpLength(LuauOpcode op)
{
switch (op)
{
case LOP_GETGLOBAL:
case LOP_SETGLOBAL:
case LOP_GETIMPORT:
case LOP_GETTABLEKS:
case LOP_SETTABLEKS:
case LOP_NAMECALL:
case LOP_JUMPIFEQ:
case LOP_JUMPIFLE:
case LOP_JUMPIFLT:
case LOP_JUMPIFNOTEQ:
case LOP_JUMPIFNOTLE:
case LOP_JUMPIFNOTLT:
case LOP_NEWTABLE:
case LOP_SETLIST:
case LOP_FORGLOOP:
case LOP_LOADKX:
case LOP_FASTCALL2:
case LOP_FASTCALL2K:
case LOP_JUMPXEQKNIL:
case LOP_JUMPXEQKB:
case LOP_JUMPXEQKN:
case LOP_JUMPXEQKS:
return 2;
default:
return 1;
}
}
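// Illustrative sketch (not part of the original commit): getOpLength gives the word count of
// an encoded instruction, so a bytecode stream can be walked like this:
#if 0
static void forEachInstructionSketch(const Instruction* code, int sizecode)
{
    for (int i = 0; i < sizecode;)
    {
        LuauOpcode op = LuauOpcode(LUAU_INSN_OP(code[i]));
        // ... inspect code[i] here (and the aux word code[i + 1] when getOpLength(op) == 2) ...
        i += getOpLength(op);
    }
}
#endif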
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,128 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "EmitBuiltinsX64.h"
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/Bytecode.h"
#include "Luau/IrCallWrapperX64.h"
#include "Luau/IrRegAllocX64.h"
#include "EmitCommonX64.h"
#include "NativeState.h"
#include "lstate.h"
namespace Luau
{
namespace CodeGen
{
namespace X64
{
static void emitBuiltinMathFrexp(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int arg, int nresults)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::xmmword, luauRegValue(arg));
callWrap.addArgument(SizeX64::qword, sTemporarySlot);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, libm_frexp)]);
build.vmovsd(luauRegValue(ra), xmm0);
if (nresults > 1)
{
build.vcvtsi2sd(xmm0, xmm0, dword[sTemporarySlot + 0]);
build.vmovsd(luauRegValue(ra + 1), xmm0);
}
}
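// For reference (not part of the original commit), the sequence above computes the C
// equivalent below; the exponent lands in sTemporarySlot as an int and is converted to
// double only when a second result is requested:
#if 0
static void mathFrexpSketch(double x, double* r0, double* r1, int nresults)
{
    int e;
    *r0 = frexp(x, &e); // libm_frexp in NativeContext
    if (nresults > 1)
        *r1 = double(e);
}
#endif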
static void emitBuiltinMathModf(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int arg, int nresults)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::xmmword, luauRegValue(arg));
callWrap.addArgument(SizeX64::qword, sTemporarySlot);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, libm_modf)]);
build.vmovsd(xmm1, qword[sTemporarySlot + 0]);
build.vmovsd(luauRegValue(ra), xmm1);
if (nresults > 1)
build.vmovsd(luauRegValue(ra + 1), xmm0);
}
static void emitBuiltinMathSign(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int arg)
{
ScopedRegX64 tmp0{regs, SizeX64::xmmword};
ScopedRegX64 tmp1{regs, SizeX64::xmmword};
ScopedRegX64 tmp2{regs, SizeX64::xmmword};
ScopedRegX64 tmp3{regs, SizeX64::xmmword};
build.vmovsd(tmp0.reg, luauRegValue(arg));
build.vxorpd(tmp1.reg, tmp1.reg, tmp1.reg);
// Set tmp2 to -1 if arg < 0, else 0
build.vcmpltsd(tmp2.reg, tmp0.reg, tmp1.reg);
build.vmovsd(tmp3.reg, build.f64(-1));
build.vandpd(tmp2.reg, tmp2.reg, tmp3.reg);
// Set mask bit to 1 if 0 < arg, else 0
build.vcmpltsd(tmp0.reg, tmp1.reg, tmp0.reg);
// Result = (mask-bit == 1) ? 1.0 : tmp2
// If arg < 0 then tmp2 is -1 and mask-bit is 0, result is -1
// If arg == 0 then tmp2 is 0 and mask-bit is 0, result is 0
// If arg > 0 then tmp2 is 0 and mask-bit is 1, result is 1
build.vblendvpd(tmp0.reg, tmp2.reg, build.f64x2(1, 1), tmp0.reg);
build.vmovsd(luauRegValue(ra), tmp0.reg);
}
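// For reference (not part of the original commit): the branchless SSE sequence above is
// equivalent to the scalar code below; NaN compares false on both sides and yields 0:
#if 0
static double mathSignSketch(double a)
{
    double m = (a < 0.0) ? -1.0 : 0.0; // vcmpltsd + vandpd
    return (0.0 < a) ? 1.0 : m;        // vcmpltsd + vblendvpd
}
#endif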
static void emitBuiltinType(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int arg)
{
ScopedRegX64 tmp0{regs, SizeX64::qword};
ScopedRegX64 tag{regs, SizeX64::dword};
build.mov(tag.reg, luauRegTag(arg));
build.mov(tmp0.reg, qword[rState + offsetof(lua_State, global)]);
build.mov(tmp0.reg, qword[tmp0.reg + qwordReg(tag.reg) * sizeof(TString*) + offsetof(global_State, ttname)]);
build.mov(luauRegValue(ra), tmp0.reg);
}
static void emitBuiltinTypeof(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int arg)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(arg));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaT_objtypenamestr)]);
build.mov(luauRegValue(ra), rax);
}
void emitBuiltin(IrRegAllocX64& regs, AssemblyBuilderX64& build, int bfid, int ra, int arg, OperandX64 arg2, int nparams, int nresults)
{
switch (bfid)
{
case LBF_MATH_FREXP:
LUAU_ASSERT(nparams == 1 && (nresults == 1 || nresults == 2));
return emitBuiltinMathFrexp(regs, build, ra, arg, nresults);
case LBF_MATH_MODF:
LUAU_ASSERT(nparams == 1 && (nresults == 1 || nresults == 2));
return emitBuiltinMathModf(regs, build, ra, arg, nresults);
case LBF_MATH_SIGN:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinMathSign(regs, build, ra, arg);
case LBF_TYPE:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinType(regs, build, ra, arg);
case LBF_TYPEOF:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinTypeof(regs, build, ra, arg);
default:
LUAU_ASSERT(!"Missing x64 lowering");
break;
}
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,23 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
namespace Luau
{
namespace CodeGen
{
struct Label;
struct IrOp;
namespace X64
{
class AssemblyBuilderX64;
struct OperandX64;
struct IrRegAllocX64;
void emitBuiltin(IrRegAllocX64& regs, AssemblyBuilderX64& build, int bfid, int ra, int arg, OperandX64 arg2, int nparams, int nresults);
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,37 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Label.h"
namespace Luau
{
namespace CodeGen
{
constexpr unsigned kTValueSizeLog2 = 4;
constexpr unsigned kLuaNodeSizeLog2 = 5;
// TKey.tt and TKey.next are packed together in a bitfield
constexpr unsigned kOffsetOfTKeyTagNext = 12; // offsetof cannot be used on a bit field
constexpr unsigned kTKeyTagBits = 4;
constexpr unsigned kTKeyTagMask = (1 << kTKeyTagBits) - 1;
constexpr unsigned kOffsetOfInstructionC = 3;
// Leaf functions that are placed in every module to perform common instruction sequences
struct ModuleHelpers
{
// A64/X64
Label exitContinueVm;
Label exitNoContinueVm;
// X64
Label continueCallInVm;
// A64
Label reentry; // x0: closure
Label interrupt; // x0: pc offset, x1: return address, x2: interrupt
};
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,59 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/AssemblyBuilderA64.h"
#include "EmitCommon.h"
#include "lobject.h"
#include "ltm.h"
#include "lstate.h"
// AArch64 ABI reminder:
// Arguments: x0-x7, v0-v7
// Return: x0, v0 (or indirectly via x8, which holds the address of the result structure)
// Volatile: x9-x15, v16-v31 ("caller-saved", any call may change them)
// Intra-procedure-call temporary: x16-x17 (any call or relocated jump may change them, as the linker may point branches to veneers to perform long jumps)
// Non-volatile: x19-x28, v8-v15 ("callee-saved", preserved after calls, only bottom half of SIMD registers is preserved!)
// Reserved: x18: reserved for platform use; x29: frame pointer (unless omitted); x30: link register; x31: stack pointer
namespace Luau
{
namespace CodeGen
{
struct NativeState;
namespace A64
{
// Data that is very common to access is placed in non-volatile registers:
// 1. Constant registers (only loaded during codegen entry)
constexpr RegisterA64 rState = x19; // lua_State* L
constexpr RegisterA64 rNativeContext = x20; // NativeContext* context
// 2. Frame registers (reloaded when call frame changes; rBase is also reloaded after all calls that may reallocate stack)
constexpr RegisterA64 rConstants = x21; // TValue* k
constexpr RegisterA64 rClosure = x22; // Closure* cl
constexpr RegisterA64 rCode = x23; // Instruction* code
constexpr RegisterA64 rBase = x24; // StkId base
// Native code is as stackless as the interpreter, so we can place some data on the stack once and have it accessible at any point
// See CodeGenA64.cpp for layout
constexpr unsigned kStashSlots = 8; // stashed non-volatile registers
constexpr unsigned kSpillSlots = 22; // slots for spilling temporary registers
constexpr unsigned kTempSlots = 2; // 16 bytes of temporary space, such luxury!
constexpr unsigned kStackSize = (kStashSlots + kSpillSlots + kTempSlots) * 8;
constexpr AddressA64 sSpillArea = mem(sp, kStashSlots * 8);
constexpr AddressA64 sTemporary = mem(sp, (kStashSlots + kSpillSlots) * 8);
inline void emitUpdateBase(AssemblyBuilderA64& build)
{
build.ldr(rBase, mem(rState, offsetof(lua_State, base)));
}
} // namespace A64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,358 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "EmitCommonX64.h"
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/IrCallWrapperX64.h"
#include "Luau/IrData.h"
#include "Luau/IrRegAllocX64.h"
#include "CustomExecUtils.h"
#include "NativeState.h"
#include "lgc.h"
#include "lstate.h"
namespace Luau
{
namespace CodeGen
{
namespace X64
{
void jumpOnNumberCmp(AssemblyBuilderX64& build, RegisterX64 tmp, OperandX64 lhs, OperandX64 rhs, IrCondition cond, Label& label)
{
// Refresher on comi/ucomi EFLAGS:
// CF only: less
// ZF only: equal
// PF+CF+ZF: unordered (NaN)
if (rhs.cat == CategoryX64::reg)
{
build.vucomisd(rhs, lhs);
}
else
{
build.vmovsd(tmp, rhs);
build.vucomisd(tmp, lhs);
}
// Keep in mind that 'Not' conditions want 'true' for comparisons with NaN
// And because of NaN, integer check interchangeability like 'not less or equal' <-> 'greater' does not hold
switch (cond)
{
case IrCondition::NotLessEqual:
// (b < a) is the same as !(a <= b). jnae checks CF=1 which means < or NaN
build.jcc(ConditionX64::NotAboveEqual, label);
break;
case IrCondition::LessEqual:
// (b >= a) is the same as (a <= b). jae checks CF=0 which means >= and not NaN
build.jcc(ConditionX64::AboveEqual, label);
break;
case IrCondition::NotLess:
// (b <= a) is the same as !(a < b). jna checks CF=1 or ZF=1 which means <= or NaN
build.jcc(ConditionX64::NotAbove, label);
break;
case IrCondition::Less:
// (b > a) is the same as (a < b). ja checks CF=0 and ZF=0 which means > and not NaN
build.jcc(ConditionX64::Above, label);
break;
case IrCondition::NotEqual:
// ZF=0 or PF=1 means != or NaN
build.jcc(ConditionX64::NotZero, label);
build.jcc(ConditionX64::Parity, label);
break;
default:
LUAU_ASSERT(!"Unsupported condition");
}
}
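// For reference (not part of the original commit), with vucomisd(b, a) the jumps above map to:
//   condition      jcc                  flags checked   unordered (NaN)
//   a <  b         ja   (Above)         CF=0 and ZF=0   no jump
//   a <= b         jae  (AboveEqual)    CF=0            no jump
//   !(a <  b)      jna  (NotAbove)      CF=1 or ZF=1    jump
//   !(a <= b)      jnae (NotAboveEqual) CF=1            jump
//   a != b         jnz + jp             ZF=0 or PF=1    jump
// which is why the 'Not' conditions come out true for NaN operands.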
void jumpOnAnyCmpFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, IrCondition cond, Label& label)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
if (cond == IrCondition::NotLessEqual || cond == IrCondition::LessEqual)
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_lessequal)]);
else if (cond == IrCondition::NotLess || cond == IrCondition::Less)
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_lessthan)]);
else if (cond == IrCondition::NotEqual || cond == IrCondition::Equal)
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_equalval)]);
else
LUAU_ASSERT(!"Unsupported condition");
emitUpdateBase(build);
build.test(eax, eax);
build.jcc(cond == IrCondition::NotLessEqual || cond == IrCondition::NotLess || cond == IrCondition::NotEqual ? ConditionX64::Zero
: ConditionX64::NotZero,
label);
}
void getTableNodeAtCachedSlot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, RegisterX64 table, int pcpos)
{
LUAU_ASSERT(tmp != node);
LUAU_ASSERT(table != node);
build.mov(node, qword[table + offsetof(Table, node)]);
// compute cached slot
build.mov(tmp, sCode);
build.movzx(dwordReg(tmp), byte[tmp + pcpos * sizeof(Instruction) + kOffsetOfInstructionC]);
build.and_(byteReg(tmp), byte[table + offsetof(Table, nodemask8)]);
// LuaNode* n = &h->node[slot];
build.shl(dwordReg(tmp), kLuaNodeSizeLog2);
build.add(node, tmp);
}
void convertNumberToIndexOrJump(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 numd, RegisterX64 numi, Label& label)
{
LUAU_ASSERT(numi.size == SizeX64::dword);
// Convert to integer, NaN is converted into 0x80000000
build.vcvttsd2si(numi, numd);
// Convert that integer back to double
build.vcvtsi2sd(tmp, numd, numi);
build.vucomisd(tmp, numd); // Sets ZF=1 if equal or NaN
// We don't need non-integer values
// But to skip the PF=1 check, we proceed with NaN because 0x80000000 index is out of bounds
build.jcc(ConditionX64::NotZero, label);
}
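// For reference (not part of the original commit), this is approximately:
#if 0
static bool numberToIndexSketch(double d, int* out)
{
    int i = int(d); // vcvttsd2si: NaN and out-of-range inputs become 0x80000000
    if (double(i) != d)
        return false; // not an exact 32-bit integer
    *out = i;
    return true;
}
#endif
// One deliberate difference: ucomisd sets ZF for unordered operands, so the emitted code lets
// NaN pass this check and relies on the out-of-bounds 0x80000000 index to reject it later.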
void callArithHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, OperandX64 c, TMS tm)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
callWrap.addArgument(SizeX64::qword, c);
callWrap.addArgument(SizeX64::dword, tm);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_doarith)]);
emitUpdateBase(build);
}
void callLengthHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_dolen)]);
emitUpdateBase(build);
}
void callPrepareForN(IrRegAllocX64& regs, AssemblyBuilderX64& build, int limit, int step, int init)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(limit));
callWrap.addArgument(SizeX64::qword, luauRegAddress(step));
callWrap.addArgument(SizeX64::qword, luauRegAddress(init));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_prepareFORN)]);
}
void callGetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
callWrap.addArgument(SizeX64::qword, c);
callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_gettable)]);
emitUpdateBase(build);
}
void callSetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
callWrap.addArgument(SizeX64::qword, c);
callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_settable)]);
emitUpdateBase(build);
}
void checkObjectBarrierConditions(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 object, int ra, Label& skip)
{
// iscollectable(ra)
build.cmp(luauRegTag(ra), LUA_TSTRING);
build.jcc(ConditionX64::Less, skip);
// isblack(obj2gco(o))
build.test(byte[object + offsetof(GCheader, marked)], bitmask(BLACKBIT));
build.jcc(ConditionX64::Zero, skip);
// iswhite(gcvalue(ra))
build.mov(tmp, luauRegValue(ra));
build.test(byte[tmp + offsetof(GCheader, marked)], bit2mask(WHITE0BIT, WHITE1BIT));
build.jcc(ConditionX64::Zero, skip);
}
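// For reference (not part of the original commit), these three checks mirror the condition
// guarding the write barrier in the VM; with TValue* v = &base[ra] and GCObject* o being the
// object stored into:
#if 0
if (iscollectable(v) && isblack(o) && iswhite(gcvalue(v)))
    luaC_barrierf(L, o, gcvalue(v));
#endif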
void callBarrierObject(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 object, IrOp objectOp, int ra)
{
Label skip;
ScopedRegX64 tmp{regs, SizeX64::qword};
checkObjectBarrierConditions(build, tmp.reg, object, ra, skip);
{
ScopedSpills spillGuard(regs);
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, object, objectOp);
callWrap.addArgument(SizeX64::qword, tmp);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_barrierf)]);
}
build.setLabel(skip);
}
void callBarrierTableFast(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 table, IrOp tableOp)
{
Label skip;
// isblack(obj2gco(t))
build.test(byte[table + offsetof(GCheader, marked)], bitmask(BLACKBIT));
build.jcc(ConditionX64::Zero, skip);
{
ScopedSpills spillGuard(regs);
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, table, tableOp);
callWrap.addArgument(SizeX64::qword, addr[table + offsetof(Table, gclist)]);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_barrierback)]);
}
build.setLabel(skip);
}
void callStepGc(IrRegAllocX64& regs, AssemblyBuilderX64& build)
{
Label skip;
{
ScopedRegX64 tmp1{regs, SizeX64::qword};
ScopedRegX64 tmp2{regs, SizeX64::qword};
build.mov(tmp1.reg, qword[rState + offsetof(lua_State, global)]);
build.mov(tmp2.reg, qword[tmp1.reg + offsetof(global_State, totalbytes)]);
build.cmp(tmp2.reg, qword[tmp1.reg + offsetof(global_State, GCthreshold)]);
build.jcc(ConditionX64::Below, skip);
}
{
ScopedSpills spillGuard(regs);
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::dword, 1);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_step)]);
emitUpdateBase(build);
}
build.setLabel(skip);
}
void emitExit(AssemblyBuilderX64& build, bool continueInVm)
{
if (continueInVm)
build.mov(eax, 1);
else
build.xor_(eax, eax);
build.jmp(qword[rNativeContext + offsetof(NativeContext, gateExit)]);
}
void emitUpdateBase(AssemblyBuilderX64& build)
{
build.mov(rBase, qword[rState + offsetof(lua_State, base)]);
}
static void emitSetSavedPc(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos)
{
ScopedRegX64 tmp1{regs, SizeX64::qword};
ScopedRegX64 tmp2{regs, SizeX64::qword};
build.mov(tmp1.reg, sCode);
build.add(tmp1.reg, pcpos * sizeof(Instruction));
build.mov(tmp2.reg, qword[rState + offsetof(lua_State, ci)]);
build.mov(qword[tmp2.reg + offsetof(CallInfo, savedpc)], tmp1.reg);
}
void emitInterrupt(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos)
{
Label skip;
ScopedRegX64 tmp{regs, SizeX64::qword};
// Skip if there is no interrupt set
build.mov(tmp.reg, qword[rState + offsetof(lua_State, global)]);
build.mov(tmp.reg, qword[tmp.reg + offsetof(global_State, cb.interrupt)]);
build.test(tmp.reg, tmp.reg);
build.jcc(ConditionX64::Zero, skip);
emitSetSavedPc(regs, build, pcpos + 1);
// Call interrupt
// TODO: This code should move to the end of the function, or even be outlined so that it can be shared by multiple interruptible instructions
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::dword, -1);
callWrap.call(tmp.release());
emitUpdateBase(build); // interrupt may have reallocated stack
// Check if we need to exit
build.mov(al, byte[rState + offsetof(lua_State, status)]);
build.test(al, al);
build.jcc(ConditionX64::Zero, skip);
build.mov(rax, qword[rState + offsetof(lua_State, ci)]);
build.sub(qword[rax + offsetof(CallInfo, savedpc)], sizeof(Instruction));
emitExit(build, /* continueInVm */ false);
build.setLabel(skip);
}
void emitFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int offset, int pcpos)
{
// fallback(L, instruction, base, k)
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
RegisterX64 reg = callWrap.suggestNextArgumentRegister(SizeX64::qword);
build.mov(reg, sCode);
callWrap.addArgument(SizeX64::qword, addr[reg + pcpos * sizeof(Instruction)]);
callWrap.addArgument(SizeX64::qword, rBase);
callWrap.addArgument(SizeX64::qword, rConstants);
callWrap.call(qword[rNativeContext + offset]);
emitUpdateBase(build);
}
void emitContinueCallInVm(AssemblyBuilderX64& build)
{
RegisterX64 proto = rcx; // Sync with emitInstCall
build.mov(rdx, qword[proto + offsetof(Proto, code)]);
build.mov(rax, qword[rState + offsetof(lua_State, ci)]);
build.mov(qword[rax + offsetof(CallInfo, savedpc)], rdx);
emitExit(build, /* continueInVm */ true);
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,239 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/AssemblyBuilderX64.h"
#include "EmitCommon.h"
#include "lobject.h"
#include "ltm.h"
// MS x64 ABI reminder:
// Arguments: rcx, rdx, r8, r9 ('overlapped' with xmm0-xmm3)
// Return: rax, xmm0
// Nonvolatile: r12-r15, rdi, rsi, rbx, rbp
// SIMD: only xmm6-xmm15 are non-volatile, all ymm upper parts are volatile
// SystemV AMD64 ABI reminder:
// Arguments: rdi, rsi, rdx, rcx, r8, r9 (xmm0-xmm7)
// Return: rax, rdx, xmm0, xmm1
// Nonvolatile: r12-r15, rbx, rbp
// SIMD: all volatile
namespace Luau
{
namespace CodeGen
{
enum class IrCondition : uint8_t;
struct NativeState;
struct IrOp;
namespace X64
{
struct IrRegAllocX64;
constexpr uint32_t kFunctionAlignment = 32;
// Data that is very common to access is placed in non-volatile registers
constexpr RegisterX64 rState = r15; // lua_State* L
constexpr RegisterX64 rBase = r14; // StkId base
constexpr RegisterX64 rNativeContext = r13; // NativeContext* context
constexpr RegisterX64 rConstants = r12; // TValue* k
// Native code is as stackless as the interpreter, so we can place some data on the stack once and have it accessible at any point
// See CodeGenX64.cpp for layout
constexpr unsigned kStackSize = 32 + 16; // 4 home locations for registers, 16 bytes for additional function call arguments
constexpr unsigned kSpillSlots = 4; // locations for register allocator to spill data into
constexpr unsigned kLocalsSize = 24 + 8 * kSpillSlots; // 3 extra slots for our custom locals (also aligns the stack to 16 byte boundary)
constexpr OperandX64 sClosure = qword[rsp + kStackSize + 0]; // Closure* cl
constexpr OperandX64 sCode = qword[rsp + kStackSize + 8]; // Instruction* code
constexpr OperandX64 sTemporarySlot = addr[rsp + kStackSize + 16];
constexpr OperandX64 sSpillArea = addr[rsp + kStackSize + 24];
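// For illustration (not part of the original commit): with kStackSize == 48 and
// kSpillSlots == 4 this lays out, relative to rsp after the prologue:
//   [rsp +  0 .. + 47]  register home space plus sArg5/sArg6 for outgoing calls
//   [rsp + 48]          sClosure
//   [rsp + 56]          sCode
//   [rsp + 64]          sTemporarySlot
//   [rsp + 72 .. +103]  sSpillArea (kSpillSlots * 8 bytes)
// for a total of kStackSize + kLocalsSize == 104 bytes, which together with the pushed
// registers and return address keeps rsp 16-byte aligned.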
// TODO: These should be replaced with a portable call function that checks the ABI at runtime and reorders moves accordingly to avoid conflicts
#if defined(_WIN32)
constexpr RegisterX64 rArg1 = rcx;
constexpr RegisterX64 rArg2 = rdx;
constexpr RegisterX64 rArg3 = r8;
constexpr RegisterX64 rArg4 = r9;
constexpr RegisterX64 rArg5 = noreg;
constexpr RegisterX64 rArg6 = noreg;
constexpr OperandX64 sArg5 = qword[rsp + 32];
constexpr OperandX64 sArg6 = qword[rsp + 40];
#else
constexpr RegisterX64 rArg1 = rdi;
constexpr RegisterX64 rArg2 = rsi;
constexpr RegisterX64 rArg3 = rdx;
constexpr RegisterX64 rArg4 = rcx;
constexpr RegisterX64 rArg5 = r8;
constexpr RegisterX64 rArg6 = r9;
constexpr OperandX64 sArg5 = noreg;
constexpr OperandX64 sArg6 = noreg;
#endif
inline OperandX64 luauReg(int ri)
{
return xmmword[rBase + ri * sizeof(TValue)];
}
inline OperandX64 luauRegAddress(int ri)
{
return addr[rBase + ri * sizeof(TValue)];
}
inline OperandX64 luauRegValue(int ri)
{
return qword[rBase + ri * sizeof(TValue) + offsetof(TValue, value)];
}
inline OperandX64 luauRegTag(int ri)
{
return dword[rBase + ri * sizeof(TValue) + offsetof(TValue, tt)];
}
inline OperandX64 luauRegValueInt(int ri)
{
return dword[rBase + ri * sizeof(TValue) + offsetof(TValue, value)];
}
inline OperandX64 luauRegValueVector(int ri, int index)
{
return dword[rBase + ri * sizeof(TValue) + offsetof(TValue, value) + (sizeof(float) * index)];
}
inline OperandX64 luauConstant(int ki)
{
return xmmword[rConstants + ki * sizeof(TValue)];
}
inline OperandX64 luauConstantAddress(int ki)
{
return addr[rConstants + ki * sizeof(TValue)];
}
inline OperandX64 luauConstantTag(int ki)
{
return dword[rConstants + ki * sizeof(TValue) + offsetof(TValue, tt)];
}
inline OperandX64 luauConstantValue(int ki)
{
return qword[rConstants + ki * sizeof(TValue) + offsetof(TValue, value)];
}
inline OperandX64 luauNodeKeyValue(RegisterX64 node)
{
return qword[node + offsetof(LuaNode, key) + offsetof(TKey, value)];
}
// Note: tag has dirty upper bits
inline OperandX64 luauNodeKeyTag(RegisterX64 node)
{
return dword[node + offsetof(LuaNode, key) + kOffsetOfTKeyTagNext];
}
inline OperandX64 luauNodeValue(RegisterX64 node)
{
return xmmword[node + offsetof(LuaNode, val)];
}
inline void setLuauReg(AssemblyBuilderX64& build, RegisterX64 tmp, int ri, OperandX64 op)
{
LUAU_ASSERT(op.cat == CategoryX64::mem);
build.vmovups(tmp, op);
build.vmovups(luauReg(ri), tmp);
}
inline void jumpIfTagIs(AssemblyBuilderX64& build, int ri, lua_Type tag, Label& label)
{
build.cmp(luauRegTag(ri), tag);
build.jcc(ConditionX64::Equal, label);
}
inline void jumpIfTagIsNot(AssemblyBuilderX64& build, int ri, lua_Type tag, Label& label)
{
build.cmp(luauRegTag(ri), tag);
build.jcc(ConditionX64::NotEqual, label);
}
// Note: fallthrough label should be placed after this condition
inline void jumpIfFalsy(AssemblyBuilderX64& build, int ri, Label& target, Label& fallthrough)
{
jumpIfTagIs(build, ri, LUA_TNIL, target); // false if nil
jumpIfTagIsNot(build, ri, LUA_TBOOLEAN, fallthrough); // true if not nil or boolean
build.cmp(luauRegValueInt(ri), 0);
build.jcc(ConditionX64::Equal, target); // falsy if boolean value is 'false'
}
// Note: fallthrough label should be placed after this condition
inline void jumpIfTruthy(AssemblyBuilderX64& build, int ri, Label& target, Label& fallthrough)
{
jumpIfTagIs(build, ri, LUA_TNIL, fallthrough); // false if nil
jumpIfTagIsNot(build, ri, LUA_TBOOLEAN, target); // true if not nil or boolean
build.cmp(luauRegValueInt(ri), 0);
build.jcc(ConditionX64::NotEqual, target); // true if boolean value is 'true'
}
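// For reference (not part of the original commit), both helpers implement Luau truthiness:
#if 0
static bool isFalsySketch(const TValue* v)
{
    return ttisnil(v) || (ttisboolean(v) && bvalue(v) == 0); // everything else is truthy
}
#endif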
inline void jumpIfNodeKeyTagIsNot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, lua_Type tag, Label& label)
{
tmp.size = SizeX64::dword;
build.mov(tmp, luauNodeKeyTag(node));
build.and_(tmp, kTKeyTagMask);
build.cmp(tmp, tag);
build.jcc(ConditionX64::NotEqual, label);
}
inline void jumpIfNodeValueTagIs(AssemblyBuilderX64& build, RegisterX64 node, lua_Type tag, Label& label)
{
build.cmp(dword[node + offsetof(LuaNode, val) + offsetof(TValue, tt)], tag);
build.jcc(ConditionX64::Equal, label);
}
inline void jumpIfNodeKeyNotInExpectedSlot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, OperandX64 expectedKey, Label& label)
{
jumpIfNodeKeyTagIsNot(build, tmp, node, LUA_TSTRING, label);
build.mov(tmp, expectedKey);
build.cmp(tmp, luauNodeKeyValue(node));
build.jcc(ConditionX64::NotEqual, label);
jumpIfNodeValueTagIs(build, node, LUA_TNIL, label);
}
void jumpOnNumberCmp(AssemblyBuilderX64& build, RegisterX64 tmp, OperandX64 lhs, OperandX64 rhs, IrCondition cond, Label& label);
void jumpOnAnyCmpFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, IrCondition cond, Label& label);
void getTableNodeAtCachedSlot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, RegisterX64 table, int pcpos);
void convertNumberToIndexOrJump(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 numd, RegisterX64 numi, Label& label);
void callArithHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, OperandX64 c, TMS tm);
void callLengthHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb);
void callPrepareForN(IrRegAllocX64& regs, AssemblyBuilderX64& build, int limit, int step, int init);
void callGetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra);
void callSetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra);
void checkObjectBarrierConditions(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 object, int ra, Label& skip);
void callBarrierObject(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 object, IrOp objectOp, int ra);
void callBarrierTableFast(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 table, IrOp tableOp);
void callStepGc(IrRegAllocX64& regs, AssemblyBuilderX64& build);
void emitExit(AssemblyBuilderX64& build, bool continueInVm);
void emitUpdateBase(AssemblyBuilderX64& build);
void emitInterrupt(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos);
void emitFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int offset, int pcpos);
void emitContinueCallInVm(AssemblyBuilderX64& build);
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,493 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "EmitInstructionX64.h"
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/IrRegAllocX64.h"
#include "CustomExecUtils.h"
#include "EmitCommonX64.h"
namespace Luau
{
namespace CodeGen
{
namespace X64
{
void emitInstCall(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults)
{
build.mov(rArg1, rState);
build.lea(rArg2, luauRegAddress(ra));
if (nparams == LUA_MULTRET)
build.mov(rArg3, qword[rState + offsetof(lua_State, top)]);
else
build.lea(rArg3, luauRegAddress(ra + 1 + nparams));
build.mov(dwordReg(rArg4), nresults);
build.call(qword[rNativeContext + offsetof(NativeContext, callProlog)]);
RegisterX64 ccl = rax; // Returned from callProlog
emitUpdateBase(build);
Label cFuncCall;
build.test(byte[ccl + offsetof(Closure, isC)], 1);
build.jcc(ConditionX64::NotZero, cFuncCall);
{
RegisterX64 proto = rcx; // Sync with emitContinueCallInVm
RegisterX64 ci = rdx;
RegisterX64 argi = rsi;
RegisterX64 argend = rdi;
build.mov(proto, qword[ccl + offsetof(Closure, l.p)]);
// Switch current Closure
build.mov(sClosure, ccl); // Last use of 'ccl'
build.mov(ci, qword[rState + offsetof(lua_State, ci)]);
Label fillnil, exitfillnil;
// argi = L->top
build.mov(argi, qword[rState + offsetof(lua_State, top)]);
// argend = L->base + p->numparams
build.movzx(eax, byte[proto + offsetof(Proto, numparams)]);
build.shl(eax, kTValueSizeLog2);
build.lea(argend, addr[rBase + rax]);
// while (argi < argend) setnilvalue(argi++);
build.setLabel(fillnil);
build.cmp(argi, argend);
build.jcc(ConditionX64::NotBelow, exitfillnil);
build.mov(dword[argi + offsetof(TValue, tt)], LUA_TNIL);
build.add(argi, sizeof(TValue));
build.jmp(fillnil); // This loop rarely runs so it's not worth repeating cmp/jcc
build.setLabel(exitfillnil);
// Set L->top to ci->top as most functions expect (no vararg)
build.mov(rax, qword[ci + offsetof(CallInfo, top)]);
build.mov(qword[rState + offsetof(lua_State, top)], rax);
// But if it is vararg, update it to 'argi'
Label skipVararg;
build.test(byte[proto + offsetof(Proto, is_vararg)], 1);
build.jcc(ConditionX64::Zero, skipVararg);
build.mov(qword[rState + offsetof(lua_State, top)], argi);
build.setLabel(skipVararg);
// Get native function entry
build.mov(rax, qword[proto + offsetof(Proto, exectarget)]);
build.test(rax, rax);
build.jcc(ConditionX64::Zero, helpers.continueCallInVm);
// Mark call frame as custom
build.mov(dword[ci + offsetof(CallInfo, flags)], LUA_CALLINFO_CUSTOM);
// Switch current constants
build.mov(rConstants, qword[proto + offsetof(Proto, k)]);
// Switch current code
build.mov(rdx, qword[proto + offsetof(Proto, code)]);
build.mov(sCode, rdx);
build.jmp(rax);
}
build.setLabel(cFuncCall);
{
// results = ccl->c.f(L);
build.mov(rArg1, rState);
build.call(qword[ccl + offsetof(Closure, c.f)]); // Last use of 'ccl'
RegisterX64 results = eax;
build.test(results, results); // test here will set SF=1 for a negative number and it always sets OF to 0
build.jcc(ConditionX64::Less, helpers.exitNoContinueVm); // jl jumps if SF != OF
// We have special handling for a small number of expected results below
if (nresults != 0 && nresults != 1)
{
build.mov(rArg1, rState);
build.mov(dwordReg(rArg2), nresults);
build.mov(dwordReg(rArg3), results);
build.call(qword[rNativeContext + offsetof(NativeContext, callEpilogC)]);
emitUpdateBase(build);
return;
}
RegisterX64 ci = rdx;
RegisterX64 cip = rcx;
RegisterX64 vali = rsi;
build.mov(ci, qword[rState + offsetof(lua_State, ci)]);
build.lea(cip, addr[ci - sizeof(CallInfo)]);
// L->base = cip->base
build.mov(rBase, qword[cip + offsetof(CallInfo, base)]);
build.mov(qword[rState + offsetof(lua_State, base)], rBase);
if (nresults == 1)
{
// Opportunistically copy the result we expected from (L->top - results)
build.mov(vali, qword[rState + offsetof(lua_State, top)]);
build.shl(results, kTValueSizeLog2);
build.sub(vali, qwordReg(results));
build.vmovups(xmm0, xmmword[vali]);
build.vmovups(luauReg(ra), xmm0);
Label skipnil;
// If there was no result, override the value with 'nil'
build.test(results, results);
build.jcc(ConditionX64::NotZero, skipnil);
build.mov(luauRegTag(ra), LUA_TNIL);
build.setLabel(skipnil);
}
// L->ci = cip
build.mov(qword[rState + offsetof(lua_State, ci)], cip);
// L->top = cip->top
build.mov(rax, qword[cip + offsetof(CallInfo, top)]);
build.mov(qword[rState + offsetof(lua_State, top)], rax);
}
}
void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults)
{
RegisterX64 ci = r8;
RegisterX64 cip = r9;
RegisterX64 res = rdi;
RegisterX64 nresults = esi;
build.mov(ci, qword[rState + offsetof(lua_State, ci)]);
build.lea(cip, addr[ci - sizeof(CallInfo)]);
// res = ci->func; note: we assume CALL always puts func+args and expects results to start at func
build.mov(res, qword[ci + offsetof(CallInfo, func)]);
// nresults = ci->nresults
build.mov(nresults, dword[ci + offsetof(CallInfo, nresults)]);
{
Label skipResultCopy;
RegisterX64 counter = ecx;
if (actualResults == 0)
{
// Our instruction doesn't have any results, so just fill the results expected by the parent with 'nil'
build.test(nresults, nresults); // test here will set SF=1 for a negative number, ZF=1 for zero and OF=0
build.jcc(ConditionX64::LessEqual, skipResultCopy); // jle jumps if SF != OF or ZF == 1
build.mov(counter, nresults);
Label repeatNilLoop = build.setLabel();
build.mov(dword[res + offsetof(TValue, tt)], LUA_TNIL);
build.add(res, sizeof(TValue));
build.dec(counter);
build.jcc(ConditionX64::NotZero, repeatNilLoop);
}
else if (actualResults == 1)
{
// Try setting our 1 result
build.test(nresults, nresults);
build.jcc(ConditionX64::Zero, skipResultCopy);
build.lea(counter, addr[nresults - 1]);
build.vmovups(xmm0, luauReg(ra));
build.vmovups(xmmword[res], xmm0);
build.add(res, sizeof(TValue));
// Fill the rest of the expected results with 'nil'
build.test(counter, counter); // test here will set SF=1 for a negative number, ZF=1 for zero and OF=0
build.jcc(ConditionX64::LessEqual, skipResultCopy); // jle jumps if SF != OF or ZF == 1
Label repeatNilLoop = build.setLabel();
build.mov(dword[res + offsetof(TValue, tt)], LUA_TNIL);
build.add(res, sizeof(TValue));
build.dec(counter);
build.jcc(ConditionX64::NotZero, repeatNilLoop);
}
else
{
RegisterX64 vali = rax;
RegisterX64 valend = rdx;
// Copy return values into parent stack (but only up to nresults!)
build.test(nresults, nresults);
build.jcc(ConditionX64::Zero, skipResultCopy);
// vali = ra
build.lea(vali, luauRegAddress(ra));
// Copy as much as possible for MULTRET calls, and only as much as needed otherwise
if (actualResults == LUA_MULTRET)
build.mov(valend, qword[rState + offsetof(lua_State, top)]); // valend = L->top
else
build.lea(valend, luauRegAddress(ra + actualResults)); // valend = ra + actualResults
build.mov(counter, nresults);
Label repeatValueLoop, exitValueLoop;
build.setLabel(repeatValueLoop);
build.cmp(vali, valend);
build.jcc(ConditionX64::NotBelow, exitValueLoop);
build.vmovups(xmm0, xmmword[vali]);
build.vmovups(xmmword[res], xmm0);
build.add(vali, sizeof(TValue));
build.add(res, sizeof(TValue));
build.dec(counter);
build.jcc(ConditionX64::NotZero, repeatValueLoop);
build.setLabel(exitValueLoop);
// Fill the rest of the expected results with 'nil'
build.test(counter, counter); // test here will set SF=1 for a negative number, ZF=1 for zero and OF=0
build.jcc(ConditionX64::LessEqual, skipResultCopy); // jle jumps if SF != OF or ZF == 1
Label repeatNilLoop = build.setLabel();
build.mov(dword[res + offsetof(TValue, tt)], LUA_TNIL);
build.add(res, sizeof(TValue));
build.dec(counter);
build.jcc(ConditionX64::NotZero, repeatNilLoop);
}
build.setLabel(skipResultCopy);
}
build.mov(qword[rState + offsetof(lua_State, ci)], cip); // L->ci = cip
build.mov(rBase, qword[cip + offsetof(CallInfo, base)]); // sync base = L->base while we have a chance
build.mov(qword[rState + offsetof(lua_State, base)], rBase); // L->base = cip->base
// Start with result for LUA_MULTRET/exit value
build.mov(qword[rState + offsetof(lua_State, top)], res); // L->top = res
// Unlikely, but this might be the last return from VM
build.test(byte[ci + offsetof(CallInfo, flags)], LUA_CALLINFO_RETURN);
build.jcc(ConditionX64::NotZero, helpers.exitNoContinueVm);
Label skipFixedRetTop;
build.test(nresults, nresults); // test here will set SF=1 for a negative number and it always sets OF to 0
build.jcc(ConditionX64::Less, skipFixedRetTop); // jl jumps if SF != OF
build.mov(rax, qword[cip + offsetof(CallInfo, top)]);
build.mov(qword[rState + offsetof(lua_State, top)], rax); // L->top = cip->top
build.setLabel(skipFixedRetTop);
// Returning back to the previous function is a bit tricky
// Registers alive: r9 (cip)
RegisterX64 proto = rcx;
RegisterX64 execdata = rbx;
// Change closure
build.mov(rax, qword[cip + offsetof(CallInfo, func)]);
build.mov(rax, qword[rax + offsetof(TValue, value.gc)]);
build.mov(sClosure, rax);
build.mov(proto, qword[rax + offsetof(Closure, l.p)]);
build.mov(execdata, qword[proto + offsetof(Proto, execdata)]);
build.test(byte[cip + offsetof(CallInfo, flags)], LUA_CALLINFO_CUSTOM);
build.jcc(ConditionX64::Zero, helpers.exitContinueVm); // Continue in interpreter if function has no native data
// Change constants
build.mov(rConstants, qword[proto + offsetof(Proto, k)]);
// Change code
build.mov(rdx, qword[proto + offsetof(Proto, code)]);
build.mov(sCode, rdx);
build.mov(rax, qword[cip + offsetof(CallInfo, savedpc)]);
// To get instruction index from instruction pointer, we need to divide byte offset by 4
// But we will actually need to scale instruction index by 4 back to byte offset later so it cancels out
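// Concretely: rax = savedpc - code is the byte offset (instruction index * 4), and since execdata entries are 4 bytes wide, that byte offset indexes execdata directly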
build.sub(rax, rdx);
// Get new instruction location and jump to it
build.mov(edx, dword[execdata + rax]);
build.add(rdx, qword[proto + offsetof(Proto, exectarget)]);
build.jmp(rdx);
}
void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, int count, uint32_t index)
{
OperandX64 last = index + count - 1;
// Using non-volatile 'rbx' for dynamic 'count' value (for LUA_MULTRET) to skip later recomputation
// We also keep 'count' scaled by sizeof(TValue) here as it helps in the loop below
RegisterX64 cscaled = rbx;
if (count == LUA_MULTRET)
{
RegisterX64 tmp = rax;
// count = L->top - rb
build.mov(cscaled, qword[rState + offsetof(lua_State, top)]);
build.lea(tmp, luauRegAddress(rb));
build.sub(cscaled, tmp); // Using byte difference
// L->top = L->ci->top
build.mov(tmp, qword[rState + offsetof(lua_State, ci)]);
build.mov(tmp, qword[tmp + offsetof(CallInfo, top)]);
build.mov(qword[rState + offsetof(lua_State, top)], tmp);
// last = index + count - 1;
last = edx;
build.mov(last, dwordReg(cscaled));
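// cscaled holds the byte difference, so shifting right by kTValueSizeLog2 converts it back to an element count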
build.shr(last, kTValueSizeLog2);
build.add(last, index - 1);
}
Label skipResize;
RegisterX64 table = regs.takeReg(rax, kInvalidInstIdx);
build.mov(table, luauRegValue(ra));
// Resize if h->sizearray < last
build.cmp(dword[table + offsetof(Table, sizearray)], last);
build.jcc(ConditionX64::NotBelow, skipResize);
// Argument setup reordered to avoid conflicts
LUAU_ASSERT(rArg3 != table);
build.mov(dwordReg(rArg3), last);
build.mov(rArg2, table);
build.mov(rArg1, rState);
build.call(qword[rNativeContext + offsetof(NativeContext, luaH_resizearray)]);
build.mov(table, luauRegValue(ra)); // Reload clobbered register value
build.setLabel(skipResize);
RegisterX64 arrayDst = rdx;
RegisterX64 offset = rcx;
build.mov(arrayDst, qword[table + offsetof(Table, array)]);
const int kUnrollSetListLimit = 4;
if (count != LUA_MULTRET && count <= kUnrollSetListLimit)
{
for (int i = 0; i < count; ++i)
{
// setobj2t(L, &array[index + i - 1], rb + i);
build.vmovups(xmm0, luauRegValue(rb + i));
build.vmovups(xmmword[arrayDst + (index + i - 1) * sizeof(TValue)], xmm0);
}
}
else
{
LUAU_ASSERT(count != 0);
build.xor_(offset, offset);
if (index != 1)
build.add(arrayDst, (index - 1) * sizeof(TValue));
Label repeatLoop, endLoop;
OperandX64 limit = count == LUA_MULTRET ? cscaled : OperandX64(count * sizeof(TValue));
// If count is static, we will always do at least one iteration
if (count == LUA_MULTRET)
{
build.cmp(offset, limit);
build.jcc(ConditionX64::NotBelow, endLoop);
}
build.setLabel(repeatLoop);
// setobj2t(L, &array[index + i - 1], rb + i);
build.vmovups(xmm0, xmmword[offset + rBase + rb * sizeof(TValue)]); // luauReg(rb) unwrapped to add offset
build.vmovups(xmmword[offset + arrayDst], xmm0);
build.add(offset, sizeof(TValue));
build.cmp(offset, limit);
build.jcc(ConditionX64::Below, repeatLoop);
build.setLabel(endLoop);
}
callBarrierTableFast(regs, build, table, {});
}
void emitInstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat)
{
// ipairs-style traversal is handled in IR
LUAU_ASSERT(aux >= 0);
// This is a fast-path for builtin table iteration, tag check for 'ra' has to be performed before emitting this instruction
// Registers are chosen in this way to simplify fallback code for the node part
RegisterX64 table = rArg2;
RegisterX64 index = rArg3;
RegisterX64 elemPtr = rax;
build.mov(table, luauRegValue(ra + 1));
build.mov(index, luauRegValue(ra + 2));
// &array[index]
build.mov(dwordReg(elemPtr), dwordReg(index));
build.shl(dwordReg(elemPtr), kTValueSizeLog2);
build.add(elemPtr, qword[table + offsetof(Table, array)]);
// Clear extra variables since we might have more than two
for (int i = 2; i < aux; ++i)
build.mov(luauRegTag(ra + 3 + i), LUA_TNIL);
Label skipArray, skipArrayNil;
// First we advance index through the array portion
// while (unsigned(index) < unsigned(sizearray))
Label arrayLoop = build.setLabel();
build.cmp(dwordReg(index), dword[table + offsetof(Table, sizearray)]);
build.jcc(ConditionX64::NotBelow, skipArray);
// The index is incremented up front: if the element is nil we advance to the next array slot, and if it isn't, the code below still needs 'index + 1'
build.inc(index);
build.cmp(dword[elemPtr + offsetof(TValue, tt)], LUA_TNIL);
build.jcc(ConditionX64::Equal, skipArrayNil);
// setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
build.mov(luauRegValue(ra + 2), index);
// Tag should already be set to lightuserdata
// setnvalue(ra + 3, double(index + 1));
build.vcvtsi2sd(xmm0, xmm0, dwordReg(index));
build.vmovsd(luauRegValue(ra + 3), xmm0);
build.mov(luauRegTag(ra + 3), LUA_TNUMBER);
// setobj2s(L, ra + 4, e);
setLuauReg(build, xmm2, ra + 4, xmmword[elemPtr]);
build.jmp(loopRepeat);
build.setLabel(skipArrayNil);
// Index already incremented, advance to next array element
build.add(elemPtr, sizeof(TValue));
build.jmp(arrayLoop);
build.setLabel(skipArray);
// Call helper to assign next node value or to signal loop exit
build.mov(rArg1, rState);
// rArg2 and rArg3 are already set
build.lea(rArg4, luauRegAddress(ra));
build.call(qword[rNativeContext + offsetof(NativeContext, forgLoopNodeIter)]);
build.test(al, al);
build.jcc(ConditionX64::NotZero, loopRepeat);
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,27 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
struct Label;
struct ModuleHelpers;
namespace X64
{
class AssemblyBuilderX64;
struct IrRegAllocX64;
void emitInstCall(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults);
void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults);
void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, int count, uint32_t index);
void emitInstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat);
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,691 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/IrAnalysis.h"
#include "Luau/DenseHash.h"
#include "Luau/IrData.h"
#include "Luau/IrUtils.h"
#include "lobject.h"
#include <bitset>
#include <stddef.h>
namespace Luau
{
namespace CodeGen
{
void updateUseCounts(IrFunction& function)
{
std::vector<IrBlock>& blocks = function.blocks;
std::vector<IrInst>& instructions = function.instructions;
for (IrBlock& block : blocks)
block.useCount = 0;
for (IrInst& inst : instructions)
inst.useCount = 0;
auto checkOp = [&](IrOp op) {
if (op.kind == IrOpKind::Inst)
{
IrInst& target = instructions[op.index];
LUAU_ASSERT(target.useCount < 0xffff);
target.useCount++;
}
else if (op.kind == IrOpKind::Block)
{
IrBlock& target = blocks[op.index];
LUAU_ASSERT(target.useCount < 0xffff);
target.useCount++;
}
};
for (IrInst& inst : instructions)
{
checkOp(inst.a);
checkOp(inst.b);
checkOp(inst.c);
checkOp(inst.d);
checkOp(inst.e);
checkOp(inst.f);
}
}
void updateLastUseLocations(IrFunction& function)
{
std::vector<IrInst>& instructions = function.instructions;
for (IrInst& inst : instructions)
inst.lastUse = 0;
for (size_t instIdx = 0; instIdx < instructions.size(); ++instIdx)
{
IrInst& inst = instructions[instIdx];
auto checkOp = [&](IrOp op) {
if (op.kind == IrOpKind::Inst)
instructions[op.index].lastUse = uint32_t(instIdx);
};
if (isPseudo(inst.cmd))
continue;
checkOp(inst.a);
checkOp(inst.b);
checkOp(inst.c);
checkOp(inst.d);
checkOp(inst.e);
checkOp(inst.f);
}
}
uint32_t getNextInstUse(IrFunction& function, uint32_t targetInstIdx, uint32_t startInstIdx)
{
LUAU_ASSERT(startInstIdx < function.instructions.size());
IrInst& targetInst = function.instructions[targetInstIdx];
for (uint32_t i = startInstIdx; i <= targetInst.lastUse; i++)
{
IrInst& inst = function.instructions[i];
if (isPseudo(inst.cmd))
continue;
if (inst.a.kind == IrOpKind::Inst && inst.a.index == targetInstIdx)
return i;
if (inst.b.kind == IrOpKind::Inst && inst.b.index == targetInstIdx)
return i;
if (inst.c.kind == IrOpKind::Inst && inst.c.index == targetInstIdx)
return i;
if (inst.d.kind == IrOpKind::Inst && inst.d.index == targetInstIdx)
return i;
if (inst.e.kind == IrOpKind::Inst && inst.e.index == targetInstIdx)
return i;
if (inst.f.kind == IrOpKind::Inst && inst.f.index == targetInstIdx)
return i;
}
// There must be a next use at or before the recorded last use location
LUAU_ASSERT(!"failed to find next use");
return targetInst.lastUse;
}
std::pair<uint32_t, uint32_t> getLiveInOutValueCount(IrFunction& function, IrBlock& block)
{
uint32_t liveIns = 0;
uint32_t liveOuts = 0;
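// liveOuts starts as the sum of value use counts in the block; uses made inside the block itself are subtracted in checkOp, leaving only uses from outside (live-outs)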
auto checkOp = [&](IrOp op) {
if (op.kind == IrOpKind::Inst)
{
if (op.index >= block.start && op.index <= block.finish)
liveOuts--;
else
liveIns++;
}
};
for (uint32_t instIdx = block.start; instIdx <= block.finish; instIdx++)
{
IrInst& inst = function.instructions[instIdx];
if (isPseudo(inst.cmd))
continue;
liveOuts += inst.useCount;
checkOp(inst.a);
checkOp(inst.b);
checkOp(inst.c);
checkOp(inst.d);
checkOp(inst.e);
checkOp(inst.f);
}
return std::make_pair(liveIns, liveOuts);
}
uint32_t getLiveInValueCount(IrFunction& function, IrBlock& block)
{
return getLiveInOutValueCount(function, block).first;
}
uint32_t getLiveOutValueCount(IrFunction& function, IrBlock& block)
{
return getLiveInOutValueCount(function, block).second;
}
void requireVariadicSequence(RegisterSet& sourceRs, const RegisterSet& defRs, uint8_t varargStart)
{
if (!defRs.varargSeq)
{
// Peel away registers from variadic sequence that we define
while (defRs.regs.test(varargStart))
varargStart++;
LUAU_ASSERT(!sourceRs.varargSeq || sourceRs.varargStart == varargStart);
sourceRs.varargSeq = true;
sourceRs.varargStart = varargStart;
}
else
{
// The variadic use sequence might include registers before the def sequence starts
for (int i = varargStart; i < defRs.varargStart; i++)
{
if (!defRs.regs.test(i))
sourceRs.regs.set(i);
}
}
}
static RegisterSet computeBlockLiveInRegSet(IrFunction& function, const IrBlock& block, RegisterSet& defRs, std::bitset<256>& capturedRegs)
{
RegisterSet inRs;
auto def = [&](IrOp op, int offset = 0) {
defRs.regs.set(vmRegOp(op) + offset, true);
};
auto use = [&](IrOp op, int offset = 0) {
if (!defRs.regs.test(vmRegOp(op) + offset))
inRs.regs.set(vmRegOp(op) + offset, true);
};
auto maybeDef = [&](IrOp op) {
if (op.kind == IrOpKind::VmReg)
defRs.regs.set(vmRegOp(op), true);
};
auto maybeUse = [&](IrOp op) {
if (op.kind == IrOpKind::VmReg)
{
if (!defRs.regs.test(vmRegOp(op)))
inRs.regs.set(vmRegOp(op), true);
}
};
auto defVarargs = [&](uint8_t varargStart) {
defRs.varargSeq = true;
defRs.varargStart = varargStart;
};
auto useVarargs = [&](uint8_t varargStart) {
requireVariadicSequence(inRs, defRs, varargStart);
// Variadic sequence has been consumed
defRs.varargSeq = false;
defRs.varargStart = 0;
};
auto defRange = [&](int start, int count) {
if (count == -1)
{
defVarargs(start);
}
else
{
for (int i = start; i < start + count; i++)
defRs.regs.set(i, true);
}
};
auto useRange = [&](int start, int count) {
if (count == -1)
{
useVarargs(start);
}
else
{
for (int i = start; i < start + count; i++)
{
if (!defRs.regs.test(i))
inRs.regs.set(i, true);
}
}
};
for (uint32_t instIdx = block.start; instIdx <= block.finish; instIdx++)
{
const IrInst& inst = function.instructions[instIdx];
// For correct analysis, all instruction uses must be handled before handling the definitions
switch (inst.cmd)
{
case IrCmd::LOAD_TAG:
case IrCmd::LOAD_POINTER:
case IrCmd::LOAD_DOUBLE:
case IrCmd::LOAD_INT:
case IrCmd::LOAD_TVALUE:
maybeUse(inst.a); // Argument can also be a VmConst
break;
case IrCmd::STORE_TAG:
case IrCmd::STORE_POINTER:
case IrCmd::STORE_DOUBLE:
case IrCmd::STORE_INT:
case IrCmd::STORE_VECTOR:
case IrCmd::STORE_TVALUE:
maybeDef(inst.a); // Argument can also be a pointer value
break;
case IrCmd::JUMP_IF_TRUTHY:
case IrCmd::JUMP_IF_FALSY:
use(inst.a);
break;
case IrCmd::JUMP_CMP_ANY:
use(inst.a);
use(inst.b);
break;
// A <- B, C
case IrCmd::DO_ARITH:
case IrCmd::GET_TABLE:
use(inst.b);
maybeUse(inst.c); // Argument can also be a VmConst
def(inst.a);
break;
case IrCmd::SET_TABLE:
use(inst.a);
use(inst.b);
maybeUse(inst.c); // Argument can also be a VmConst
break;
// A <- B
case IrCmd::DO_LEN:
use(inst.b);
def(inst.a);
break;
case IrCmd::GET_IMPORT:
def(inst.a);
break;
case IrCmd::CONCAT:
useRange(vmRegOp(inst.a), function.uintOp(inst.b));
defRange(vmRegOp(inst.a), function.uintOp(inst.b));
break;
case IrCmd::GET_UPVALUE:
def(inst.a);
break;
case IrCmd::SET_UPVALUE:
use(inst.b);
break;
case IrCmd::PREPARE_FORN:
use(inst.a);
use(inst.b);
use(inst.c);
def(inst.a);
def(inst.b);
def(inst.c);
break;
case IrCmd::INTERRUPT:
break;
case IrCmd::BARRIER_OBJ:
case IrCmd::BARRIER_TABLE_FORWARD:
use(inst.b);
break;
case IrCmd::CLOSE_UPVALS:
// Closing an upvalue should be counted as a register use (it copies the fresh register value)
// But we lack the required information about the specific set of registers that are affected
// Because we don't plan to optimize captured registers atm, we skip full dataflow analysis for them right now
break;
case IrCmd::CAPTURE:
maybeUse(inst.a);
if (function.boolOp(inst.b))
capturedRegs.set(vmRegOp(inst.a), true);
break;
case IrCmd::SETLIST:
use(inst.b);
useRange(vmRegOp(inst.c), function.intOp(inst.d));
break;
case IrCmd::CALL:
use(inst.a);
useRange(vmRegOp(inst.a) + 1, function.intOp(inst.b));
defRange(vmRegOp(inst.a), function.intOp(inst.c));
break;
case IrCmd::RETURN:
useRange(vmRegOp(inst.a), function.intOp(inst.b));
break;
// TODO: FASTCALL is more restrictive than INVOKE_FASTCALL; we should either determine the exact semantics, or rework it
case IrCmd::FASTCALL:
case IrCmd::INVOKE_FASTCALL:
if (int count = function.intOp(inst.e); count != -1)
{
if (count >= 3)
{
LUAU_ASSERT(inst.d.kind == IrOpKind::VmReg && vmRegOp(inst.d) == vmRegOp(inst.c) + 1);
useRange(vmRegOp(inst.c), count);
}
else
{
if (count >= 1)
use(inst.c);
if (count >= 2)
maybeUse(inst.d); // Argument can also be a VmConst
}
}
else
{
useVarargs(vmRegOp(inst.c));
}
// Multiple return sequences (count == -1) are defined by ADJUST_STACK_TO_REG
if (int count = function.intOp(inst.f); count != -1)
defRange(vmRegOp(inst.b), count);
break;
case IrCmd::FORGLOOP:
// First register is not used by the instruction; we check that it's still 'nil' with CHECK_TAG
use(inst.a, 1);
use(inst.a, 2);
def(inst.a, 2);
defRange(vmRegOp(inst.a) + 3, function.intOp(inst.b));
break;
case IrCmd::FORGLOOP_FALLBACK:
useRange(vmRegOp(inst.a), 3);
def(inst.a, 2);
defRange(vmRegOp(inst.a) + 3, uint8_t(function.intOp(inst.b))); // ignore most significant bit
break;
case IrCmd::FORGPREP_XNEXT_FALLBACK:
use(inst.b);
break;
case IrCmd::FALLBACK_GETGLOBAL:
def(inst.b);
break;
case IrCmd::FALLBACK_SETGLOBAL:
use(inst.b);
break;
case IrCmd::FALLBACK_GETTABLEKS:
use(inst.c);
def(inst.b);
break;
case IrCmd::FALLBACK_SETTABLEKS:
use(inst.b);
use(inst.c);
break;
case IrCmd::FALLBACK_NAMECALL:
use(inst.c);
defRange(vmRegOp(inst.b), 2);
break;
case IrCmd::FALLBACK_PREPVARARGS:
// No effect on explicitly referenced registers
break;
case IrCmd::FALLBACK_GETVARARGS:
defRange(vmRegOp(inst.b), function.intOp(inst.c));
break;
case IrCmd::FALLBACK_NEWCLOSURE:
def(inst.b);
break;
case IrCmd::FALLBACK_DUPCLOSURE:
def(inst.b);
break;
case IrCmd::FALLBACK_FORGPREP:
use(inst.b);
defRange(vmRegOp(inst.b), 3);
break;
case IrCmd::ADJUST_STACK_TO_REG:
defRange(vmRegOp(inst.a), -1);
break;
case IrCmd::ADJUST_STACK_TO_TOP:
// While this can be considered a vararg consumer, it is already handled in fastcall instructions
break;
default:
// All instructions which reference registers have to be handled explicitly
LUAU_ASSERT(inst.a.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.b.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.c.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.d.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.e.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.f.kind != IrOpKind::VmReg);
break;
}
}
return inRs;
}
// The algorithm used here is commonly known as backwards data-flow analysis.
// For each block, we track 'upward-exposed' (live-in) uses of registers - a use of a register that hasn't been defined in the block yet.
// We also track the set of registers that were defined in the block.
// When initial live-in sets of registers are computed, propagation of those uses upwards through predecessors is performed.
// If a predecessor doesn't define the register, it has to be added to that predecessor's live-in set as well.
// Extending the set of live-in registers of a block requires re-checking of that block.
// Propagation runs iteratively, using a worklist of blocks to visit until a fixed point is reached.
// This algorithm can be easily extended to cover phi instructions, but we don't use those yet.
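// As a small illustration (hypothetical blocks and registers): given B0 (defines R0, jumps to B1)
// and B1 (uses R0 and R1), the initial sets are in(B1) = {R0, R1} and in(B0) = {}. Propagation then
// requires B0 to provide B1's live-ins: R0 is defined locally, but R1 is not, so R1 is added to
// in(B0) and B0's predecessors are re-queued, repeating until no live-in set changes.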
static void computeCfgLiveInOutRegSets(IrFunction& function)
{
CfgInfo& info = function.cfg;
// Clear existing data
// 'in' and 'captured' data is not cleared because it will be overwritten below
info.def.clear();
info.out.clear();
// Try to compute Luau VM register use-def info
info.in.resize(function.blocks.size());
info.def.resize(function.blocks.size());
info.out.resize(function.blocks.size());
// Captured registers are tracked for the whole function
// It should be possible to have a more precise analysis for them in the future
std::bitset<256> capturedRegs;
// First we compute live-in set of each block
for (size_t blockIdx = 0; blockIdx < function.blocks.size(); blockIdx++)
{
const IrBlock& block = function.blocks[blockIdx];
if (block.kind == IrBlockKind::Dead)
continue;
info.in[blockIdx] = computeBlockLiveInRegSet(function, block, info.def[blockIdx], capturedRegs);
}
info.captured.regs = capturedRegs;
// With live-in sets ready, we can arrive at a fixed point for both in/out registers by requesting required registers from predecessors
std::vector<uint32_t> worklist;
std::vector<uint8_t> inWorklist;
inWorklist.resize(function.blocks.size(), false);
// We will have to visit each block at least once, so we add all of them to the worklist immediately
for (size_t blockIdx = 0; blockIdx < function.blocks.size(); blockIdx++)
{
const IrBlock& block = function.blocks[blockIdx];
if (block.kind == IrBlockKind::Dead)
continue;
worklist.push_back(uint32_t(blockIdx));
inWorklist[blockIdx] = true;
}
while (!worklist.empty())
{
uint32_t blockIdx = worklist.back();
worklist.pop_back();
inWorklist[blockIdx] = false;
IrBlock& curr = function.blocks[blockIdx];
RegisterSet& inRs = info.in[blockIdx];
RegisterSet& defRs = info.def[blockIdx];
RegisterSet& outRs = info.out[blockIdx];
// Current block has to provide all registers in successor blocks
BlockIteratorWrapper successorsIt = successors(info, blockIdx);
for (uint32_t succIdx : successorsIt)
{
IrBlock& succ = function.blocks[succIdx];
// This is a step away from the usual definition of live range flow through CFG
// Exit from a regular block to a fallback block is not considered a block terminator
// This is because fallback blocks define an alternative implementation of the same operations
// This can cause the current block to define more registers than were actually available at fallback entry
if (curr.kind != IrBlockKind::Fallback && succ.kind == IrBlockKind::Fallback)
{
// If this is the only successor, this skip will not be valid
LUAU_ASSERT(successorsIt.size() != 1);
continue;
}
const RegisterSet& succRs = info.in[succIdx];
outRs.regs |= succRs.regs;
if (succRs.varargSeq)
{
LUAU_ASSERT(!outRs.varargSeq || outRs.varargStart == succRs.varargStart);
outRs.varargSeq = true;
outRs.varargStart = succRs.varargStart;
}
}
RegisterSet oldInRs = inRs;
// If current block didn't define a live-out, it has to be live-in
inRs.regs |= outRs.regs & ~defRs.regs;
if (outRs.varargSeq)
requireVariadicSequence(inRs, defRs, outRs.varargStart);
// If we have new live-ins, we have to notify all predecessors
// We don't allow changes to the start of the variadic sequence, so we skip checking that member
if (inRs.regs != oldInRs.regs || inRs.varargSeq != oldInRs.varargSeq)
{
for (uint32_t predIdx : predecessors(info, blockIdx))
{
if (!inWorklist[predIdx])
{
worklist.push_back(predIdx);
inWorklist[predIdx] = true;
}
}
}
}
// If Proto data is available, validate that entry block arguments match required registers
if (function.proto)
{
RegisterSet& entryIn = info.in[0];
LUAU_ASSERT(!entryIn.varargSeq);
for (size_t i = 0; i < entryIn.regs.size(); i++)
LUAU_ASSERT(!entryIn.regs.test(i) || i < function.proto->numparams);
}
}
static void computeCfgBlockEdges(IrFunction& function)
{
CfgInfo& info = function.cfg;
// Clear existing data
info.predecessorsOffsets.clear();
info.successorsOffsets.clear();
// Compute predecessors block edges
info.predecessorsOffsets.reserve(function.blocks.size());
info.successorsOffsets.reserve(function.blocks.size());
int edgeCount = 0;
for (const IrBlock& block : function.blocks)
{
info.predecessorsOffsets.push_back(edgeCount);
edgeCount += block.useCount;
}
info.predecessors.resize(edgeCount);
info.successors.resize(edgeCount);
edgeCount = 0;
for (size_t blockIdx = 0; blockIdx < function.blocks.size(); blockIdx++)
{
const IrBlock& block = function.blocks[blockIdx];
info.successorsOffsets.push_back(edgeCount);
if (block.kind == IrBlockKind::Dead)
continue;
for (uint32_t instIdx = block.start; instIdx <= block.finish; instIdx++)
{
const IrInst& inst = function.instructions[instIdx];
auto checkOp = [&](IrOp op) {
if (op.kind == IrOpKind::Block)
{
// We use a trick here: the starting offset of the predecessor list doubles as the write position for the next predecessor
// The values will be adjusted back in a separate loop later
info.predecessors[info.predecessorsOffsets[op.index]++] = uint32_t(blockIdx);
info.successors[edgeCount++] = op.index;
}
};
checkOp(inst.a);
checkOp(inst.b);
checkOp(inst.c);
checkOp(inst.d);
checkOp(inst.e);
checkOp(inst.f);
}
}
// Offsets into the predecessor list were used as iterators in the previous loop
// To adjust them back, the block use count is subtracted (the predecessor count is equal to how many uses the block has)
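// Example (sketch): with edges B0->B1, B0->B2, B1->B2, block B2's predecessor slots start at offset 1;
// two writes advance predecessorsOffsets[2] to 3, and subtracting its use count (2) restores the original offset of 1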
for (size_t blockIdx = 0; blockIdx < function.blocks.size(); blockIdx++)
{
const IrBlock& block = function.blocks[blockIdx];
info.predecessorsOffsets[blockIdx] -= block.useCount;
}
}
void computeCfgInfo(IrFunction& function)
{
computeCfgBlockEdges(function);
computeCfgLiveInOutRegSets(function);
}
BlockIteratorWrapper predecessors(const CfgInfo& cfg, uint32_t blockIdx)
{
LUAU_ASSERT(blockIdx < cfg.predecessorsOffsets.size());
uint32_t start = cfg.predecessorsOffsets[blockIdx];
uint32_t end = blockIdx + 1 < cfg.predecessorsOffsets.size() ? cfg.predecessorsOffsets[blockIdx + 1] : uint32_t(cfg.predecessors.size());
return BlockIteratorWrapper{cfg.predecessors.data() + start, cfg.predecessors.data() + end};
}
BlockIteratorWrapper successors(const CfgInfo& cfg, uint32_t blockIdx)
{
LUAU_ASSERT(blockIdx < cfg.successorsOffsets.size());
uint32_t start = cfg.successorsOffsets[blockIdx];
uint32_t end = blockIdx + 1 < cfg.successorsOffsets.size() ? cfg.successorsOffsets[blockIdx + 1] : uint32_t(cfg.successors.size());
return BlockIteratorWrapper{cfg.successors.data() + start, cfg.successors.data() + end};
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,651 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/IrBuilder.h"
#include "Luau/IrAnalysis.h"
#include "Luau/IrUtils.h"
#include "CustomExecUtils.h"
#include "IrTranslation.h"
#include "lapi.h"
#include <string.h>
namespace Luau
{
namespace CodeGen
{
constexpr unsigned kNoAssociatedBlockIndex = ~0u;
IrBuilder::IrBuilder()
: constantMap({IrConstKind::Bool, ~0ull})
{
}
void IrBuilder::buildFunctionIr(Proto* proto)
{
function.proto = proto;
// Rebuild original control flow blocks
rebuildBytecodeBasicBlocks(proto);
function.bcMapping.resize(proto->sizecode, {~0u, ~0u});
// Translate all instructions to IR inside blocks
for (int i = 0; i < proto->sizecode;)
{
const Instruction* pc = &proto->code[i];
LuauOpcode op = LuauOpcode(LUAU_INSN_OP(*pc));
int nexti = i + getOpLength(op);
LUAU_ASSERT(nexti <= proto->sizecode);
function.bcMapping[i] = {uint32_t(function.instructions.size()), ~0u};
// Begin new block at this instruction if it was in the bytecode or requested during translation
if (instIndexToBlock[i] != kNoAssociatedBlockIndex)
beginBlock(blockAtInst(i));
// We skip dead bytecode instructions when they appear after the block was already terminated
if (!inTerminatedBlock)
translateInst(op, pc, i);
i = nexti;
LUAU_ASSERT(i <= proto->sizecode);
// If we are going into a new block at the next instruction and it's a fallthrough, a jump has to be placed to mark block termination
if (i < int(instIndexToBlock.size()) && instIndexToBlock[i] != kNoAssociatedBlockIndex)
{
if (!isBlockTerminator(function.instructions.back().cmd))
inst(IrCmd::JUMP, blockAtInst(i));
}
}
// Now that everything has been generated, compute use counts
updateUseCounts(function);
}
void IrBuilder::rebuildBytecodeBasicBlocks(Proto* proto)
{
instIndexToBlock.resize(proto->sizecode, kNoAssociatedBlockIndex);
// Mark jump targets
std::vector<uint8_t> jumpTargets(proto->sizecode, 0);
for (int i = 0; i < proto->sizecode;)
{
const Instruction* pc = &proto->code[i];
LuauOpcode op = LuauOpcode(LUAU_INSN_OP(*pc));
int target = getJumpTarget(*pc, uint32_t(i));
if (target >= 0 && !isFastCall(op))
jumpTargets[target] = true;
i += getOpLength(op);
LUAU_ASSERT(i <= proto->sizecode);
}
// Bytecode blocks are created at bytecode jump targets and the start of a function
jumpTargets[0] = true;
for (int i = 0; i < proto->sizecode; i++)
{
if (jumpTargets[i])
{
IrOp b = block(IrBlockKind::Bytecode);
instIndexToBlock[i] = b.index;
}
}
}
void IrBuilder::translateInst(LuauOpcode op, const Instruction* pc, int i)
{
switch (op)
{
case LOP_NOP:
break;
case LOP_LOADNIL:
translateInstLoadNil(*this, pc);
break;
case LOP_LOADB:
translateInstLoadB(*this, pc, i);
break;
case LOP_LOADN:
translateInstLoadN(*this, pc);
break;
case LOP_LOADK:
translateInstLoadK(*this, pc);
break;
case LOP_LOADKX:
translateInstLoadKX(*this, pc);
break;
case LOP_MOVE:
translateInstMove(*this, pc);
break;
case LOP_GETGLOBAL:
translateInstGetGlobal(*this, pc, i);
break;
case LOP_SETGLOBAL:
translateInstSetGlobal(*this, pc, i);
break;
case LOP_CALL:
inst(IrCmd::INTERRUPT, constUint(i));
inst(IrCmd::SET_SAVEDPC, constUint(i + 1));
inst(IrCmd::CALL, vmReg(LUAU_INSN_A(*pc)), constInt(LUAU_INSN_B(*pc) - 1), constInt(LUAU_INSN_C(*pc) - 1));
if (activeFastcallFallback)
{
inst(IrCmd::JUMP, fastcallFallbackReturn);
beginBlock(fastcallFallbackReturn);
activeFastcallFallback = false;
}
break;
case LOP_RETURN:
inst(IrCmd::INTERRUPT, constUint(i));
inst(IrCmd::RETURN, vmReg(LUAU_INSN_A(*pc)), constInt(LUAU_INSN_B(*pc) - 1));
break;
case LOP_GETTABLE:
translateInstGetTable(*this, pc, i);
break;
case LOP_SETTABLE:
translateInstSetTable(*this, pc, i);
break;
case LOP_GETTABLEKS:
translateInstGetTableKS(*this, pc, i);
break;
case LOP_SETTABLEKS:
translateInstSetTableKS(*this, pc, i);
break;
case LOP_GETTABLEN:
translateInstGetTableN(*this, pc, i);
break;
case LOP_SETTABLEN:
translateInstSetTableN(*this, pc, i);
break;
case LOP_JUMP:
translateInstJump(*this, pc, i);
break;
case LOP_JUMPBACK:
translateInstJumpBack(*this, pc, i);
break;
case LOP_JUMPIF:
translateInstJumpIf(*this, pc, i, /* not_ */ false);
break;
case LOP_JUMPIFNOT:
translateInstJumpIf(*this, pc, i, /* not_ */ true);
break;
case LOP_JUMPIFEQ:
translateInstJumpIfEq(*this, pc, i, /* not_ */ false);
break;
case LOP_JUMPIFLE:
translateInstJumpIfCond(*this, pc, i, IrCondition::LessEqual);
break;
case LOP_JUMPIFLT:
translateInstJumpIfCond(*this, pc, i, IrCondition::Less);
break;
case LOP_JUMPIFNOTEQ:
translateInstJumpIfEq(*this, pc, i, /* not_ */ true);
break;
case LOP_JUMPIFNOTLE:
translateInstJumpIfCond(*this, pc, i, IrCondition::NotLessEqual);
break;
case LOP_JUMPIFNOTLT:
translateInstJumpIfCond(*this, pc, i, IrCondition::NotLess);
break;
case LOP_JUMPX:
translateInstJumpX(*this, pc, i);
break;
case LOP_JUMPXEQKNIL:
translateInstJumpxEqNil(*this, pc, i);
break;
case LOP_JUMPXEQKB:
translateInstJumpxEqB(*this, pc, i);
break;
case LOP_JUMPXEQKN:
translateInstJumpxEqN(*this, pc, i);
break;
case LOP_JUMPXEQKS:
translateInstJumpxEqS(*this, pc, i);
break;
case LOP_ADD:
translateInstBinary(*this, pc, i, TM_ADD);
break;
case LOP_SUB:
translateInstBinary(*this, pc, i, TM_SUB);
break;
case LOP_MUL:
translateInstBinary(*this, pc, i, TM_MUL);
break;
case LOP_DIV:
translateInstBinary(*this, pc, i, TM_DIV);
break;
case LOP_MOD:
translateInstBinary(*this, pc, i, TM_MOD);
break;
case LOP_POW:
translateInstBinary(*this, pc, i, TM_POW);
break;
case LOP_ADDK:
translateInstBinaryK(*this, pc, i, TM_ADD);
break;
case LOP_SUBK:
translateInstBinaryK(*this, pc, i, TM_SUB);
break;
case LOP_MULK:
translateInstBinaryK(*this, pc, i, TM_MUL);
break;
case LOP_DIVK:
translateInstBinaryK(*this, pc, i, TM_DIV);
break;
case LOP_MODK:
translateInstBinaryK(*this, pc, i, TM_MOD);
break;
case LOP_POWK:
translateInstBinaryK(*this, pc, i, TM_POW);
break;
case LOP_NOT:
translateInstNot(*this, pc);
break;
case LOP_MINUS:
translateInstMinus(*this, pc, i);
break;
case LOP_LENGTH:
translateInstLength(*this, pc, i);
break;
case LOP_NEWTABLE:
translateInstNewTable(*this, pc, i);
break;
case LOP_DUPTABLE:
translateInstDupTable(*this, pc, i);
break;
case LOP_SETLIST:
inst(IrCmd::SETLIST, constUint(i), vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), constInt(LUAU_INSN_C(*pc) - 1), constUint(pc[1]));
break;
case LOP_GETUPVAL:
translateInstGetUpval(*this, pc, i);
break;
case LOP_SETUPVAL:
translateInstSetUpval(*this, pc, i);
break;
case LOP_CLOSEUPVALS:
translateInstCloseUpvals(*this, pc);
break;
case LOP_FASTCALL:
{
int skip = LUAU_INSN_C(*pc);
IrOp next = blockAtInst(i + skip + 2);
translateFastCallN(*this, pc, i, false, 0, {}, next);
activeFastcallFallback = true;
fastcallFallbackReturn = next;
break;
}
case LOP_FASTCALL1:
{
int skip = LUAU_INSN_C(*pc);
IrOp next = blockAtInst(i + skip + 2);
translateFastCallN(*this, pc, i, true, 1, undef(), next);
activeFastcallFallback = true;
fastcallFallbackReturn = next;
break;
}
case LOP_FASTCALL2:
{
int skip = LUAU_INSN_C(*pc);
IrOp next = blockAtInst(i + skip + 2);
translateFastCallN(*this, pc, i, true, 2, vmReg(pc[1]), next);
activeFastcallFallback = true;
fastcallFallbackReturn = next;
break;
}
case LOP_FASTCALL2K:
{
int skip = LUAU_INSN_C(*pc);
IrOp next = blockAtInst(i + skip + 2);
translateFastCallN(*this, pc, i, true, 2, vmConst(pc[1]), next);
activeFastcallFallback = true;
fastcallFallbackReturn = next;
break;
}
case LOP_FORNPREP:
translateInstForNPrep(*this, pc, i);
break;
case LOP_FORNLOOP:
translateInstForNLoop(*this, pc, i);
break;
case LOP_FORGLOOP:
{
int aux = int(pc[1]);
// We have a translation for ipairs-style traversal; general loop iteration is still too complex
if (aux < 0)
{
translateInstForGLoopIpairs(*this, pc, i);
}
else
{
int ra = LUAU_INSN_A(*pc);
IrOp loopRepeat = blockAtInst(i + 1 + LUAU_INSN_D(*pc));
IrOp loopExit = blockAtInst(i + getOpLength(LOP_FORGLOOP));
IrOp fallback = block(IrBlockKind::Fallback);
inst(IrCmd::INTERRUPT, constUint(i));
loadAndCheckTag(vmReg(ra), LUA_TNIL, fallback);
inst(IrCmd::FORGLOOP, vmReg(ra), constInt(aux), loopRepeat, loopExit);
beginBlock(fallback);
inst(IrCmd::SET_SAVEDPC, constUint(i + 1));
inst(IrCmd::FORGLOOP_FALLBACK, vmReg(ra), constInt(aux), loopRepeat, loopExit);
beginBlock(loopExit);
}
break;
}
case LOP_FORGPREP_NEXT:
translateInstForGPrepNext(*this, pc, i);
break;
case LOP_FORGPREP_INEXT:
translateInstForGPrepInext(*this, pc, i);
break;
case LOP_AND:
translateInstAndX(*this, pc, i, vmReg(LUAU_INSN_C(*pc)));
break;
case LOP_ANDK:
translateInstAndX(*this, pc, i, vmConst(LUAU_INSN_C(*pc)));
break;
case LOP_OR:
translateInstOrX(*this, pc, i, vmReg(LUAU_INSN_C(*pc)));
break;
case LOP_ORK:
translateInstOrX(*this, pc, i, vmConst(LUAU_INSN_C(*pc)));
break;
case LOP_COVERAGE:
inst(IrCmd::COVERAGE, constUint(i));
break;
case LOP_GETIMPORT:
translateInstGetImport(*this, pc, i);
break;
case LOP_CONCAT:
translateInstConcat(*this, pc, i);
break;
case LOP_CAPTURE:
translateInstCapture(*this, pc, i);
break;
case LOP_NAMECALL:
translateInstNamecall(*this, pc, i);
break;
case LOP_PREPVARARGS:
inst(IrCmd::FALLBACK_PREPVARARGS, constUint(i), constInt(LUAU_INSN_A(*pc)));
break;
case LOP_GETVARARGS:
inst(IrCmd::FALLBACK_GETVARARGS, constUint(i), vmReg(LUAU_INSN_A(*pc)), constInt(LUAU_INSN_B(*pc) - 1));
break;
case LOP_NEWCLOSURE:
inst(IrCmd::FALLBACK_NEWCLOSURE, constUint(i), vmReg(LUAU_INSN_A(*pc)), constUint(LUAU_INSN_D(*pc)));
break;
case LOP_DUPCLOSURE:
inst(IrCmd::FALLBACK_DUPCLOSURE, constUint(i), vmReg(LUAU_INSN_A(*pc)), vmConst(LUAU_INSN_D(*pc)));
break;
case LOP_FORGPREP:
{
IrOp loopStart = blockAtInst(i + 1 + LUAU_INSN_D(*pc));
inst(IrCmd::FALLBACK_FORGPREP, constUint(i), vmReg(LUAU_INSN_A(*pc)), loopStart);
break;
}
default:
LUAU_ASSERT(!"unknown instruction");
break;
}
}
bool IrBuilder::isInternalBlock(IrOp block)
{
IrBlock& target = function.blocks[block.index];
return target.kind == IrBlockKind::Internal;
}
void IrBuilder::beginBlock(IrOp block)
{
IrBlock& target = function.blocks[block.index];
activeBlockIdx = block.index;
LUAU_ASSERT(target.start == ~0u || target.start == uint32_t(function.instructions.size()));
target.start = uint32_t(function.instructions.size());
inTerminatedBlock = false;
}
void IrBuilder::loadAndCheckTag(IrOp loc, uint8_t tag, IrOp fallback)
{
inst(IrCmd::CHECK_TAG, inst(IrCmd::LOAD_TAG, loc), constTag(tag), fallback);
}
void IrBuilder::clone(const IrBlock& source, bool removeCurrentTerminator)
{
DenseHashMap<uint32_t, uint32_t> instRedir{~0u};
auto redirect = [&instRedir](IrOp& op) {
if (op.kind == IrOpKind::Inst)
{
if (const uint32_t* newIndex = instRedir.find(op.index))
op.index = *newIndex;
else
LUAU_ASSERT(!"values can only be used if they are defined in the same block");
}
};
if (removeCurrentTerminator && inTerminatedBlock)
{
IrBlock& active = function.blocks[activeBlockIdx];
IrInst& term = function.instructions[active.finish];
kill(function, term);
inTerminatedBlock = false;
}
for (uint32_t index = source.start; index <= source.finish; index++)
{
LUAU_ASSERT(index < function.instructions.size());
IrInst clone = function.instructions[index];
// Skip pseudo instructions to make clone more compact, but validate that they have no users
if (isPseudo(clone.cmd))
{
LUAU_ASSERT(clone.useCount == 0);
continue;
}
redirect(clone.a);
redirect(clone.b);
redirect(clone.c);
redirect(clone.d);
redirect(clone.e);
redirect(clone.f);
addUse(function, clone.a);
addUse(function, clone.b);
addUse(function, clone.c);
addUse(function, clone.d);
addUse(function, clone.e);
addUse(function, clone.f);
// Instructions that referenced the original will have to be adjusted to use the clone
instRedir[index] = uint32_t(function.instructions.size());
// Reconstruct the fresh clone
inst(clone.cmd, clone.a, clone.b, clone.c, clone.d, clone.e, clone.f);
}
}
IrOp IrBuilder::undef()
{
return {IrOpKind::Undef, 0};
}
IrOp IrBuilder::constBool(bool value)
{
IrConst constant;
constant.kind = IrConstKind::Bool;
constant.valueBool = value;
return constAny(constant, uint64_t(value));
}
IrOp IrBuilder::constInt(int value)
{
IrConst constant;
constant.kind = IrConstKind::Int;
constant.valueInt = value;
return constAny(constant, uint64_t(value));
}
IrOp IrBuilder::constUint(unsigned value)
{
IrConst constant;
constant.kind = IrConstKind::Uint;
constant.valueUint = value;
return constAny(constant, uint64_t(value));
}
IrOp IrBuilder::constDouble(double value)
{
IrConst constant;
constant.kind = IrConstKind::Double;
constant.valueDouble = value;
uint64_t asCommonKey;
static_assert(sizeof(asCommonKey) == sizeof(value), "Expecting double to be 64-bit");
memcpy(&asCommonKey, &value, sizeof(value));
return constAny(constant, asCommonKey);
}
IrOp IrBuilder::constTag(uint8_t value)
{
IrConst constant;
constant.kind = IrConstKind::Tag;
constant.valueTag = value;
return constAny(constant, uint64_t(value));
}
IrOp IrBuilder::constAny(IrConst constant, uint64_t asCommonKey)
{
ConstantKey key{constant.kind, asCommonKey};
if (uint32_t* cache = constantMap.find(key))
return {IrOpKind::Constant, *cache};
uint32_t index = uint32_t(function.constants.size());
function.constants.push_back(constant);
constantMap[key] = index;
return {IrOpKind::Constant, index};
}
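// Note: constants are deduplicated through constantMap above, so e.g. two constInt(5) calls yield the same IrOp and the constant table stays compact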
IrOp IrBuilder::cond(IrCondition cond)
{
return {IrOpKind::Condition, uint32_t(cond)};
}
IrOp IrBuilder::inst(IrCmd cmd)
{
return inst(cmd, {}, {}, {}, {}, {}, {});
}
IrOp IrBuilder::inst(IrCmd cmd, IrOp a)
{
return inst(cmd, a, {}, {}, {}, {}, {});
}
IrOp IrBuilder::inst(IrCmd cmd, IrOp a, IrOp b)
{
return inst(cmd, a, b, {}, {}, {}, {});
}
IrOp IrBuilder::inst(IrCmd cmd, IrOp a, IrOp b, IrOp c)
{
return inst(cmd, a, b, c, {}, {}, {});
}
IrOp IrBuilder::inst(IrCmd cmd, IrOp a, IrOp b, IrOp c, IrOp d)
{
return inst(cmd, a, b, c, d, {}, {});
}
IrOp IrBuilder::inst(IrCmd cmd, IrOp a, IrOp b, IrOp c, IrOp d, IrOp e)
{
return inst(cmd, a, b, c, d, e, {});
}
IrOp IrBuilder::inst(IrCmd cmd, IrOp a, IrOp b, IrOp c, IrOp d, IrOp e, IrOp f)
{
uint32_t index = uint32_t(function.instructions.size());
function.instructions.push_back({cmd, a, b, c, d, e, f});
LUAU_ASSERT(!inTerminatedBlock);
if (isBlockTerminator(cmd))
{
function.blocks[activeBlockIdx].finish = index;
inTerminatedBlock = true;
}
return {IrOpKind::Inst, index};
}
IrOp IrBuilder::block(IrBlockKind kind)
{
if (kind == IrBlockKind::Internal && activeFastcallFallback)
kind = IrBlockKind::Fallback;
uint32_t index = uint32_t(function.blocks.size());
function.blocks.push_back(IrBlock{kind});
return IrOp{IrOpKind::Block, index};
}
IrOp IrBuilder::blockAtInst(uint32_t index)
{
uint32_t blockIndex = instIndexToBlock[index];
if (blockIndex != kNoAssociatedBlockIndex)
return IrOp{IrOpKind::Block, blockIndex};
return block(IrBlockKind::Internal);
}
IrOp IrBuilder::vmReg(uint8_t index)
{
return {IrOpKind::VmReg, index};
}
IrOp IrBuilder::vmConst(uint32_t index)
{
return {IrOpKind::VmConst, index};
}
IrOp IrBuilder::vmUpvalue(uint8_t index)
{
return {IrOpKind::VmUpvalue, index};
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,431 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/IrCallWrapperX64.h"
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/IrRegAllocX64.h"
#include "EmitCommonX64.h"
namespace Luau
{
namespace CodeGen
{
namespace X64
{
static const std::array<OperandX64, 6> kWindowsGprOrder = {rcx, rdx, r8, r9, addr[rsp + 32], addr[rsp + 40]};
static const std::array<OperandX64, 6> kSystemvGprOrder = {rdi, rsi, rdx, rcx, r8, r9};
static const std::array<OperandX64, 4> kXmmOrder = {xmm0, xmm1, xmm2, xmm3}; // Common order for first 4 fp arguments on Windows/SystemV
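// For example, a hypothetical call f(L, 42, 3.5) would pass L in rdi, 42 in esi and 3.5 in xmm0 on
// SystemV; on Windows the same arguments land in rcx, edx and xmm2, since gpr and xmm positions
// advance together there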
static bool sameUnderlyingRegister(RegisterX64 a, RegisterX64 b)
{
SizeX64 underlyingSizeA = a.size == SizeX64::xmmword ? SizeX64::xmmword : SizeX64::qword;
SizeX64 underlyingSizeB = b.size == SizeX64::xmmword ? SizeX64::xmmword : SizeX64::qword;
return underlyingSizeA == underlyingSizeB && a.index == b.index;
}
IrCallWrapperX64::IrCallWrapperX64(IrRegAllocX64& regs, AssemblyBuilderX64& build, uint32_t instIdx)
: regs(regs)
, build(build)
, instIdx(instIdx)
, funcOp(noreg)
{
gprUses.fill(0);
xmmUses.fill(0);
}
void IrCallWrapperX64::addArgument(SizeX64 targetSize, OperandX64 source, IrOp sourceOp)
{
// Instruction operands rely on current instruction index for lifetime tracking
LUAU_ASSERT(instIdx != kInvalidInstIdx || sourceOp.kind == IrOpKind::None);
LUAU_ASSERT(argCount < kMaxCallArguments);
CallArgument& arg = args[argCount++];
arg = {targetSize, source, sourceOp};
arg.target = getNextArgumentTarget(targetSize);
if (build.abi == ABIX64::Windows)
{
// On Windows, gpr/xmm register positions move in sync
gprPos++;
xmmPos++;
}
else
{
if (targetSize == SizeX64::xmmword)
xmmPos++;
else
gprPos++;
}
}
void IrCallWrapperX64::addArgument(SizeX64 targetSize, ScopedRegX64& scopedReg)
{
addArgument(targetSize, scopedReg.release(), {});
}
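// A typical call sequence then looks like this (sketch; argument values are illustrative):
//   IrCallWrapperX64 callWrap(regs, build, instIdx);
//   callWrap.addArgument(SizeX64::qword, rState);
//   callWrap.addArgument(SizeX64::dword, int32_t(42));
//   callWrap.call(qword[rNativeContext + offsetof(NativeContext, someHelper)]);
// where 'someHelper' stands in for any NativeContext entry point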
void IrCallWrapperX64::call(const OperandX64& func)
{
funcOp = func;
countRegisterUses();
for (int i = 0; i < argCount; ++i)
{
CallArgument& arg = args[i];
if (arg.sourceOp.kind != IrOpKind::None)
{
if (IrInst* inst = regs.function.asInstOp(arg.sourceOp))
{
// Source registers are recorded separately from source operands in CallArgument
// If source is the last use of IrInst, clear the register from the operand
if (regs.isLastUseReg(*inst, instIdx))
inst->regX64 = noreg;
// If it's not the last use and register is volatile, register ownership is taken, which also spills the operand
else if (inst->regX64.size == SizeX64::xmmword || regs.shouldFreeGpr(inst->regX64))
regs.takeReg(inst->regX64, kInvalidInstIdx);
}
}
// Immediate values are stored last since they don't interfere and the target register can still be used temporarily
if (arg.source.cat == CategoryX64::imm)
{
arg.candidate = false;
}
// Arguments passed through stack can be handled immediately
else if (arg.target.cat == CategoryX64::mem)
{
if (arg.source.cat == CategoryX64::mem)
{
ScopedRegX64 tmp{regs, arg.target.memSize};
freeSourceRegisters(arg);
if (arg.source.memSize == SizeX64::none)
build.lea(tmp.reg, arg.source);
else
build.mov(tmp.reg, arg.source);
build.mov(arg.target, tmp.reg);
}
else
{
freeSourceRegisters(arg);
build.mov(arg.target, arg.source);
}
arg.candidate = false;
}
// Skip arguments that are already in their place
else if (arg.source.cat == CategoryX64::reg && sameUnderlyingRegister(arg.target.base, arg.source.base))
{
freeSourceRegisters(arg);
// If target is not used as source in other arguments, prevent register allocator from giving it out
if (getRegisterUses(arg.target.base) == 0)
regs.takeReg(arg.target.base, kInvalidInstIdx);
else // Otherwise, make sure we won't free it when last source use is completed
addRegisterUse(arg.target.base);
arg.candidate = false;
}
}
// Repeat until we run out of arguments to pass
while (true)
{
// Find target argument register that is not an active source
if (CallArgument* candidate = findNonInterferingArgument())
{
// This section is only for handling register targets
LUAU_ASSERT(candidate->target.cat == CategoryX64::reg);
freeSourceRegisters(*candidate);
LUAU_ASSERT(getRegisterUses(candidate->target.base) == 0);
regs.takeReg(candidate->target.base, kInvalidInstIdx);
moveToTarget(*candidate);
candidate->candidate = false;
}
// If all registers cross-interfere (rcx <- rdx, rdx <- rcx), one has to be renamed
else if (RegisterX64 conflict = findConflictingTarget(); conflict != noreg)
{
renameConflictingRegister(conflict);
}
else
{
for (int i = 0; i < argCount; ++i)
LUAU_ASSERT(!args[i].candidate);
break;
}
}
// Handle immediate arguments last
for (int i = 0; i < argCount; ++i)
{
CallArgument& arg = args[i];
if (arg.source.cat == CategoryX64::imm)
{
// There could be a conflict with the function source register; make this argument a candidate so the conflict check can find it
arg.candidate = true;
if (RegisterX64 conflict = findConflictingTarget(); conflict != noreg)
renameConflictingRegister(conflict);
if (arg.target.cat == CategoryX64::reg)
regs.takeReg(arg.target.base, kInvalidInstIdx);
moveToTarget(arg);
arg.candidate = false;
}
}
// Free registers used in the function call
removeRegisterUse(funcOp.base);
removeRegisterUse(funcOp.index);
// Just before the call is made, argument registers are all marked as free in the register allocator
for (int i = 0; i < argCount; ++i)
{
CallArgument& arg = args[i];
if (arg.target.cat == CategoryX64::reg)
regs.freeReg(arg.target.base);
}
regs.preserveAndFreeInstValues();
regs.assertAllFree();
build.call(funcOp);
}
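// suggestNextArgumentRegister lets callers reserve the register the next argument of a given size
// would occupy; when that argument would go on the stack instead, any free register of the right
// size is handed out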
RegisterX64 IrCallWrapperX64::suggestNextArgumentRegister(SizeX64 size) const
{
OperandX64 target = getNextArgumentTarget(size);
return target.cat == CategoryX64::reg ? regs.takeReg(target.base, kInvalidInstIdx) : regs.allocReg(size, kInvalidInstIdx);
}
OperandX64 IrCallWrapperX64::getNextArgumentTarget(SizeX64 size) const
{
if (size == SizeX64::xmmword)
{
LUAU_ASSERT(size_t(xmmPos) < kXmmOrder.size());
return kXmmOrder[xmmPos];
}
const std::array<OperandX64, 6>& gprOrder = build.abi == ABIX64::Windows ? kWindowsGprOrder : kSystemvGprOrder;
LUAU_ASSERT(size_t(gprPos) < gprOrder.size());
OperandX64 target = gprOrder[gprPos];
// Keep requested argument size
if (target.cat == CategoryX64::reg)
target.base.size = size;
else if (target.cat == CategoryX64::mem)
target.memSize = size;
return target;
}
void IrCallWrapperX64::countRegisterUses()
{
for (int i = 0; i < argCount; ++i)
{
addRegisterUse(args[i].source.base);
addRegisterUse(args[i].source.index);
}
addRegisterUse(funcOp.base);
addRegisterUse(funcOp.index);
}
CallArgument* IrCallWrapperX64::findNonInterferingArgument()
{
for (int i = 0; i < argCount; ++i)
{
CallArgument& arg = args[i];
if (arg.candidate && !interferesWithActiveSources(arg, i) && !interferesWithOperand(funcOp, arg.target.base))
return &arg;
}
return nullptr;
}
bool IrCallWrapperX64::interferesWithOperand(const OperandX64& op, RegisterX64 reg) const
{
return sameUnderlyingRegister(op.base, reg) || sameUnderlyingRegister(op.index, reg);
}
bool IrCallWrapperX64::interferesWithActiveSources(const CallArgument& targetArg, int targetArgIndex) const
{
for (int i = 0; i < argCount; ++i)
{
const CallArgument& arg = args[i];
if (arg.candidate && i != targetArgIndex && interferesWithOperand(arg.source, targetArg.target.base))
return true;
}
return false;
}
bool IrCallWrapperX64::interferesWithActiveTarget(RegisterX64 sourceReg) const
{
for (int i = 0; i < argCount; ++i)
{
const CallArgument& arg = args[i];
if (arg.candidate && sameUnderlyingRegister(arg.target.base, sourceReg))
return true;
}
return false;
}
void IrCallWrapperX64::moveToTarget(CallArgument& arg)
{
if (arg.source.cat == CategoryX64::reg)
{
RegisterX64 source = arg.source.base;
if (source.size == SizeX64::xmmword)
build.vmovsd(arg.target, source, source);
else
build.mov(arg.target, source);
}
else if (arg.source.cat == CategoryX64::imm)
{
build.mov(arg.target, arg.source);
}
else
{
if (arg.source.memSize == SizeX64::none)
build.lea(arg.target, arg.source);
else if (arg.target.base.size == SizeX64::xmmword && arg.source.memSize == SizeX64::xmmword)
build.vmovups(arg.target, arg.source);
else if (arg.target.base.size == SizeX64::xmmword)
build.vmovsd(arg.target, arg.source);
else
build.mov(arg.target, arg.source);
}
}
void IrCallWrapperX64::freeSourceRegisters(CallArgument& arg)
{
removeRegisterUse(arg.source.base);
removeRegisterUse(arg.source.index);
}
void IrCallWrapperX64::renameRegister(RegisterX64& target, RegisterX64 reg, RegisterX64 replacement)
{
if (sameUnderlyingRegister(target, reg))
{
addRegisterUse(replacement);
removeRegisterUse(target);
target.index = replacement.index; // Only change index, size is preserved
}
}
void IrCallWrapperX64::renameSourceRegisters(RegisterX64 reg, RegisterX64 replacement)
{
for (int i = 0; i < argCount; ++i)
{
CallArgument& arg = args[i];
if (arg.candidate)
{
renameRegister(arg.source.base, reg, replacement);
renameRegister(arg.source.index, reg, replacement);
}
}
renameRegister(funcOp.base, reg, replacement);
renameRegister(funcOp.index, reg, replacement);
}
RegisterX64 IrCallWrapperX64::findConflictingTarget() const
{
for (int i = 0; i < argCount; ++i)
{
const CallArgument& arg = args[i];
if (arg.candidate)
{
if (interferesWithActiveTarget(arg.source.base))
return arg.source.base;
if (interferesWithActiveTarget(arg.source.index))
return arg.source.index;
}
}
if (interferesWithActiveTarget(funcOp.base))
return funcOp.base;
if (interferesWithActiveTarget(funcOp.index))
return funcOp.index;
return noreg;
}
void IrCallWrapperX64::renameConflictingRegister(RegisterX64 conflict)
{
// Get a fresh register
RegisterX64 freshReg = regs.allocReg(conflict.size, kInvalidInstIdx);
if (conflict.size == SizeX64::xmmword)
build.vmovsd(freshReg, conflict, conflict);
else
build.mov(freshReg, conflict);
renameSourceRegisters(conflict, freshReg);
}
int IrCallWrapperX64::getRegisterUses(RegisterX64 reg) const
{
if (reg.size == SizeX64::xmmword)
return xmmUses[reg.index];
if (reg.size != SizeX64::none)
return gprUses[reg.index];
return 0;
}
void IrCallWrapperX64::addRegisterUse(RegisterX64 reg)
{
if (reg.size == SizeX64::xmmword)
xmmUses[reg.index]++;
else if (reg.size != SizeX64::none)
gprUses[reg.index]++;
}
void IrCallWrapperX64::removeRegisterUse(RegisterX64 reg)
{
if (reg.size == SizeX64::xmmword)
{
LUAU_ASSERT(xmmUses[reg.index] != 0);
xmmUses[reg.index]--;
if (xmmUses[reg.index] == 0) // we don't use persistent xmm regs so no need to call shouldFreeRegister
regs.freeReg(reg);
}
else if (reg.size != SizeX64::none)
{
LUAU_ASSERT(gprUses[reg.index] != 0);
gprUses[reg.index]--;
if (gprUses[reg.index] == 0 && regs.shouldFreeGpr(reg))
regs.freeReg(reg);
}
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau

luau/CodeGen/src/IrDump.cpp
View File

@ -0,0 +1,766 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/IrDump.h"
#include "Luau/IrUtils.h"
#include "lua.h"
#include <stdarg.h>
namespace Luau
{
namespace CodeGen
{
static const char* textForCondition[] = {
"eq", "not_eq", "lt", "not_lt", "le", "not_le", "gt", "not_gt", "ge", "not_ge", "u_lt", "u_le", "u_gt", "u_ge"};
static_assert(sizeof(textForCondition) / sizeof(textForCondition[0]) == size_t(IrCondition::Count), "all conditions have to be covered");
const int kDetailsAlignColumn = 60;
LUAU_PRINTF_ATTR(2, 3)
static void append(std::string& result, const char* fmt, ...)
{
char buf[256];
va_list args;
va_start(args, fmt);
vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
result.append(buf);
}
static void padToDetailColumn(std::string& result, size_t lineStart)
{
int pad = kDetailsAlignColumn - int(result.size() - lineStart);
if (pad > 0)
result.append(pad, ' ');
}
static const char* getTagName(uint8_t tag)
{
switch (tag)
{
case LUA_TNIL:
return "tnil";
case LUA_TBOOLEAN:
return "tboolean";
case LUA_TLIGHTUSERDATA:
return "tlightuserdata";
case LUA_TNUMBER:
return "tnumber";
case LUA_TVECTOR:
return "tvector";
case LUA_TSTRING:
return "tstring";
case LUA_TTABLE:
return "ttable";
case LUA_TFUNCTION:
return "tfunction";
case LUA_TUSERDATA:
return "tuserdata";
case LUA_TTHREAD:
return "tthread";
default:
LUAU_ASSERT(!"Unknown type tag");
LUAU_UNREACHABLE();
}
}
const char* getCmdName(IrCmd cmd)
{
switch (cmd)
{
case IrCmd::NOP:
return "NOP";
case IrCmd::LOAD_TAG:
return "LOAD_TAG";
case IrCmd::LOAD_POINTER:
return "LOAD_POINTER";
case IrCmd::LOAD_DOUBLE:
return "LOAD_DOUBLE";
case IrCmd::LOAD_INT:
return "LOAD_INT";
case IrCmd::LOAD_TVALUE:
return "LOAD_TVALUE";
case IrCmd::LOAD_NODE_VALUE_TV:
return "LOAD_NODE_VALUE_TV";
case IrCmd::LOAD_ENV:
return "LOAD_ENV";
case IrCmd::GET_ARR_ADDR:
return "GET_ARR_ADDR";
case IrCmd::GET_SLOT_NODE_ADDR:
return "GET_SLOT_NODE_ADDR";
case IrCmd::GET_HASH_NODE_ADDR:
return "GET_HASH_NODE_ADDR";
case IrCmd::STORE_TAG:
return "STORE_TAG";
case IrCmd::STORE_POINTER:
return "STORE_POINTER";
case IrCmd::STORE_DOUBLE:
return "STORE_DOUBLE";
case IrCmd::STORE_INT:
return "STORE_INT";
case IrCmd::STORE_VECTOR:
return "STORE_VECTOR";
case IrCmd::STORE_TVALUE:
return "STORE_TVALUE";
case IrCmd::STORE_NODE_VALUE_TV:
return "STORE_NODE_VALUE_TV";
case IrCmd::ADD_INT:
return "ADD_INT";
case IrCmd::SUB_INT:
return "SUB_INT";
case IrCmd::ADD_NUM:
return "ADD_NUM";
case IrCmd::SUB_NUM:
return "SUB_NUM";
case IrCmd::MUL_NUM:
return "MUL_NUM";
case IrCmd::DIV_NUM:
return "DIV_NUM";
case IrCmd::MOD_NUM:
return "MOD_NUM";
case IrCmd::MIN_NUM:
return "MIN_NUM";
case IrCmd::MAX_NUM:
return "MAX_NUM";
case IrCmd::UNM_NUM:
return "UNM_NUM";
case IrCmd::FLOOR_NUM:
return "FLOOR_NUM";
case IrCmd::CEIL_NUM:
return "CEIL_NUM";
case IrCmd::ROUND_NUM:
return "ROUND_NUM";
case IrCmd::SQRT_NUM:
return "SQRT_NUM";
case IrCmd::ABS_NUM:
return "ABS_NUM";
case IrCmd::NOT_ANY:
return "NOT_ANY";
case IrCmd::JUMP:
return "JUMP";
case IrCmd::JUMP_IF_TRUTHY:
return "JUMP_IF_TRUTHY";
case IrCmd::JUMP_IF_FALSY:
return "JUMP_IF_FALSY";
case IrCmd::JUMP_EQ_TAG:
return "JUMP_EQ_TAG";
case IrCmd::JUMP_EQ_INT:
return "JUMP_EQ_INT";
case IrCmd::JUMP_LT_INT:
return "JUMP_LT_INT";
case IrCmd::JUMP_GE_UINT:
return "JUMP_GE_UINT";
case IrCmd::JUMP_EQ_POINTER:
return "JUMP_EQ_POINTER";
case IrCmd::JUMP_CMP_NUM:
return "JUMP_CMP_NUM";
case IrCmd::JUMP_CMP_ANY:
return "JUMP_CMP_ANY";
case IrCmd::JUMP_SLOT_MATCH:
return "JUMP_SLOT_MATCH";
case IrCmd::TABLE_LEN:
return "TABLE_LEN";
case IrCmd::NEW_TABLE:
return "NEW_TABLE";
case IrCmd::DUP_TABLE:
return "DUP_TABLE";
case IrCmd::TRY_NUM_TO_INDEX:
return "TRY_NUM_TO_INDEX";
case IrCmd::TRY_CALL_FASTGETTM:
return "TRY_CALL_FASTGETTM";
case IrCmd::INT_TO_NUM:
return "INT_TO_NUM";
case IrCmd::UINT_TO_NUM:
return "UINT_TO_NUM";
case IrCmd::NUM_TO_INT:
return "NUM_TO_INT";
case IrCmd::NUM_TO_UINT:
return "NUM_TO_UINT";
case IrCmd::ADJUST_STACK_TO_REG:
return "ADJUST_STACK_TO_REG";
case IrCmd::ADJUST_STACK_TO_TOP:
return "ADJUST_STACK_TO_TOP";
case IrCmd::FASTCALL:
return "FASTCALL";
case IrCmd::INVOKE_FASTCALL:
return "INVOKE_FASTCALL";
case IrCmd::CHECK_FASTCALL_RES:
return "CHECK_FASTCALL_RES";
case IrCmd::DO_ARITH:
return "DO_ARITH";
case IrCmd::DO_LEN:
return "DO_LEN";
case IrCmd::GET_TABLE:
return "GET_TABLE";
case IrCmd::SET_TABLE:
return "SET_TABLE";
case IrCmd::GET_IMPORT:
return "GET_IMPORT";
case IrCmd::CONCAT:
return "CONCAT";
case IrCmd::GET_UPVALUE:
return "GET_UPVALUE";
case IrCmd::SET_UPVALUE:
return "SET_UPVALUE";
case IrCmd::PREPARE_FORN:
return "PREPARE_FORN";
case IrCmd::CHECK_TAG:
return "CHECK_TAG";
case IrCmd::CHECK_READONLY:
return "CHECK_READONLY";
case IrCmd::CHECK_NO_METATABLE:
return "CHECK_NO_METATABLE";
case IrCmd::CHECK_SAFE_ENV:
return "CHECK_SAFE_ENV";
case IrCmd::CHECK_ARRAY_SIZE:
return "CHECK_ARRAY_SIZE";
case IrCmd::CHECK_SLOT_MATCH:
return "CHECK_SLOT_MATCH";
case IrCmd::CHECK_NODE_NO_NEXT:
return "CHECK_NODE_NO_NEXT";
case IrCmd::INTERRUPT:
return "INTERRUPT";
case IrCmd::CHECK_GC:
return "CHECK_GC";
case IrCmd::BARRIER_OBJ:
return "BARRIER_OBJ";
case IrCmd::BARRIER_TABLE_BACK:
return "BARRIER_TABLE_BACK";
case IrCmd::BARRIER_TABLE_FORWARD:
return "BARRIER_TABLE_FORWARD";
case IrCmd::SET_SAVEDPC:
return "SET_SAVEDPC";
case IrCmd::CLOSE_UPVALS:
return "CLOSE_UPVALS";
case IrCmd::CAPTURE:
return "CAPTURE";
case IrCmd::SETLIST:
return "SETLIST";
case IrCmd::CALL:
return "CALL";
case IrCmd::RETURN:
return "RETURN";
case IrCmd::FORGLOOP:
return "FORGLOOP";
case IrCmd::FORGLOOP_FALLBACK:
return "FORGLOOP_FALLBACK";
case IrCmd::FORGPREP_XNEXT_FALLBACK:
return "FORGPREP_XNEXT_FALLBACK";
case IrCmd::COVERAGE:
return "COVERAGE";
case IrCmd::FALLBACK_GETGLOBAL:
return "FALLBACK_GETGLOBAL";
case IrCmd::FALLBACK_SETGLOBAL:
return "FALLBACK_SETGLOBAL";
case IrCmd::FALLBACK_GETTABLEKS:
return "FALLBACK_GETTABLEKS";
case IrCmd::FALLBACK_SETTABLEKS:
return "FALLBACK_SETTABLEKS";
case IrCmd::FALLBACK_NAMECALL:
return "FALLBACK_NAMECALL";
case IrCmd::FALLBACK_PREPVARARGS:
return "FALLBACK_PREPVARARGS";
case IrCmd::FALLBACK_GETVARARGS:
return "FALLBACK_GETVARARGS";
case IrCmd::FALLBACK_NEWCLOSURE:
return "FALLBACK_NEWCLOSURE";
case IrCmd::FALLBACK_DUPCLOSURE:
return "FALLBACK_DUPCLOSURE";
case IrCmd::FALLBACK_FORGPREP:
return "FALLBACK_FORGPREP";
case IrCmd::SUBSTITUTE:
return "SUBSTITUTE";
case IrCmd::BITAND_UINT:
return "BITAND_UINT";
case IrCmd::BITXOR_UINT:
return "BITXOR_UINT";
case IrCmd::BITOR_UINT:
return "BITOR_UINT";
case IrCmd::BITNOT_UINT:
return "BITNOT_UINT";
case IrCmd::BITLSHIFT_UINT:
return "BITLSHIFT_UINT";
case IrCmd::BITRSHIFT_UINT:
return "BITRSHIFT_UINT";
case IrCmd::BITARSHIFT_UINT:
return "BITARSHIFT_UINT";
case IrCmd::BITLROTATE_UINT:
return "BITLROTATE_UINT";
case IrCmd::BITRROTATE_UINT:
return "BITRROTATE_UINT";
case IrCmd::BITCOUNTLZ_UINT:
return "BITCOUNTLZ_UINT";
case IrCmd::BITCOUNTRZ_UINT:
return "BITCOUNTRZ_UINT";
case IrCmd::INVOKE_LIBM:
return "INVOKE_LIBM";
}
LUAU_UNREACHABLE();
}
const char* getBlockKindName(IrBlockKind kind)
{
switch (kind)
{
case IrBlockKind::Bytecode:
return "bb_bytecode";
case IrBlockKind::Fallback:
return "bb_fallback";
case IrBlockKind::Internal:
return "bb";
case IrBlockKind::Linearized:
return "bb_linear";
case IrBlockKind::Dead:
return "dead";
}
LUAU_UNREACHABLE();
}
void toString(IrToStringContext& ctx, const IrInst& inst, uint32_t index)
{
append(ctx.result, " ");
// Instructions with a result display the target virtual register
if (hasResult(inst.cmd))
append(ctx.result, "%%%u = ", index);
ctx.result.append(getCmdName(inst.cmd));
auto checkOp = [&ctx](IrOp op, const char* sep) {
if (op.kind != IrOpKind::None)
{
ctx.result.append(sep);
toString(ctx, op);
}
};
checkOp(inst.a, " ");
checkOp(inst.b, ", ");
checkOp(inst.c, ", ");
checkOp(inst.d, ", ");
checkOp(inst.e, ", ");
checkOp(inst.f, ", ");
}
void toString(IrToStringContext& ctx, const IrBlock& block, uint32_t index)
{
append(ctx.result, "%s_%u", getBlockKindName(block.kind), index);
}
void toString(IrToStringContext& ctx, IrOp op)
{
switch (op.kind)
{
case IrOpKind::None:
break;
case IrOpKind::Undef:
append(ctx.result, "undef");
break;
case IrOpKind::Constant:
toString(ctx.result, ctx.constants[op.index]);
break;
case IrOpKind::Condition:
LUAU_ASSERT(op.index < uint32_t(IrCondition::Count));
ctx.result.append(textForCondition[op.index]);
break;
case IrOpKind::Inst:
append(ctx.result, "%%%u", op.index);
break;
case IrOpKind::Block:
append(ctx.result, "%s_%u", getBlockKindName(ctx.blocks[op.index].kind), op.index);
break;
case IrOpKind::VmReg:
append(ctx.result, "R%d", vmRegOp(op));
break;
case IrOpKind::VmConst:
append(ctx.result, "K%d", vmConstOp(op));
break;
case IrOpKind::VmUpvalue:
append(ctx.result, "U%d", vmUpvalueOp(op));
break;
}
}
void toString(std::string& result, IrConst constant)
{
switch (constant.kind)
{
case IrConstKind::Bool:
append(result, constant.valueBool ? "true" : "false");
break;
case IrConstKind::Int:
append(result, "%di", constant.valueInt);
break;
case IrConstKind::Uint:
append(result, "%uu", constant.valueUint);
break;
case IrConstKind::Double:
if (constant.valueDouble != constant.valueDouble) // only NaN compares unequal to itself
append(result, "nan");
else
append(result, "%.17g", constant.valueDouble);
break;
case IrConstKind::Tag:
result.append(getTagName(constant.valueTag));
break;
}
}
static void appendBlockSet(IrToStringContext& ctx, BlockIteratorWrapper blocks)
{
bool comma = false;
for (uint32_t target : blocks)
{
if (comma)
append(ctx.result, ", ");
comma = true;
toString(ctx, ctx.blocks[target], target);
}
}
static void appendRegisterSet(IrToStringContext& ctx, const RegisterSet& rs, const char* separator)
{
bool comma = false;
for (size_t i = 0; i < rs.regs.size(); i++)
{
if (rs.regs.test(i))
{
if (comma)
ctx.result.append(separator);
comma = true;
append(ctx.result, "R%d", int(i));
}
}
if (rs.varargSeq)
{
if (comma)
ctx.result.append(separator);
append(ctx.result, "R%d...", rs.varargStart);
}
}
static RegisterSet getJumpTargetExtraLiveIn(IrToStringContext& ctx, const IrBlock& block, uint32_t blockIdx, const IrInst& inst)
{
RegisterSet extraRs;
if (blockIdx >= ctx.cfg.in.size())
return extraRs;
const RegisterSet& defRs = ctx.cfg.in[blockIdx];
// Find the first block argument; for guard instructions (isNonTerminatingJump), it is the first and only one
LUAU_ASSERT(isNonTerminatingJump(inst.cmd));
IrOp op = inst.a;
if (inst.b.kind == IrOpKind::Block)
op = inst.b;
else if (inst.c.kind == IrOpKind::Block)
op = inst.c;
else if (inst.d.kind == IrOpKind::Block)
op = inst.d;
else if (inst.e.kind == IrOpKind::Block)
op = inst.e;
else if (inst.f.kind == IrOpKind::Block)
op = inst.f;
if (op.kind == IrOpKind::Block && op.index < ctx.cfg.in.size())
{
const RegisterSet& inRs = ctx.cfg.in[op.index];
extraRs.regs = inRs.regs & ~defRs.regs;
if (inRs.varargSeq)
requireVariadicSequence(extraRs, defRs, inRs.varargStart);
}
return extraRs;
}
void toStringDetailed(IrToStringContext& ctx, const IrBlock& block, uint32_t blockIdx, const IrInst& inst, uint32_t instIdx, bool includeUseInfo)
{
size_t start = ctx.result.size();
toString(ctx, inst, instIdx);
if (includeUseInfo)
{
padToDetailColumn(ctx.result, start);
if (inst.useCount == 0 && hasSideEffects(inst.cmd))
{
if (isNonTerminatingJump(inst.cmd))
{
RegisterSet extraRs = getJumpTargetExtraLiveIn(ctx, block, blockIdx, inst);
if (extraRs.regs.any() || extraRs.varargSeq)
{
append(ctx.result, "; %%%u, extra in: ", instIdx);
appendRegisterSet(ctx, extraRs, ", ");
ctx.result.append("\n");
}
else
{
append(ctx.result, "; %%%u\n", instIdx);
}
}
else
{
append(ctx.result, "; %%%u\n", instIdx);
}
}
else
{
append(ctx.result, "; useCount: %d, lastUse: %%%u\n", inst.useCount, inst.lastUse);
}
}
else
{
ctx.result.append("\n");
}
}
void toStringDetailed(IrToStringContext& ctx, const IrBlock& block, uint32_t index, bool includeUseInfo)
{
// Report captured registers for entry block
if (block.useCount == 0 && block.kind != IrBlockKind::Dead && ctx.cfg.captured.regs.any())
{
append(ctx.result, "; captured regs: ");
appendRegisterSet(ctx, ctx.cfg.captured, ", ");
append(ctx.result, "\n\n");
}
size_t start = ctx.result.size();
toString(ctx, block, index);
append(ctx.result, ":");
if (includeUseInfo)
{
padToDetailColumn(ctx.result, start);
append(ctx.result, "; useCount: %d\n", block.useCount);
}
else
{
ctx.result.append("\n");
}
// Predecessor list
if (index < ctx.cfg.predecessorsOffsets.size())
{
BlockIteratorWrapper pred = predecessors(ctx.cfg, index);
if (!pred.empty())
{
append(ctx.result, "; predecessors: ");
appendBlockSet(ctx, pred);
append(ctx.result, "\n");
}
}
// Successor list
if (index < ctx.cfg.successorsOffsets.size())
{
BlockIteratorWrapper succ = successors(ctx.cfg, index);
if (!succ.empty())
{
append(ctx.result, "; successors: ");
appendBlockSet(ctx, succ);
append(ctx.result, "\n");
}
}
// Live-in VM regs
if (index < ctx.cfg.in.size())
{
const RegisterSet& in = ctx.cfg.in[index];
if (in.regs.any() || in.varargSeq)
{
append(ctx.result, "; in regs: ");
appendRegisterSet(ctx, in, ", ");
append(ctx.result, "\n");
}
}
// Live-out VM regs
if (index < ctx.cfg.out.size())
{
const RegisterSet& out = ctx.cfg.out[index];
if (out.regs.any() || out.varargSeq)
{
append(ctx.result, "; out regs: ");
appendRegisterSet(ctx, out, ", ");
append(ctx.result, "\n");
}
}
}
std::string toString(const IrFunction& function, bool includeUseInfo)
{
std::string result;
IrToStringContext ctx{result, function.blocks, function.constants, function.cfg};
for (size_t i = 0; i < function.blocks.size(); i++)
{
const IrBlock& block = function.blocks[i];
if (block.kind == IrBlockKind::Dead)
continue;
toStringDetailed(ctx, block, uint32_t(i), includeUseInfo);
if (block.start == ~0u) // the block was never started, so it has no instructions assigned
{
append(ctx.result, " *empty*\n\n");
continue;
}
// To allow dumping blocks that are still being constructed, we can't rely on the terminator and need a bounds check
for (uint32_t index = block.start; index <= block.finish && index < uint32_t(function.instructions.size()); index++)
{
const IrInst& inst = function.instructions[index];
// Skip pseudo instructions unless they are still referenced
if (isPseudo(inst.cmd) && inst.useCount == 0)
continue;
append(ctx.result, " ");
toStringDetailed(ctx, block, uint32_t(i), inst, index, includeUseInfo);
}
append(ctx.result, "\n");
}
return result;
}
std::string dump(const IrFunction& function)
{
std::string result = toString(function, /* includeUseInfo */ true);
printf("%s\n", result.c_str());
return result;
}
std::string toDot(const IrFunction& function, bool includeInst)
{
std::string result;
IrToStringContext ctx{result, function.blocks, function.constants, function.cfg};
auto appendLabelRegset = [&ctx](const std::vector<RegisterSet>& regSets, size_t blockIdx, const char* name) {
if (blockIdx < regSets.size())
{
const RegisterSet& rs = regSets[blockIdx];
if (rs.regs.any() || rs.varargSeq)
{
append(ctx.result, "|{%s|", name);
appendRegisterSet(ctx, rs, "|");
append(ctx.result, "}");
}
}
};
append(ctx.result, "digraph CFG {\n");
append(ctx.result, "node[shape=record]\n");
for (size_t i = 0; i < function.blocks.size(); i++)
{
const IrBlock& block = function.blocks[i];
append(ctx.result, "b%u [", unsigned(i));
if (block.kind == IrBlockKind::Fallback)
append(ctx.result, "style=filled;fillcolor=salmon;");
else if (block.kind == IrBlockKind::Bytecode)
append(ctx.result, "style=filled;fillcolor=palegreen;");
append(ctx.result, "label=\"{");
toString(ctx, block, uint32_t(i));
appendLabelRegset(ctx.cfg.in, i, "in");
if (includeInst && block.start != ~0u)
{
for (uint32_t instIdx = block.start; instIdx <= block.finish; instIdx++)
{
const IrInst& inst = function.instructions[instIdx];
// Skip pseudo instructions unless they are still referenced
if (isPseudo(inst.cmd) && inst.useCount == 0)
continue;
append(ctx.result, "|");
toString(ctx, inst, instIdx);
}
}
appendLabelRegset(ctx.cfg.def, i, "def");
appendLabelRegset(ctx.cfg.out, i, "out");
append(ctx.result, "}\"];\n");
}
for (size_t i = 0; i < function.blocks.size(); i++)
{
const IrBlock& block = function.blocks[i];
if (block.start == ~0u)
continue;
for (uint32_t instIdx = block.start; instIdx != ~0u && instIdx <= block.finish; instIdx++)
{
const IrInst& inst = function.instructions[instIdx];
auto checkOp = [&](IrOp op) {
if (op.kind == IrOpKind::Block)
{
if (function.blocks[op.index].kind != IrBlockKind::Fallback)
append(ctx.result, "b%u -> b%u [weight=10];\n", unsigned(i), op.index);
else
append(ctx.result, "b%u -> b%u;\n", unsigned(i), op.index);
}
};
checkOp(inst.a);
checkOp(inst.b);
checkOp(inst.c);
checkOp(inst.d);
checkOp(inst.e);
checkOp(inst.f);
}
}
append(ctx.result, "}\n");
return result;
}
std::string dumpDot(const IrFunction& function, bool includeInst)
{
std::string result = toDot(function, includeInst);
printf("%s\n", result.c_str());
return result;
}
} // namespace CodeGen
} // namespace Luau
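A minimal usage sketch for the dump helpers above, assuming an IrFunction that has already been populated (for example by IrBuilder); the wrapper function itself is hypothetical:

#include "Luau/IrDump.h"
#include <cstdio>
#include <string>

static void debugDumpIr(const Luau::CodeGen::IrFunction& function)
{
    // textual listing with use counts and liveness annotations in the details column
    std::string text = Luau::CodeGen::toString(function, /* includeUseInfo */ true);
    // Graphviz CFG with one record node per block; render with `dot -Tsvg`
    std::string graph = Luau::CodeGen::toDot(function, /* includeInst */ false);
    printf("%s\n%s\n", text.c_str(), graph.c_str());
}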

File diff suppressed because it is too large

View File

@ -0,0 +1,75 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/AssemblyBuilderA64.h"
#include "Luau/IrData.h"
#include "IrRegAllocA64.h"
#include "IrValueLocationTracking.h"
#include <vector>
struct Proto;
namespace Luau
{
namespace CodeGen
{
struct ModuleHelpers;
struct NativeState;
struct AssemblyOptions;
namespace A64
{
struct IrLoweringA64
{
IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, NativeState& data, Proto* proto, IrFunction& function);
void lowerInst(IrInst& inst, uint32_t index, IrBlock& next);
void finishBlock();
bool hasError() const;
bool isFallthroughBlock(IrBlock target, IrBlock next);
void jumpOrFallthrough(IrBlock& target, IrBlock& next);
// Operand data build helpers
// May emit data/address synthesis instructions
RegisterA64 tempDouble(IrOp op);
RegisterA64 tempInt(IrOp op);
RegisterA64 tempUint(IrOp op);
AddressA64 tempAddr(IrOp op, int offset);
// May emit restore instructions
RegisterA64 regOp(IrOp op);
// Operand data lookup helpers
IrConst constOp(IrOp op) const;
uint8_t tagOp(IrOp op) const;
bool boolOp(IrOp op) const;
int intOp(IrOp op) const;
unsigned uintOp(IrOp op) const;
double doubleOp(IrOp op) const;
IrBlock& blockOp(IrOp op) const;
Label& labelOp(IrOp op) const;
AssemblyBuilderA64& build;
ModuleHelpers& helpers;
NativeState& data;
Proto* proto = nullptr; // Temporarily required to provide 'Instruction* pc' to old emitInst* methods
IrFunction& function;
IrRegAllocA64 regs;
IrValueLocationTracking valueTracker;
bool error = false;
};
} // namespace A64
} // namespace CodeGen
} // namespace Luau

File diff suppressed because it is too large

View File

@ -0,0 +1,69 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/IrData.h"
#include "Luau/IrRegAllocX64.h"
#include "IrValueLocationTracking.h"
#include <vector>
struct Proto;
namespace Luau
{
namespace CodeGen
{
struct ModuleHelpers;
struct NativeState;
struct AssemblyOptions;
namespace X64
{
struct IrLoweringX64
{
IrLoweringX64(AssemblyBuilderX64& build, ModuleHelpers& helpers, NativeState& data, IrFunction& function);
void lowerInst(IrInst& inst, uint32_t index, IrBlock& next);
void finishBlock();
bool hasError() const;
bool isFallthroughBlock(IrBlock target, IrBlock next);
void jumpOrFallthrough(IrBlock& target, IrBlock& next);
void storeDoubleAsFloat(OperandX64 dst, IrOp src);
// Operand data lookup helpers
OperandX64 memRegDoubleOp(IrOp op);
OperandX64 memRegUintOp(IrOp op);
OperandX64 memRegTagOp(IrOp op);
RegisterX64 regOp(IrOp op);
IrConst constOp(IrOp op) const;
uint8_t tagOp(IrOp op) const;
bool boolOp(IrOp op) const;
int intOp(IrOp op) const;
unsigned uintOp(IrOp op) const;
double doubleOp(IrOp op) const;
IrBlock& blockOp(IrOp op) const;
Label& labelOp(IrOp op) const;
AssemblyBuilderX64& build;
ModuleHelpers& helpers;
NativeState& data;
IrFunction& function;
IrRegAllocX64 regs;
IrValueLocationTracking valueTracker;
};
} // namespace X64
} // namespace CodeGen
} // namespace Luau
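A hedged sketch of how a lowering driver is expected to use this interface, mirroring the bounds-checked iteration used by IrDump above; 'lowering', 'function', 'block', and 'nextBlock' are assumed to come from the surrounding code:

for (uint32_t index = block.start; index <= block.finish; index++)
{
    IrInst& inst = function.instructions[index];
    // pseudo instructions produce no code and are skipped unless still referenced
    if (isPseudo(inst.cmd) && inst.useCount == 0)
        continue;
    lowering.lowerInst(inst, index, nextBlock);
}
lowering.finishBlock();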

View File

@ -0,0 +1,435 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "IrRegAllocA64.h"
#include "Luau/AssemblyBuilderA64.h"
#include "Luau/IrUtils.h"
#include "BitUtils.h"
#include "EmitCommonA64.h"
#include <string.h>
LUAU_FASTFLAGVARIABLE(DebugLuauCodegenChaosA64, false)
namespace Luau
{
namespace CodeGen
{
namespace A64
{
static int allocSpill(uint32_t& free, KindA64 kind)
{
LUAU_ASSERT(kStackSize <= 256); // to support larger stack frames, we need to ensure qN is allocated at 16b boundary to fit in ldr/str encoding
// qN registers use two consecutive slots; 'free & (free >> 1)' keeps only the bits that start a pair of consecutive free slots
int slot = countrz(kind == KindA64::q ? free & (free >> 1) : free);
if (slot == 32)
return -1;
uint32_t mask = (kind == KindA64::q ? 3u : 1u) << slot;
LUAU_ASSERT((free & mask) == mask);
free &= ~mask;
return slot;
}
static void freeSpill(uint32_t& free, KindA64 kind, uint8_t slot)
{
// qN registers use two consecutive slots
uint32_t mask = (kind == KindA64::q ? 3u : 1u) << slot;
LUAU_ASSERT((free & mask) == 0);
free |= mask;
}
static int getReloadOffset(IrCmd cmd)
{
switch (getCmdValueKind(cmd))
{
case IrValueKind::Unknown:
case IrValueKind::None:
LUAU_ASSERT(!"Invalid operand restore value kind");
break;
case IrValueKind::Tag:
return offsetof(TValue, tt);
case IrValueKind::Int:
return offsetof(TValue, value);
case IrValueKind::Pointer:
return offsetof(TValue, value.gc);
case IrValueKind::Double:
return offsetof(TValue, value.n);
case IrValueKind::Tvalue:
return 0;
}
LUAU_ASSERT(!"Invalid operand restore value kind");
LUAU_UNREACHABLE();
}
static AddressA64 getReloadAddress(const IrFunction& function, const IrInst& inst)
{
IrOp location = function.findRestoreOp(inst);
if (location.kind == IrOpKind::VmReg)
return mem(rBase, vmRegOp(location) * sizeof(TValue) + getReloadOffset(inst.cmd));
// loads are 4/8/16 bytes; we conservatively limit the offset to fit assuming a 4b index
if (location.kind == IrOpKind::VmConst && vmConstOp(location) * sizeof(TValue) <= AddressA64::kMaxOffset * 4)
return mem(rConstants, vmConstOp(location) * sizeof(TValue) + getReloadOffset(inst.cmd));
return AddressA64(xzr); // dummy
}
static void restoreInst(AssemblyBuilderA64& build, uint32_t& freeSpillSlots, IrFunction& function, const IrRegAllocA64::Spill& s, RegisterA64 reg)
{
IrInst& inst = function.instructions[s.inst];
LUAU_ASSERT(inst.regA64 == noreg);
if (s.slot >= 0)
{
build.ldr(reg, mem(sp, sSpillArea.data + s.slot * 8));
freeSpill(freeSpillSlots, reg.kind, s.slot);
}
else
{
LUAU_ASSERT(!inst.spilled && inst.needsReload);
AddressA64 addr = getReloadAddress(function, function.instructions[s.inst]);
LUAU_ASSERT(addr.base != xzr);
build.ldr(reg, addr);
}
inst.spilled = false;
inst.needsReload = false;
inst.regA64 = reg;
}
IrRegAllocA64::IrRegAllocA64(IrFunction& function, std::initializer_list<std::pair<RegisterA64, RegisterA64>> regs)
: function(function)
{
for (auto& p : regs)
{
LUAU_ASSERT(p.first.kind == p.second.kind && p.first.index <= p.second.index);
Set& set = getSet(p.first.kind);
for (int i = p.first.index; i <= p.second.index; ++i)
set.base |= 1u << i;
}
gpr.free = gpr.base;
simd.free = simd.base;
memset(gpr.defs, -1, sizeof(gpr.defs));
memset(simd.defs, -1, sizeof(simd.defs));
LUAU_ASSERT(kSpillSlots <= 32);
freeSpillSlots = (kSpillSlots == 32) ? ~0u : (1u << kSpillSlots) - 1;
}
RegisterA64 IrRegAllocA64::allocReg(KindA64 kind, uint32_t index)
{
Set& set = getSet(kind);
if (set.free == 0)
{
// TODO: remember the error and fail lowering
LUAU_ASSERT(!"Out of registers to allocate");
return noreg;
}
int reg = 31 - countlz(set.free);
if (FFlag::DebugLuauCodegenChaosA64)
reg = countrz(set.free); // allocate from low end; this causes extra conflicts for calls
set.free &= ~(1u << reg);
set.defs[reg] = index;
return RegisterA64{kind, uint8_t(reg)};
}
RegisterA64 IrRegAllocA64::allocTemp(KindA64 kind)
{
Set& set = getSet(kind);
if (set.free == 0)
{
// TODO: remember the error and fail lowering
LUAU_ASSERT(!"Out of registers to allocate");
return noreg;
}
int reg = 31 - countlz(set.free);
if (FFlag::DebugLuauCodegenChaosA64)
reg = countrz(set.free); // allocate from low end; this causes extra conflicts for calls
set.free &= ~(1u << reg);
set.temp |= 1u << reg;
LUAU_ASSERT(set.defs[reg] == kInvalidInstIdx);
return RegisterA64{kind, uint8_t(reg)};
}
RegisterA64 IrRegAllocA64::allocReuse(KindA64 kind, uint32_t index, std::initializer_list<IrOp> oprefs)
{
for (IrOp op : oprefs)
{
if (op.kind != IrOpKind::Inst)
continue;
IrInst& source = function.instructions[op.index];
if (source.lastUse == index && !source.reusedReg && source.regA64 != noreg)
{
LUAU_ASSERT(!source.spilled && !source.needsReload);
LUAU_ASSERT(source.regA64.kind == kind);
Set& set = getSet(kind);
LUAU_ASSERT(set.defs[source.regA64.index] == op.index);
set.defs[source.regA64.index] = index;
source.reusedReg = true;
return source.regA64;
}
}
return allocReg(kind, index);
}
RegisterA64 IrRegAllocA64::takeReg(RegisterA64 reg, uint32_t index)
{
Set& set = getSet(reg.kind);
LUAU_ASSERT(set.free & (1u << reg.index));
LUAU_ASSERT(set.defs[reg.index] == kInvalidInstIdx);
set.free &= ~(1u << reg.index);
set.defs[reg.index] = index;
return reg;
}
void IrRegAllocA64::freeReg(RegisterA64 reg)
{
Set& set = getSet(reg.kind);
LUAU_ASSERT((set.base & (1u << reg.index)) != 0);
LUAU_ASSERT((set.free & (1u << reg.index)) == 0);
LUAU_ASSERT((set.temp & (1u << reg.index)) == 0);
set.free |= 1u << reg.index;
set.defs[reg.index] = kInvalidInstIdx;
}
void IrRegAllocA64::freeLastUseReg(IrInst& target, uint32_t index)
{
if (target.lastUse == index && !target.reusedReg)
{
LUAU_ASSERT(!target.spilled && !target.needsReload);
// Register might have already been freed if it had multiple uses inside a single instruction
if (target.regA64 == noreg)
return;
freeReg(target.regA64);
target.regA64 = noreg;
}
}
void IrRegAllocA64::freeLastUseRegs(const IrInst& inst, uint32_t index)
{
auto checkOp = [this, index](IrOp op) {
if (op.kind == IrOpKind::Inst)
freeLastUseReg(function.instructions[op.index], index);
};
checkOp(inst.a);
checkOp(inst.b);
checkOp(inst.c);
checkOp(inst.d);
checkOp(inst.e);
checkOp(inst.f);
}
void IrRegAllocA64::freeTempRegs()
{
LUAU_ASSERT((gpr.free & gpr.temp) == 0);
gpr.free |= gpr.temp;
gpr.temp = 0;
LUAU_ASSERT((simd.free & simd.temp) == 0);
simd.free |= simd.temp;
simd.temp = 0;
}
size_t IrRegAllocA64::spill(AssemblyBuilderA64& build, uint32_t index, std::initializer_list<RegisterA64> live)
{
static const KindA64 sets[] = {KindA64::x, KindA64::q};
size_t start = spills.size();
uint32_t poisongpr = 0;
uint32_t poisonsimd = 0;
if (FFlag::DebugLuauCodegenChaosA64)
{
poisongpr = gpr.base & ~gpr.free;
poisonsimd = simd.base & ~simd.free;
for (RegisterA64 reg : live)
{
Set& set = getSet(reg.kind);
(&set == &simd ? poisonsimd : poisongpr) &= ~(1u << reg.index);
}
}
for (KindA64 kind : sets)
{
Set& set = getSet(kind);
// early-out
if (set.free == set.base)
continue;
// free all temp registers
LUAU_ASSERT((set.free & set.temp) == 0);
set.free |= set.temp;
set.temp = 0;
// spill all allocated registers, except ones that are no longer needed
uint32_t regs = set.base & ~set.free;
while (regs)
{
int reg = 31 - countlz(regs);
uint32_t inst = set.defs[reg];
LUAU_ASSERT(inst != kInvalidInstIdx);
IrInst& def = function.instructions[inst];
LUAU_ASSERT(def.regA64.index == reg);
LUAU_ASSERT(!def.reusedReg);
LUAU_ASSERT(!def.spilled);
LUAU_ASSERT(!def.needsReload);
if (def.lastUse == index)
{
// the value dies at this instruction, so there is nothing to spill: it would never be reloaded
}
else if (getReloadAddress(function, def).base != xzr)
{
// instead of spilling the register to stack, we can reload it from VM stack/constants
// we still need to record the spill for restore(start) to work
Spill s = {inst, def.regA64, -1};
spills.push_back(s);
def.needsReload = true;
}
else
{
int slot = allocSpill(freeSpillSlots, def.regA64.kind);
LUAU_ASSERT(slot >= 0); // TODO: remember the error and fail lowering
build.str(def.regA64, mem(sp, sSpillArea.data + slot * 8));
Spill s = {inst, def.regA64, int8_t(slot)};
spills.push_back(s);
def.spilled = true;
}
def.regA64 = noreg;
regs &= ~(1u << reg);
set.free |= 1u << reg;
set.defs[reg] = kInvalidInstIdx;
}
LUAU_ASSERT(set.free == set.base);
}
if (FFlag::DebugLuauCodegenChaosA64)
{
for (int reg = 0; reg < 32; ++reg)
{
if (poisongpr & (1u << reg))
build.mov(RegisterA64{KindA64::x, uint8_t(reg)}, 0xdead);
if (poisonsimd & (1u << reg))
build.fmov(RegisterA64{KindA64::d, uint8_t(reg)}, -0.125);
}
}
return start;
}
void IrRegAllocA64::restore(AssemblyBuilderA64& build, size_t start)
{
LUAU_ASSERT(start <= spills.size());
if (start < spills.size())
{
for (size_t i = start; i < spills.size(); ++i)
{
Spill s = spills[i]; // copy in case takeReg reallocates spills
RegisterA64 reg = takeReg(s.origin, s.inst);
restoreInst(build, freeSpillSlots, function, s, reg);
}
spills.resize(start);
}
}
void IrRegAllocA64::restoreReg(AssemblyBuilderA64& build, IrInst& inst)
{
uint32_t index = function.getInstIndex(inst);
for (size_t i = 0; i < spills.size(); ++i)
{
if (spills[i].inst == index)
{
Spill s = spills[i]; // copy in case allocReg reallocates spills
RegisterA64 reg = allocReg(s.origin.kind, index);
restoreInst(build, freeSpillSlots, function, s, reg);
spills[i] = spills.back();
spills.pop_back();
return;
}
}
LUAU_ASSERT(!"Expected to find a spill record");
}
void IrRegAllocA64::assertNoSpills() const
{
LUAU_ASSERT(spills.empty());
}
IrRegAllocA64::Set& IrRegAllocA64::getSet(KindA64 kind)
{
switch (kind)
{
case KindA64::x:
case KindA64::w:
return gpr;
case KindA64::s:
case KindA64::d:
case KindA64::q:
return simd;
default:
LUAU_ASSERT(!"Unexpected register kind");
LUAU_UNREACHABLE();
}
}
} // namespace A64
} // namespace CodeGen
} // namespace Luau
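A minimal sketch of the intended spill/restore pairing around a C call, assuming 'regs' is the IrRegAllocA64 above, 'build' an AssemblyBuilderA64, and 'instIdx' the current instruction index; the live argument registers named are illustrative:

size_t spillStart = regs.spill(build, instIdx, {x0, x1}); // x0/x1 stay live across the call
// ... emit the call; all other allocated registers were spilled or marked for reload ...
regs.restore(build, spillStart); // every spill is restored to its original register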

View File

@ -0,0 +1,84 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/IrData.h"
#include "Luau/RegisterA64.h"
#include <initializer_list>
#include <utility>
#include <vector>
namespace Luau
{
namespace CodeGen
{
namespace A64
{
class AssemblyBuilderA64;
struct IrRegAllocA64
{
IrRegAllocA64(IrFunction& function, std::initializer_list<std::pair<RegisterA64, RegisterA64>> regs);
RegisterA64 allocReg(KindA64 kind, uint32_t index);
RegisterA64 allocTemp(KindA64 kind);
RegisterA64 allocReuse(KindA64 kind, uint32_t index, std::initializer_list<IrOp> oprefs);
RegisterA64 takeReg(RegisterA64 reg, uint32_t index);
void freeReg(RegisterA64 reg);
void freeLastUseReg(IrInst& target, uint32_t index);
void freeLastUseRegs(const IrInst& inst, uint32_t index);
void freeTempRegs();
// Spills all live registers that outlive the current instruction; all allocated registers are assumed to be undefined afterwards
size_t spill(AssemblyBuilderA64& build, uint32_t index, std::initializer_list<RegisterA64> live = {});
// Restores registers starting from the offset returned by spill(); all spills will be restored to the original registers
void restore(AssemblyBuilderA64& build, size_t start);
// Restores register for a single instruction; may not assign the previously used register!
void restoreReg(AssemblyBuilderA64& build, IrInst& inst);
void assertNoSpills() const;
struct Set
{
// which registers are in the set that the allocator manages (initialized at construction)
uint32_t base = 0;
// which subset of initial set is free
uint32_t free = 0;
// which subset of initial set is allocated as temporary
uint32_t temp = 0;
// which instruction is defining which register (for spilling); only valid if not free and not temp
uint32_t defs[32];
};
struct Spill
{
uint32_t inst;
RegisterA64 origin;
int8_t slot;
};
Set& getSet(KindA64 kind);
IrFunction& function;
Set gpr, simd;
std::vector<Spill> spills;
// which 8-byte slots are free
uint32_t freeSpillSlots = 0;
};
} // namespace A64
} // namespace CodeGen
} // namespace Luau
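A hedged construction sketch; the exact register ranges handed to the allocator are chosen by the lowering code, and the ones below are illustrative only:

IrRegAllocA64 regs(function, {{x0, x15}, {x16, x17}, {q0, q7}});
RegisterA64 temp = regs.allocTemp(KindA64::x); // scratch valid within one instruction
// ... use temp while lowering the instruction ...
regs.freeTempRegs(); // temporaries are released in bulk between instructions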

View File

@ -0,0 +1,492 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/IrRegAllocX64.h"
#include "Luau/IrUtils.h"
#include "EmitCommonX64.h"
namespace Luau
{
namespace CodeGen
{
namespace X64
{
static const RegisterX64 kGprAllocOrder[] = {rax, rdx, rcx, rbx, rsi, rdi, r8, r9, r10, r11};
IrRegAllocX64::IrRegAllocX64(AssemblyBuilderX64& build, IrFunction& function)
: build(build)
, function(function)
{
freeGprMap.fill(true);
gprInstUsers.fill(kInvalidInstIdx);
freeXmmMap.fill(true);
xmmInstUsers.fill(kInvalidInstIdx);
}
RegisterX64 IrRegAllocX64::allocReg(SizeX64 size, uint32_t instIdx)
{
if (size == SizeX64::xmmword)
{
for (size_t i = 0; i < freeXmmMap.size(); ++i)
{
if (freeXmmMap[i])
{
freeXmmMap[i] = false;
xmmInstUsers[i] = instIdx;
return RegisterX64{size, uint8_t(i)};
}
}
}
else
{
for (RegisterX64 reg : kGprAllocOrder)
{
if (freeGprMap[reg.index])
{
freeGprMap[reg.index] = false;
gprInstUsers[reg.index] = instIdx;
return RegisterX64{size, reg.index};
}
}
}
// Out of registers, spill the value with the furthest next use
const std::array<uint32_t, 16>& regInstUsers = size == SizeX64::xmmword ? xmmInstUsers : gprInstUsers;
if (uint32_t furthestUseTarget = findInstructionWithFurthestNextUse(regInstUsers); furthestUseTarget != kInvalidInstIdx)
return takeReg(function.instructions[furthestUseTarget].regX64, instIdx);
LUAU_ASSERT(!"Out of registers to allocate");
return noreg;
}
RegisterX64 IrRegAllocX64::allocRegOrReuse(SizeX64 size, uint32_t instIdx, std::initializer_list<IrOp> oprefs)
{
for (IrOp op : oprefs)
{
if (op.kind != IrOpKind::Inst)
continue;
IrInst& source = function.instructions[op.index];
if (source.lastUse == instIdx && !source.reusedReg && !source.spilled && !source.needsReload)
{
// Not comparing size directly because we only need matching register set
if ((size == SizeX64::xmmword) != (source.regX64.size == SizeX64::xmmword))
continue;
LUAU_ASSERT(source.regX64 != noreg);
source.reusedReg = true;
if (size == SizeX64::xmmword)
xmmInstUsers[source.regX64.index] = instIdx;
else
gprInstUsers[source.regX64.index] = instIdx;
return RegisterX64{size, source.regX64.index};
}
}
return allocReg(size, instIdx);
}
RegisterX64 IrRegAllocX64::takeReg(RegisterX64 reg, uint32_t instIdx)
{
if (reg.size == SizeX64::xmmword)
{
if (!freeXmmMap[reg.index])
{
LUAU_ASSERT(xmmInstUsers[reg.index] != kInvalidInstIdx);
preserve(function.instructions[xmmInstUsers[reg.index]]);
}
LUAU_ASSERT(freeXmmMap[reg.index]);
freeXmmMap[reg.index] = false;
xmmInstUsers[reg.index] = instIdx;
}
else
{
if (!freeGprMap[reg.index])
{
LUAU_ASSERT(gprInstUsers[reg.index] != kInvalidInstIdx);
preserve(function.instructions[gprInstUsers[reg.index]]);
}
LUAU_ASSERT(freeGprMap[reg.index]);
freeGprMap[reg.index] = false;
gprInstUsers[reg.index] = instIdx;
}
return reg;
}
void IrRegAllocX64::freeReg(RegisterX64 reg)
{
if (reg.size == SizeX64::xmmword)
{
LUAU_ASSERT(!freeXmmMap[reg.index]);
freeXmmMap[reg.index] = true;
xmmInstUsers[reg.index] = kInvalidInstIdx;
}
else
{
LUAU_ASSERT(!freeGprMap[reg.index]);
freeGprMap[reg.index] = true;
gprInstUsers[reg.index] = kInvalidInstIdx;
}
}
void IrRegAllocX64::freeLastUseReg(IrInst& target, uint32_t instIdx)
{
if (isLastUseReg(target, instIdx))
{
LUAU_ASSERT(!target.spilled && !target.needsReload);
// Register might have already been freed if it had multiple uses inside a single instruction
if (target.regX64 == noreg)
return;
freeReg(target.regX64);
target.regX64 = noreg;
}
}
void IrRegAllocX64::freeLastUseRegs(const IrInst& inst, uint32_t instIdx)
{
auto checkOp = [this, instIdx](IrOp op) {
if (op.kind == IrOpKind::Inst)
freeLastUseReg(function.instructions[op.index], instIdx);
};
checkOp(inst.a);
checkOp(inst.b);
checkOp(inst.c);
checkOp(inst.d);
checkOp(inst.e);
checkOp(inst.f);
}
bool IrRegAllocX64::isLastUseReg(const IrInst& target, uint32_t instIdx) const
{
return target.lastUse == instIdx && !target.reusedReg;
}
void IrRegAllocX64::preserve(IrInst& inst)
{
IrSpillX64 spill;
spill.instIdx = function.getInstIndex(inst);
spill.valueKind = getCmdValueKind(inst.cmd);
spill.spillId = nextSpillId++;
spill.originalLoc = inst.regX64;
// Loads from VmReg/VmConst don't have to be spilled to the stack; they can be reloaded later from their VM register or constant location
if (!hasRestoreOp(inst))
{
unsigned i = findSpillStackSlot(spill.valueKind);
if (spill.valueKind == IrValueKind::Tvalue)
build.vmovups(xmmword[sSpillArea + i * 8], inst.regX64);
else if (spill.valueKind == IrValueKind::Double)
build.vmovsd(qword[sSpillArea + i * 8], inst.regX64);
else if (spill.valueKind == IrValueKind::Pointer)
build.mov(qword[sSpillArea + i * 8], inst.regX64);
else if (spill.valueKind == IrValueKind::Tag || spill.valueKind == IrValueKind::Int)
build.mov(dword[sSpillArea + i * 8], inst.regX64);
else
LUAU_ASSERT(!"unsupported value kind");
usedSpillSlots.set(i);
if (i + 1 > maxUsedSlot)
maxUsedSlot = i + 1;
if (spill.valueKind == IrValueKind::Tvalue)
{
usedSpillSlots.set(i + 1);
if (i + 2 > maxUsedSlot)
maxUsedSlot = i + 2;
}
spill.stackSlot = uint8_t(i);
inst.spilled = true;
}
else
{
inst.needsReload = true;
}
spills.push_back(spill);
freeReg(inst.regX64);
inst.regX64 = noreg;
}
void IrRegAllocX64::restore(IrInst& inst, bool intoOriginalLocation)
{
uint32_t instIdx = function.getInstIndex(inst);
for (size_t i = 0; i < spills.size(); i++)
{
if (spills[i].instIdx == instIdx)
{
RegisterX64 reg = intoOriginalLocation ? takeReg(spills[i].originalLoc, instIdx) : allocReg(spills[i].originalLoc.size, instIdx);
OperandX64 restoreLocation = noreg;
// Previous call might have relocated the spill vector, so this reference can't be taken earlier
const IrSpillX64& spill = spills[i];
if (spill.stackSlot != kNoStackSlot)
{
restoreLocation = addr[sSpillArea + spill.stackSlot * 8];
restoreLocation.memSize = reg.size;
usedSpillSlots.set(spill.stackSlot, false);
if (spill.valueKind == IrValueKind::Tvalue)
usedSpillSlots.set(spill.stackSlot + 1, false);
}
else
{
restoreLocation = getRestoreAddress(inst, getRestoreOp(inst));
}
if (spill.valueKind == IrValueKind::Tvalue)
build.vmovups(reg, restoreLocation);
else if (spill.valueKind == IrValueKind::Double)
build.vmovsd(reg, restoreLocation);
else
build.mov(reg, restoreLocation);
inst.regX64 = reg;
inst.spilled = false;
inst.needsReload = false;
spills[i] = spills.back();
spills.pop_back();
return;
}
}
}
void IrRegAllocX64::preserveAndFreeInstValues()
{
for (uint32_t instIdx : gprInstUsers)
{
if (instIdx != kInvalidInstIdx)
preserve(function.instructions[instIdx]);
}
for (uint32_t instIdx : xmmInstUsers)
{
if (instIdx != kInvalidInstIdx)
preserve(function.instructions[instIdx]);
}
}
bool IrRegAllocX64::shouldFreeGpr(RegisterX64 reg) const
{
if (reg == noreg)
return false;
LUAU_ASSERT(reg.size != SizeX64::xmmword);
for (RegisterX64 gpr : kGprAllocOrder)
{
if (reg.index == gpr.index)
return true;
}
return false;
}
unsigned IrRegAllocX64::findSpillStackSlot(IrValueKind valueKind)
{
// Find a free stack slot. Two consecutive slots might be required for 16 byte TValues, so '- 1' is used
for (unsigned i = 0; i < unsigned(usedSpillSlots.size() - 1); ++i)
{
if (usedSpillSlots.test(i))
continue;
if (valueKind == IrValueKind::Tvalue && usedSpillSlots.test(i + 1))
{
++i; // No need to retest this double position
continue;
}
return i;
}
LUAU_ASSERT(!"nowhere to spill");
return ~0u;
}
IrOp IrRegAllocX64::getRestoreOp(const IrInst& inst) const
{
if (IrOp location = function.findRestoreOp(inst); location.kind == IrOpKind::VmReg || location.kind == IrOpKind::VmConst)
return location;
return IrOp();
}
bool IrRegAllocX64::hasRestoreOp(const IrInst& inst) const
{
return getRestoreOp(inst).kind != IrOpKind::None;
}
OperandX64 IrRegAllocX64::getRestoreAddress(const IrInst& inst, IrOp restoreOp)
{
switch (getCmdValueKind(inst.cmd))
{
case IrValueKind::Unknown:
case IrValueKind::None:
LUAU_ASSERT(!"Invalid operand restore value kind");
break;
case IrValueKind::Tag:
return restoreOp.kind == IrOpKind::VmReg ? luauRegTag(vmRegOp(restoreOp)) : luauConstantTag(vmConstOp(restoreOp));
case IrValueKind::Int:
LUAU_ASSERT(restoreOp.kind == IrOpKind::VmReg);
return luauRegValueInt(vmRegOp(restoreOp));
case IrValueKind::Pointer:
return restoreOp.kind == IrOpKind::VmReg ? luauRegValue(vmRegOp(restoreOp)) : luauConstantValue(vmConstOp(restoreOp));
case IrValueKind::Double:
return restoreOp.kind == IrOpKind::VmReg ? luauRegValue(vmRegOp(restoreOp)) : luauConstantValue(vmConstOp(restoreOp));
case IrValueKind::Tvalue:
return restoreOp.kind == IrOpKind::VmReg ? luauReg(vmRegOp(restoreOp)) : luauConstant(vmConstOp(restoreOp));
}
LUAU_ASSERT(!"Failed to find restore operand location");
return noreg;
}
uint32_t IrRegAllocX64::findInstructionWithFurthestNextUse(const std::array<uint32_t, 16>& regInstUsers) const
{
uint32_t furthestUseTarget = kInvalidInstIdx;
uint32_t furthestUseLocation = 0;
for (uint32_t regInstUser : regInstUsers)
{
// Cannot spill temporary registers or the register of the value that's defined in the current instruction
if (regInstUser == kInvalidInstIdx || regInstUser == currInstIdx)
continue;
uint32_t nextUse = getNextInstUse(function, regInstUser, currInstIdx);
// Cannot spill value that is about to be used in the current instruction
if (nextUse == currInstIdx)
continue;
if (furthestUseTarget == kInvalidInstIdx || nextUse > furthestUseLocation)
{
furthestUseLocation = nextUse;
furthestUseTarget = regInstUser;
}
}
return furthestUseTarget;
}
void IrRegAllocX64::assertFree(RegisterX64 reg) const
{
if (reg.size == SizeX64::xmmword)
LUAU_ASSERT(freeXmmMap[reg.index]);
else
LUAU_ASSERT(freeGprMap[reg.index]);
}
void IrRegAllocX64::assertAllFree() const
{
for (RegisterX64 reg : kGprAllocOrder)
LUAU_ASSERT(freeGprMap[reg.index]);
for (bool free : freeXmmMap)
LUAU_ASSERT(free);
}
void IrRegAllocX64::assertNoSpills() const
{
LUAU_ASSERT(spills.empty());
}
ScopedRegX64::ScopedRegX64(IrRegAllocX64& owner)
: owner(owner)
, reg(noreg)
{
}
ScopedRegX64::ScopedRegX64(IrRegAllocX64& owner, SizeX64 size)
: owner(owner)
, reg(noreg)
{
alloc(size);
}
ScopedRegX64::ScopedRegX64(IrRegAllocX64& owner, RegisterX64 reg)
: owner(owner)
, reg(reg)
{
}
ScopedRegX64::~ScopedRegX64()
{
if (reg != noreg)
owner.freeReg(reg);
}
void ScopedRegX64::alloc(SizeX64 size)
{
LUAU_ASSERT(reg == noreg);
reg = owner.allocReg(size, kInvalidInstIdx);
}
void ScopedRegX64::free()
{
LUAU_ASSERT(reg != noreg);
owner.freeReg(reg);
reg = noreg;
}
RegisterX64 ScopedRegX64::release()
{
RegisterX64 tmp = reg;
reg = noreg;
return tmp;
}
ScopedSpills::ScopedSpills(IrRegAllocX64& owner)
: owner(owner)
{
startSpillId = owner.nextSpillId;
}
ScopedSpills::~ScopedSpills()
{
unsigned endSpillId = owner.nextSpillId;
for (size_t i = 0; i < owner.spills.size();)
{
IrSpillX64& spill = owner.spills[i];
// Restoring spills inside this scope cannot create new spills
LUAU_ASSERT(spill.spillId < endSpillId);
// If spill was created inside current scope, it has to be restored
if (spill.spillId >= startSpillId)
{
IrInst& inst = owner.function.instructions[spill.instIdx];
owner.restore(inst, /*intoOriginalLocation*/ true);
// Spill restore removes the spill entry, so the loop repeats at the same 'i'
}
else
{
i++;
}
}
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau
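A hedged usage sketch for the RAII helpers above, inside hypothetical lowering code where 'regs' is the IrRegAllocX64 instance and 'build' the assembly builder:

{
    ScopedSpills spillGuard(regs);          // restores spills created in this scope
    ScopedRegX64 tmp{regs, SizeX64::qword}; // allocated now, freed at scope exit
    build.mov(tmp.reg, luauRegValue(0));    // illustrative use of the scratch register
} // tmp is freed; values spilled inside the scope return to their original registers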

View File

@ -0,0 +1,827 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "IrTranslateBuiltins.h"
#include "Luau/Bytecode.h"
#include "Luau/IrBuilder.h"
#include "lstate.h"
#include <math.h>
// TODO: when nresults is less than our actual result count, we can skip computing/writing unused results
static const int kMinMaxUnrolledParams = 5;
static const int kBit32BinaryOpUnrolledParams = 5;
namespace Luau
{
namespace CodeGen
{
static void builtinCheckDouble(IrBuilder& build, IrOp arg, IrOp fallback)
{
if (arg.kind == IrOpKind::Constant)
LUAU_ASSERT(build.function.constOp(arg).kind == IrConstKind::Double);
else
build.loadAndCheckTag(arg, LUA_TNUMBER, fallback);
}
static IrOp builtinLoadDouble(IrBuilder& build, IrOp arg)
{
if (arg.kind == IrOpKind::Constant)
return arg;
return build.inst(IrCmd::LOAD_DOUBLE, arg);
}
// Wrapper code for all builtins with a fixed signature and manual assembly lowering of the body
// (number, ...) -> number
static BuiltinImplResult translateBuiltinNumberToNumber(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(1));
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinNumberToNumberLibm(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp res = build.inst(IrCmd::INVOKE_LIBM, build.constUint(bfid), va);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
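// Illustrative only (not emitted verbatim by this function): lowering math.exp(R1)
// into R0 through the helper above produces IR roughly like
//   CHECK_TAG R1, tnumber, bb_fallback
//   %0 = LOAD_DOUBLE R1
//   %1 = INVOKE_LIBM <exp>, %0
//   STORE_DOUBLE R0, %1
//   STORE_TAG R0, tnumber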
static BuiltinImplResult translateBuiltin2NumberToNumberLibm(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 2 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp vb = builtinLoadDouble(build, args);
IrOp res = build.inst(IrCmd::INVOKE_LIBM, build.constUint(bfid), va, vb);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinMathLdexp(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 2 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp vb = builtinLoadDouble(build, args);
IrOp vbi = build.inst(IrCmd::NUM_TO_INT, vb);
IrOp res = build.inst(IrCmd::INVOKE_LIBM, build.constUint(bfid), va, vbi);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
// (number, ...) -> (number, number)
static BuiltinImplResult translateBuiltinNumberTo2Number(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 2)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
build.inst(
IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(nresults == 1 ? 1 : 2));
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
if (nresults != 1)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra + 1), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 2};
}
static BuiltinImplResult translateBuiltinAssert(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults != 0)
return {BuiltinImplType::None, -1};
IrOp cont = build.block(IrBlockKind::Internal);
// TODO: maybe adding a guard like CHECK_TRUTHY can be useful
build.inst(IrCmd::JUMP_IF_FALSY, build.vmReg(arg), fallback, cont);
build.beginBlock(cont);
return {BuiltinImplType::UsesFallback, 0};
}
static BuiltinImplResult translateBuiltinMathDeg(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
const double rpd = (3.14159265358979323846 / 180.0);
IrOp varg = builtinLoadDouble(build, build.vmReg(arg));
IrOp value = build.inst(IrCmd::DIV_NUM, varg, build.constDouble(rpd));
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinMathRad(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
const double rpd = (3.14159265358979323846 / 180.0);
IrOp varg = builtinLoadDouble(build, build.vmReg(arg));
IrOp value = build.inst(IrCmd::MUL_NUM, varg, build.constDouble(rpd));
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinMathLog(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
int libmId = bfid;
std::optional<double> denom;
if (nparams != 1)
{
std::optional<double> y = build.function.asDoubleOp(args);
if (!y)
return {BuiltinImplType::None, -1};
if (*y == 2.0)
libmId = LBF_IR_MATH_LOG2;
else if (*y == 10.0)
libmId = LBF_MATH_LOG10;
else
denom = log(*y);
}
builtinCheckDouble(build, build.vmReg(arg), fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp res = build.inst(IrCmd::INVOKE_LIBM, build.constUint(libmId), va);
if (denom)
res = build.inst(IrCmd::DIV_NUM, res, build.constDouble(*denom));
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
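// Illustrative behavior: math.log(x, 2) picks the dedicated log2 libm entry,
// math.log(x, 10) picks log10, and math.log(x, 7) computes log(x) and divides by
// the translation-time constant log(7); a non-constant base is not translated
// and takes the regular builtin path.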
static BuiltinImplResult translateBuiltinMathMin(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 2 || nparams > kMinMaxUnrolledParams || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
for (int i = 3; i <= nparams; ++i)
builtinCheckDouble(build, build.vmReg(vmRegOp(args) + (i - 2)), fallback);
IrOp varg1 = builtinLoadDouble(build, build.vmReg(arg));
IrOp varg2 = builtinLoadDouble(build, args);
IrOp res = build.inst(IrCmd::MIN_NUM, varg2, varg1); // Swapped arguments are required for consistency with VM builtins
for (int i = 3; i <= nparams; ++i)
{
IrOp arg = builtinLoadDouble(build, build.vmReg(vmRegOp(args) + (i - 2)));
res = build.inst(IrCmd::MIN_NUM, arg, res);
}
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinMathMax(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 2 || nparams > kMinMaxUnrolledParams || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
for (int i = 3; i <= nparams; ++i)
builtinCheckDouble(build, build.vmReg(vmRegOp(args) + (i - 2)), fallback);
IrOp varg1 = builtinLoadDouble(build, build.vmReg(arg));
IrOp varg2 = builtinLoadDouble(build, args);
IrOp res = build.inst(IrCmd::MAX_NUM, varg2, varg1); // Swapped arguments are required for consistency with VM builtins
for (int i = 3; i <= nparams; ++i)
{
IrOp arg = builtinLoadDouble(build, build.vmReg(vmRegOp(args) + (i - 2)));
res = build.inst(IrCmd::MAX_NUM, arg, res);
}
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinMathClamp(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 3 || nresults > 1)
return {BuiltinImplType::None, -1};
IrOp block = build.block(IrBlockKind::Internal);
LUAU_ASSERT(args.kind == IrOpKind::VmReg);
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
builtinCheckDouble(build, build.vmReg(vmRegOp(args) + 1), fallback);
IrOp min = builtinLoadDouble(build, args);
IrOp max = builtinLoadDouble(build, build.vmReg(vmRegOp(args) + 1));
build.inst(IrCmd::JUMP_CMP_NUM, min, max, build.cond(IrCondition::NotLessEqual), fallback, block);
build.beginBlock(block);
IrOp v = builtinLoadDouble(build, build.vmReg(arg));
IrOp r = build.inst(IrCmd::MAX_NUM, min, v);
IrOp clamped = build.inst(IrCmd::MIN_NUM, max, r);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), clamped);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinMathUnary(IrBuilder& build, IrCmd cmd, int nparams, int ra, int arg, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
IrOp varg = builtinLoadDouble(build, build.vmReg(arg));
IrOp result = build.inst(cmd, varg);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), result);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinType(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
build.inst(IrCmd::FASTCALL, build.constUint(LBF_TYPE), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(1));
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TSTRING));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinTypeof(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
build.inst(IrCmd::FASTCALL, build.constUint(LBF_TYPEOF), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(1));
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TSTRING));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinBit32BinaryOp(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 2 || nparams > kBit32BinaryOpUnrolledParams || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
for (int i = 3; i <= nparams; ++i)
builtinCheckDouble(build, build.vmReg(vmRegOp(args) + (i - 2)), fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp vb = builtinLoadDouble(build, args);
IrOp vaui = build.inst(IrCmd::NUM_TO_UINT, va);
IrOp vbui = build.inst(IrCmd::NUM_TO_UINT, vb);
IrCmd cmd = IrCmd::NOP;
if (bfid == LBF_BIT32_BAND || bfid == LBF_BIT32_BTEST)
cmd = IrCmd::BITAND_UINT;
else if (bfid == LBF_BIT32_BXOR)
cmd = IrCmd::BITXOR_UINT;
else if (bfid == LBF_BIT32_BOR)
cmd = IrCmd::BITOR_UINT;
LUAU_ASSERT(cmd != IrCmd::NOP);
IrOp res = build.inst(cmd, vaui, vbui);
for (int i = 3; i <= nparams; ++i)
{
IrOp vc = builtinLoadDouble(build, build.vmReg(vmRegOp(args) + (i - 2)));
IrOp arg = build.inst(IrCmd::NUM_TO_UINT, vc);
res = build.inst(cmd, res, arg);
}
if (bfid == LBF_BIT32_BTEST)
{
IrOp falsey = build.block(IrBlockKind::Internal);
IrOp truthy = build.block(IrBlockKind::Internal);
IrOp exit = build.block(IrBlockKind::Internal);
build.inst(IrCmd::JUMP_EQ_INT, res, build.constInt(0), falsey, truthy);
build.beginBlock(falsey);
build.inst(IrCmd::STORE_INT, build.vmReg(ra), build.constInt(0));
build.inst(IrCmd::JUMP, exit);
build.beginBlock(truthy);
build.inst(IrCmd::STORE_INT, build.vmReg(ra), build.constInt(1));
build.inst(IrCmd::JUMP, exit);
build.beginBlock(exit);
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TBOOLEAN));
}
else
{
IrOp value = build.inst(IrCmd::UINT_TO_NUM, res);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
}
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinBit32Bnot(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp vaui = build.inst(IrCmd::NUM_TO_UINT, va);
IrOp not_ = build.inst(IrCmd::BITNOT_UINT, vaui);
IrOp value = build.inst(IrCmd::UINT_TO_NUM, not_);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinBit32Shift(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 2 || nresults > 1)
return {BuiltinImplType::None, -1};
IrOp block = build.block(IrBlockKind::Internal);
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp vb = builtinLoadDouble(build, args);
IrOp vaui = build.inst(IrCmd::NUM_TO_UINT, va);
IrOp vbi = build.inst(IrCmd::NUM_TO_INT, vb);
build.inst(IrCmd::JUMP_GE_UINT, vbi, build.constInt(32), fallback, block);
build.beginBlock(block);
IrCmd cmd = IrCmd::NOP;
if (bfid == LBF_BIT32_LSHIFT)
cmd = IrCmd::BITLSHIFT_UINT;
else if (bfid == LBF_BIT32_RSHIFT)
cmd = IrCmd::BITRSHIFT_UINT;
else if (bfid == LBF_BIT32_ARSHIFT)
cmd = IrCmd::BITARSHIFT_UINT;
LUAU_ASSERT(cmd != IrCmd::NOP);
IrOp shift = build.inst(cmd, vaui, vbi);
IrOp value = build.inst(IrCmd::UINT_TO_NUM, shift);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
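// Note on the range guard above: vbi comes from NUM_TO_INT, and JUMP_GE_UINT compares it as unsigned,
// so a negative shift count reinterprets as a value >= 2^31 and takes the fallback path together with
// counts >= 32. For example, both bit32.lshift(1, 33) and bit32.lshift(1, -1) leave the fast path.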
static BuiltinImplResult translateBuiltinBit32Rotate(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 2 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp vb = builtinLoadDouble(build, args);
IrOp vaui = build.inst(IrCmd::NUM_TO_UINT, va);
IrOp vbi = build.inst(IrCmd::NUM_TO_INT, vb);
IrCmd cmd = (bfid == LBF_BIT32_LROTATE) ? IrCmd::BITLROTATE_UINT : IrCmd::BITRROTATE_UINT;
IrOp shift = build.inst(cmd, vaui, vbi);
IrOp value = build.inst(IrCmd::UINT_TO_NUM, shift);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinBit32Extract(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 2 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp vb = builtinLoadDouble(build, args);
IrOp n = build.inst(IrCmd::NUM_TO_UINT, va);
IrOp f = build.inst(IrCmd::NUM_TO_INT, vb);
IrOp value;
if (nparams == 2)
{
IrOp block = build.block(IrBlockKind::Internal);
build.inst(IrCmd::JUMP_GE_UINT, f, build.constInt(32), fallback, block);
build.beginBlock(block);
// TODO: this can be optimized using a bit-select instruction (bt on x86)
IrOp shift = build.inst(IrCmd::BITRSHIFT_UINT, n, f);
value = build.inst(IrCmd::BITAND_UINT, shift, build.constInt(1));
}
else
{
builtinCheckDouble(build, build.vmReg(args.index + 1), fallback);
IrOp vc = builtinLoadDouble(build, build.vmReg(args.index + 1));
IrOp w = build.inst(IrCmd::NUM_TO_INT, vc);
IrOp block1 = build.block(IrBlockKind::Internal);
build.inst(IrCmd::JUMP_LT_INT, f, build.constInt(0), fallback, block1);
build.beginBlock(block1);
IrOp block2 = build.block(IrBlockKind::Internal);
build.inst(IrCmd::JUMP_LT_INT, w, build.constInt(1), fallback, block2);
build.beginBlock(block2);
IrOp block3 = build.block(IrBlockKind::Internal);
IrOp fw = build.inst(IrCmd::ADD_INT, f, w);
build.inst(IrCmd::JUMP_LT_INT, fw, build.constInt(33), block3, fallback);
build.beginBlock(block3);
IrOp shift = build.inst(IrCmd::BITLSHIFT_UINT, build.constInt(0xfffffffe), build.inst(IrCmd::SUB_INT, w, build.constInt(1)));
IrOp m = build.inst(IrCmd::BITNOT_UINT, shift);
IrOp nf = build.inst(IrCmd::BITRSHIFT_UINT, n, f);
value = build.inst(IrCmd::BITAND_UINT, nf, m);
}
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), build.inst(IrCmd::UINT_TO_NUM, value));
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinBit32ExtractK(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 2 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp n = build.inst(IrCmd::NUM_TO_UINT, va);
double a2 = build.function.doubleOp(args);
int fw = int(a2);
int f = fw & 31;
int w1 = fw >> 5;
uint32_t m = ~(0xfffffffeu << w1);
IrOp nf = build.inst(IrCmd::BITRSHIFT_UINT, n, build.constInt(f));
IrOp and_ = build.inst(IrCmd::BITAND_UINT, nf, build.constInt(m));
IrOp value = build.inst(IrCmd::UINT_TO_NUM, and_);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
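// Worked example of the decoding above, assuming the compiler packs the constant as f | ((w - 1) << 5)
// (which is what this decode implies): for bit32.extract(n, 8, 8), fw = 8 | (7 << 5) = 232, so
//   f  = 232 & 31 = 8
//   w1 = 232 >> 5 = 7
//   m  = ~(0xfffffffeu << 7) = 0x000000ff
// and the lowered result is (n >> 8) & 0xff.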
static BuiltinImplResult translateBuiltinBit32Countz(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp vaui = build.inst(IrCmd::NUM_TO_UINT, va);
IrCmd cmd = (bfid == LBF_BIT32_COUNTLZ) ? IrCmd::BITCOUNTLZ_UINT : IrCmd::BITCOUNTRZ_UINT;
IrOp bin = build.inst(cmd, vaui);
IrOp value = build.inst(IrCmd::UINT_TO_NUM, bin);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinBit32Replace(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 3 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
builtinCheckDouble(build, build.vmReg(args.index + 1), fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp vb = builtinLoadDouble(build, args);
IrOp vc = builtinLoadDouble(build, build.vmReg(args.index + 1));
IrOp n = build.inst(IrCmd::NUM_TO_UINT, va);
IrOp v = build.inst(IrCmd::NUM_TO_UINT, vb);
IrOp f = build.inst(IrCmd::NUM_TO_INT, vc);
IrOp value;
if (nparams == 3)
{
IrOp block = build.block(IrBlockKind::Internal);
build.inst(IrCmd::JUMP_GE_UINT, f, build.constInt(32), fallback, block);
build.beginBlock(block);
// TODO: this can be optimized using a bit-select instruction (btr on x86)
IrOp m = build.constInt(1);
IrOp shift = build.inst(IrCmd::BITLSHIFT_UINT, m, f);
IrOp not_ = build.inst(IrCmd::BITNOT_UINT, shift);
IrOp lhs = build.inst(IrCmd::BITAND_UINT, n, not_);
IrOp vm = build.inst(IrCmd::BITAND_UINT, v, m);
IrOp rhs = build.inst(IrCmd::BITLSHIFT_UINT, vm, f);
value = build.inst(IrCmd::BITOR_UINT, lhs, rhs);
}
else
{
builtinCheckDouble(build, build.vmReg(args.index + 2), fallback);
IrOp vd = builtinLoadDouble(build, build.vmReg(args.index + 2));
IrOp w = build.inst(IrCmd::NUM_TO_INT, vd);
IrOp block1 = build.block(IrBlockKind::Internal);
build.inst(IrCmd::JUMP_LT_INT, f, build.constInt(0), fallback, block1);
build.beginBlock(block1);
IrOp block2 = build.block(IrBlockKind::Internal);
build.inst(IrCmd::JUMP_LT_INT, w, build.constInt(1), fallback, block2);
build.beginBlock(block2);
IrOp block3 = build.block(IrBlockKind::Internal);
IrOp fw = build.inst(IrCmd::ADD_INT, f, w);
build.inst(IrCmd::JUMP_LT_INT, fw, build.constInt(33), block3, fallback);
build.beginBlock(block3);
IrOp shift1 = build.inst(IrCmd::BITLSHIFT_UINT, build.constInt(0xfffffffe), build.inst(IrCmd::SUB_INT, w, build.constInt(1)));
IrOp m = build.inst(IrCmd::BITNOT_UINT, shift1);
IrOp shift2 = build.inst(IrCmd::BITLSHIFT_UINT, m, f);
IrOp not_ = build.inst(IrCmd::BITNOT_UINT, shift2);
IrOp lhs = build.inst(IrCmd::BITAND_UINT, n, not_);
IrOp vm = build.inst(IrCmd::BITAND_UINT, v, m);
IrOp rhs = build.inst(IrCmd::BITLSHIFT_UINT, vm, f);
value = build.inst(IrCmd::BITOR_UINT, lhs, rhs);
}
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), build.inst(IrCmd::UINT_TO_NUM, value));
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinVector(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 3 || nresults > 1)
return {BuiltinImplType::None, -1};
LUAU_ASSERT(LUA_VECTOR_SIZE == 3);
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
builtinCheckDouble(build, build.vmReg(vmRegOp(args) + 1), fallback);
IrOp x = builtinLoadDouble(build, build.vmReg(arg));
IrOp y = builtinLoadDouble(build, args);
IrOp z = builtinLoadDouble(build, build.vmReg(vmRegOp(args) + 1));
build.inst(IrCmd::STORE_VECTOR, build.vmReg(ra), x, y, z);
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TVECTOR));
return {BuiltinImplType::UsesFallback, 1};
}
BuiltinImplResult translateBuiltin(IrBuilder& build, int bfid, int ra, int arg, IrOp args, int nparams, int nresults, IrOp fallback)
{
// Builtins are not allowed to handle variadic arguments
if (nparams == LUA_MULTRET)
return {BuiltinImplType::None, -1};
switch (bfid)
{
case LBF_ASSERT:
return translateBuiltinAssert(build, nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_DEG:
return translateBuiltinMathDeg(build, nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_RAD:
return translateBuiltinMathRad(build, nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_LOG:
return translateBuiltinMathLog(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_MIN:
return translateBuiltinMathMin(build, nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_MAX:
return translateBuiltinMathMax(build, nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_CLAMP:
return translateBuiltinMathClamp(build, nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_FLOOR:
return translateBuiltinMathUnary(build, IrCmd::FLOOR_NUM, nparams, ra, arg, nresults, fallback);
case LBF_MATH_CEIL:
return translateBuiltinMathUnary(build, IrCmd::CEIL_NUM, nparams, ra, arg, nresults, fallback);
case LBF_MATH_SQRT:
return translateBuiltinMathUnary(build, IrCmd::SQRT_NUM, nparams, ra, arg, nresults, fallback);
case LBF_MATH_ABS:
return translateBuiltinMathUnary(build, IrCmd::ABS_NUM, nparams, ra, arg, nresults, fallback);
case LBF_MATH_ROUND:
return translateBuiltinMathUnary(build, IrCmd::ROUND_NUM, nparams, ra, arg, nresults, fallback);
case LBF_MATH_EXP:
case LBF_MATH_ASIN:
case LBF_MATH_SIN:
case LBF_MATH_SINH:
case LBF_MATH_ACOS:
case LBF_MATH_COS:
case LBF_MATH_COSH:
case LBF_MATH_ATAN:
case LBF_MATH_TAN:
case LBF_MATH_TANH:
case LBF_MATH_LOG10:
return translateBuiltinNumberToNumberLibm(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_SIGN:
return translateBuiltinNumberToNumber(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_POW:
case LBF_MATH_FMOD:
case LBF_MATH_ATAN2:
return translateBuiltin2NumberToNumberLibm(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_LDEXP:
return translateBuiltinMathLdexp(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_FREXP:
case LBF_MATH_MODF:
return translateBuiltinNumberTo2Number(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_BIT32_BAND:
case LBF_BIT32_BOR:
case LBF_BIT32_BXOR:
case LBF_BIT32_BTEST:
return translateBuiltinBit32BinaryOp(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_BIT32_BNOT:
return translateBuiltinBit32Bnot(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_BIT32_LSHIFT:
case LBF_BIT32_RSHIFT:
case LBF_BIT32_ARSHIFT:
return translateBuiltinBit32Shift(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_BIT32_LROTATE:
case LBF_BIT32_RROTATE:
return translateBuiltinBit32Rotate(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_BIT32_EXTRACT:
return translateBuiltinBit32Extract(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_BIT32_EXTRACTK:
return translateBuiltinBit32ExtractK(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_BIT32_COUNTLZ:
case LBF_BIT32_COUNTRZ:
return translateBuiltinBit32Countz(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_BIT32_REPLACE:
return translateBuiltinBit32Replace(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_TYPE:
return translateBuiltinType(build, nparams, ra, arg, args, nresults, fallback);
case LBF_TYPEOF:
return translateBuiltinTypeof(build, nparams, ra, arg, args, nresults, fallback);
case LBF_VECTOR:
return translateBuiltinVector(build, nparams, ra, arg, args, nresults, fallback);
default:
return {BuiltinImplType::None, -1};
}
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,27 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
namespace Luau
{
namespace CodeGen
{
struct IrBuilder;
struct IrOp;
enum class BuiltinImplType
{
None,
UsesFallback, // Uses fallback for unsupported cases
};
struct BuiltinImplResult
{
BuiltinImplType type;
int actualResultCount;
};
BuiltinImplResult translateBuiltin(IrBuilder& build, int bfid, int ra, int arg, IrOp args, int nparams, int nresults, IrOp fallback);
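// Usage sketch (assumed caller shape, not a verbatim call site): the fastcall translator tries
// translateBuiltin first and falls back to the generic path when it declines, e.g.
//   BuiltinImplResult br = translateBuiltin(build, bfid, ra, arg, args, nparams, nresults, fallback);
//   if (br.type == BuiltinImplType::None)
//       ; // emit the generic INVOKE_FASTCALL path instead
//   // otherwise br.actualResultCount says how many results were materialized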
} // namespace CodeGen
} // namespace Luau

File diff suppressed because it is too large

View File

@ -0,0 +1,68 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <stdint.h>
#include "ltm.h"
typedef uint32_t Instruction;
namespace Luau
{
namespace CodeGen
{
enum class IrCondition : uint8_t;
struct IrOp;
struct IrBuilder;
enum class IrCmd : uint8_t;
void translateInstLoadNil(IrBuilder& build, const Instruction* pc);
void translateInstLoadB(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstLoadN(IrBuilder& build, const Instruction* pc);
void translateInstLoadK(IrBuilder& build, const Instruction* pc);
void translateInstLoadKX(IrBuilder& build, const Instruction* pc);
void translateInstMove(IrBuilder& build, const Instruction* pc);
void translateInstJump(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstJumpBack(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstJumpIf(IrBuilder& build, const Instruction* pc, int pcpos, bool not_);
void translateInstJumpIfEq(IrBuilder& build, const Instruction* pc, int pcpos, bool not_);
void translateInstJumpIfCond(IrBuilder& build, const Instruction* pc, int pcpos, IrCondition cond);
void translateInstJumpX(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstJumpxEqNil(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstJumpxEqB(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstJumpxEqN(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstJumpxEqS(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstBinary(IrBuilder& build, const Instruction* pc, int pcpos, TMS tm);
void translateInstBinaryK(IrBuilder& build, const Instruction* pc, int pcpos, TMS tm);
void translateInstNot(IrBuilder& build, const Instruction* pc);
void translateInstMinus(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstLength(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstNewTable(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstDupTable(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstGetUpval(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstSetUpval(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstCloseUpvals(IrBuilder& build, const Instruction* pc);
void translateFastCallN(IrBuilder& build, const Instruction* pc, int pcpos, bool customParams, int customParamCount, IrOp customArgs, IrOp next);
void translateInstForNPrep(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstForNLoop(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstForGPrepNext(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstForGPrepInext(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstForGLoopIpairs(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstGetTableN(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstSetTableN(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstGetTable(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstSetTable(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstGetImport(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstGetTableKS(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstSetTableKS(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstGetGlobal(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstSetGlobal(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstConcat(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstCapture(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstNamecall(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstAndX(IrBuilder& build, const Instruction* pc, int pcpos, IrOp c);
void translateInstOrX(IrBuilder& build, const Instruction* pc, int pcpos, IrOp c);
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,791 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/IrUtils.h"
#include "Luau/IrBuilder.h"
#include "BitUtils.h"
#include "NativeState.h"
#include "lua.h"
#include "lnumutils.h"
#include <limits.h>
#include <math.h>
namespace Luau
{
namespace CodeGen
{
IrValueKind getCmdValueKind(IrCmd cmd)
{
switch (cmd)
{
case IrCmd::NOP:
return IrValueKind::None;
case IrCmd::LOAD_TAG:
return IrValueKind::Tag;
case IrCmd::LOAD_POINTER:
return IrValueKind::Pointer;
case IrCmd::LOAD_DOUBLE:
return IrValueKind::Double;
case IrCmd::LOAD_INT:
return IrValueKind::Int;
case IrCmd::LOAD_TVALUE:
case IrCmd::LOAD_NODE_VALUE_TV:
return IrValueKind::Tvalue;
case IrCmd::LOAD_ENV:
case IrCmd::GET_ARR_ADDR:
case IrCmd::GET_SLOT_NODE_ADDR:
case IrCmd::GET_HASH_NODE_ADDR:
return IrValueKind::Pointer;
case IrCmd::STORE_TAG:
case IrCmd::STORE_POINTER:
case IrCmd::STORE_DOUBLE:
case IrCmd::STORE_INT:
case IrCmd::STORE_VECTOR:
case IrCmd::STORE_TVALUE:
case IrCmd::STORE_NODE_VALUE_TV:
return IrValueKind::None;
case IrCmd::ADD_INT:
case IrCmd::SUB_INT:
return IrValueKind::Int;
case IrCmd::ADD_NUM:
case IrCmd::SUB_NUM:
case IrCmd::MUL_NUM:
case IrCmd::DIV_NUM:
case IrCmd::MOD_NUM:
case IrCmd::MIN_NUM:
case IrCmd::MAX_NUM:
case IrCmd::UNM_NUM:
case IrCmd::FLOOR_NUM:
case IrCmd::CEIL_NUM:
case IrCmd::ROUND_NUM:
case IrCmd::SQRT_NUM:
case IrCmd::ABS_NUM:
return IrValueKind::Double;
case IrCmd::NOT_ANY:
return IrValueKind::Int;
case IrCmd::JUMP:
case IrCmd::JUMP_IF_TRUTHY:
case IrCmd::JUMP_IF_FALSY:
case IrCmd::JUMP_EQ_TAG:
case IrCmd::JUMP_EQ_INT:
case IrCmd::JUMP_LT_INT:
case IrCmd::JUMP_GE_UINT:
case IrCmd::JUMP_EQ_POINTER:
case IrCmd::JUMP_CMP_NUM:
case IrCmd::JUMP_CMP_ANY:
case IrCmd::JUMP_SLOT_MATCH:
return IrValueKind::None;
case IrCmd::TABLE_LEN:
return IrValueKind::Double;
case IrCmd::NEW_TABLE:
case IrCmd::DUP_TABLE:
return IrValueKind::Pointer;
case IrCmd::TRY_NUM_TO_INDEX:
return IrValueKind::Int;
case IrCmd::TRY_CALL_FASTGETTM:
return IrValueKind::Pointer;
case IrCmd::INT_TO_NUM:
case IrCmd::UINT_TO_NUM:
return IrValueKind::Double;
case IrCmd::NUM_TO_INT:
case IrCmd::NUM_TO_UINT:
return IrValueKind::Int;
case IrCmd::ADJUST_STACK_TO_REG:
case IrCmd::ADJUST_STACK_TO_TOP:
return IrValueKind::None;
case IrCmd::FASTCALL:
return IrValueKind::None;
case IrCmd::INVOKE_FASTCALL:
return IrValueKind::Int;
case IrCmd::CHECK_FASTCALL_RES:
case IrCmd::DO_ARITH:
case IrCmd::DO_LEN:
case IrCmd::GET_TABLE:
case IrCmd::SET_TABLE:
case IrCmd::GET_IMPORT:
case IrCmd::CONCAT:
case IrCmd::GET_UPVALUE:
case IrCmd::SET_UPVALUE:
case IrCmd::PREPARE_FORN:
case IrCmd::CHECK_TAG:
case IrCmd::CHECK_READONLY:
case IrCmd::CHECK_NO_METATABLE:
case IrCmd::CHECK_SAFE_ENV:
case IrCmd::CHECK_ARRAY_SIZE:
case IrCmd::CHECK_SLOT_MATCH:
case IrCmd::CHECK_NODE_NO_NEXT:
case IrCmd::INTERRUPT:
case IrCmd::CHECK_GC:
case IrCmd::BARRIER_OBJ:
case IrCmd::BARRIER_TABLE_BACK:
case IrCmd::BARRIER_TABLE_FORWARD:
case IrCmd::SET_SAVEDPC:
case IrCmd::CLOSE_UPVALS:
case IrCmd::CAPTURE:
case IrCmd::SETLIST:
case IrCmd::CALL:
case IrCmd::RETURN:
case IrCmd::FORGLOOP:
case IrCmd::FORGLOOP_FALLBACK:
case IrCmd::FORGPREP_XNEXT_FALLBACK:
case IrCmd::COVERAGE:
case IrCmd::FALLBACK_GETGLOBAL:
case IrCmd::FALLBACK_SETGLOBAL:
case IrCmd::FALLBACK_GETTABLEKS:
case IrCmd::FALLBACK_SETTABLEKS:
case IrCmd::FALLBACK_NAMECALL:
case IrCmd::FALLBACK_PREPVARARGS:
case IrCmd::FALLBACK_GETVARARGS:
case IrCmd::FALLBACK_NEWCLOSURE:
case IrCmd::FALLBACK_DUPCLOSURE:
case IrCmd::FALLBACK_FORGPREP:
return IrValueKind::None;
case IrCmd::SUBSTITUTE:
return IrValueKind::Unknown;
case IrCmd::BITAND_UINT:
case IrCmd::BITXOR_UINT:
case IrCmd::BITOR_UINT:
case IrCmd::BITNOT_UINT:
case IrCmd::BITLSHIFT_UINT:
case IrCmd::BITRSHIFT_UINT:
case IrCmd::BITARSHIFT_UINT:
case IrCmd::BITLROTATE_UINT:
case IrCmd::BITRROTATE_UINT:
case IrCmd::BITCOUNTLZ_UINT:
case IrCmd::BITCOUNTRZ_UINT:
return IrValueKind::Int;
case IrCmd::INVOKE_LIBM:
return IrValueKind::Double;
}
LUAU_UNREACHABLE();
}
static void removeInstUse(IrFunction& function, uint32_t instIdx)
{
IrInst& inst = function.instructions[instIdx];
LUAU_ASSERT(inst.useCount);
inst.useCount--;
if (inst.useCount == 0)
kill(function, inst);
}
static void removeBlockUse(IrFunction& function, uint32_t blockIdx)
{
IrBlock& block = function.blocks[blockIdx];
LUAU_ASSERT(block.useCount);
block.useCount--;
// Entry block is never removed because it has an implicit use
if (block.useCount == 0 && blockIdx != 0)
kill(function, block);
}
void addUse(IrFunction& function, IrOp op)
{
if (op.kind == IrOpKind::Inst)
function.instructions[op.index].useCount++;
else if (op.kind == IrOpKind::Block)
function.blocks[op.index].useCount++;
}
void removeUse(IrFunction& function, IrOp op)
{
if (op.kind == IrOpKind::Inst)
removeInstUse(function, op.index);
else if (op.kind == IrOpKind::Block)
removeBlockUse(function, op.index);
}
bool isGCO(uint8_t tag)
{
// mirrors iscollectable(o) from VM/lobject.h
return tag >= LUA_TSTRING;
}
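// This relies on the tag ordering in lua.h: the value types (nil, boolean, lightuserdata, number,
// vector) all sort below LUA_TSTRING, and every tag from LUA_TSTRING onward is heap-allocated.
// For example, isGCO(LUA_TNUMBER) is false while isGCO(LUA_TTABLE) is true.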
void kill(IrFunction& function, IrInst& inst)
{
LUAU_ASSERT(inst.useCount == 0);
inst.cmd = IrCmd::NOP;
removeUse(function, inst.a);
removeUse(function, inst.b);
removeUse(function, inst.c);
removeUse(function, inst.d);
removeUse(function, inst.e);
removeUse(function, inst.f);
inst.a = {};
inst.b = {};
inst.c = {};
inst.d = {};
inst.e = {};
inst.f = {};
}
void kill(IrFunction& function, uint32_t start, uint32_t end)
{
// Kill instructions in reverse order to avoid killing instructions that are still marked as used
for (int i = int(end); i >= int(start); i--)
{
LUAU_ASSERT(unsigned(i) < function.instructions.size());
IrInst& curr = function.instructions[i];
if (curr.cmd == IrCmd::NOP)
continue;
kill(function, curr);
}
}
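// Example of why reverse order matters: given
//   %0 = LOAD_DOUBLE R0
//   %1 = UNM_NUM %0
// killing %1 first drops the use count of %0 to zero, so killing %0 next passes the useCount == 0
// assertion; a forward sweep would try to kill %0 while %1 still counts as a user.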
void kill(IrFunction& function, IrBlock& block)
{
LUAU_ASSERT(block.useCount == 0);
block.kind = IrBlockKind::Dead;
kill(function, block.start, block.finish);
block.start = ~0u;
block.finish = ~0u;
}
void replace(IrFunction& function, IrOp& original, IrOp replacement)
{
// Add the new use before removing the old one, in case the old use is the last one keeping the target operand alive
addUse(function, replacement);
removeUse(function, original);
original = replacement;
}
void replace(IrFunction& function, IrBlock& block, uint32_t instIdx, IrInst replacement)
{
IrInst& inst = function.instructions[instIdx];
// Add new uses before removing the old ones, in case the old uses are the last ones keeping a target operand alive
addUse(function, replacement.a);
addUse(function, replacement.b);
addUse(function, replacement.c);
addUse(function, replacement.d);
addUse(function, replacement.e);
addUse(function, replacement.f);
// An extra reference is added so the block will not remove itself
block.useCount++;
// If we introduced an earlier terminating instruction, all following instructions become dead
if (!isBlockTerminator(inst.cmd) && isBlockTerminator(replacement.cmd))
{
// Block has to be fully constructed before replacement is performed
LUAU_ASSERT(block.finish != ~0u);
LUAU_ASSERT(instIdx + 1 <= block.finish);
kill(function, instIdx + 1, block.finish);
block.finish = instIdx;
}
removeUse(function, inst.a);
removeUse(function, inst.b);
removeUse(function, inst.c);
removeUse(function, inst.d);
removeUse(function, inst.e);
removeUse(function, inst.f);
// Inherit existing use count (last use is skipped as it will be defined later)
replacement.useCount = inst.useCount;
inst = replacement;
// Removing the earlier extra reference might leave the block without users while not marking it as dead
// This will have to be handled by separate dead code elimination
block.useCount--;
}
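// Example tying this to the folds below: replacing a JUMP_CMP_NUM in the middle of a block with an
// unconditional {IrCmd::JUMP, target} makes instIdx the new terminator, so the instructions in
// [instIdx + 1, block.finish] are unreachable and get killed here before block.finish is updated.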
void substitute(IrFunction& function, IrInst& inst, IrOp replacement)
{
LUAU_ASSERT(!isBlockTerminator(inst.cmd));
inst.cmd = IrCmd::SUBSTITUTE;
addUse(function, replacement);
removeUse(function, inst.a);
removeUse(function, inst.b);
removeUse(function, inst.c);
removeUse(function, inst.d);
removeUse(function, inst.e);
removeUse(function, inst.f);
inst.a = replacement;
inst.b = {};
inst.c = {};
inst.d = {};
inst.e = {};
inst.f = {};
}
void applySubstitutions(IrFunction& function, IrOp& op)
{
if (op.kind == IrOpKind::Inst)
{
IrInst& src = function.instructions[op.index];
if (src.cmd == IrCmd::SUBSTITUTE)
{
op.kind = src.a.kind;
op.index = src.a.index;
// If we substitute with the result of a different instruction, update the use count
if (op.kind == IrOpKind::Inst)
{
IrInst& dst = function.instructions[op.index];
LUAU_ASSERT(dst.cmd != IrCmd::SUBSTITUTE && "chained substitutions are not allowed");
dst.useCount++;
}
LUAU_ASSERT(src.useCount > 0);
src.useCount--;
if (src.useCount == 0)
removeUse(function, src.a);
}
}
}
void applySubstitutions(IrFunction& function, IrInst& inst)
{
applySubstitutions(function, inst.a);
applySubstitutions(function, inst.b);
applySubstitutions(function, inst.c);
applySubstitutions(function, inst.d);
applySubstitutions(function, inst.e);
applySubstitutions(function, inst.f);
}
bool compare(double a, double b, IrCondition cond)
{
switch (cond)
{
case IrCondition::Equal:
return a == b;
case IrCondition::NotEqual:
return a != b;
case IrCondition::Less:
return a < b;
case IrCondition::NotLess:
return !(a < b);
case IrCondition::LessEqual:
return a <= b;
case IrCondition::NotLessEqual:
return !(a <= b);
case IrCondition::Greater:
return a > b;
case IrCondition::NotGreater:
return !(a > b);
case IrCondition::GreaterEqual:
return a >= b;
case IrCondition::NotGreaterEqual:
return !(a >= b);
default:
LUAU_ASSERT(!"unsupported conidtion");
}
return false;
}
void foldConstants(IrBuilder& build, IrFunction& function, IrBlock& block, uint32_t index)
{
IrInst& inst = function.instructions[index];
switch (inst.cmd)
{
case IrCmd::ADD_INT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
// We need to avoid signed integer overflow, but we also have to produce a result
// So we add numbers as unsigned and use fixed-width integer types to force a two's complement evaluation
int32_t lhs = function.intOp(inst.a);
int32_t rhs = function.intOp(inst.b);
int sum = int32_t(uint32_t(lhs) + uint32_t(rhs));
substitute(function, inst, build.constInt(sum));
}
break;
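// Worked example of the unsigned trick: folding INT_MAX + 1 computes
//   uint32_t(0x7fffffff) + uint32_t(1) == 0x80000000u, and int32_t(0x80000000u) == INT_MIN,
// i.e. the two's complement wraparound, without performing a signed overflow in C++.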
case IrCmd::SUB_INT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
// We need to avoid signed integer overflow, but we also have to produce a result
// So we subtract numbers as unsigned and use fixed-width integer types to force a two's complement evaluation
int32_t lhs = function.intOp(inst.a);
int32_t rhs = function.intOp(inst.b);
int diff = int32_t(uint32_t(lhs) - uint32_t(rhs));
substitute(function, inst, build.constInt(diff));
}
break;
case IrCmd::ADD_NUM:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(function.doubleOp(inst.a) + function.doubleOp(inst.b)));
break;
case IrCmd::SUB_NUM:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(function.doubleOp(inst.a) - function.doubleOp(inst.b)));
break;
case IrCmd::MUL_NUM:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(function.doubleOp(inst.a) * function.doubleOp(inst.b)));
break;
case IrCmd::DIV_NUM:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(function.doubleOp(inst.a) / function.doubleOp(inst.b)));
break;
case IrCmd::MOD_NUM:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(luai_nummod(function.doubleOp(inst.a), function.doubleOp(inst.b))));
break;
case IrCmd::MIN_NUM:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
double a1 = function.doubleOp(inst.a);
double a2 = function.doubleOp(inst.b);
substitute(function, inst, build.constDouble(a1 < a2 ? a1 : a2));
}
break;
case IrCmd::MAX_NUM:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
double a1 = function.doubleOp(inst.a);
double a2 = function.doubleOp(inst.b);
substitute(function, inst, build.constDouble(a1 > a2 ? a1 : a2));
}
break;
case IrCmd::UNM_NUM:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(-function.doubleOp(inst.a)));
break;
case IrCmd::FLOOR_NUM:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(floor(function.doubleOp(inst.a))));
break;
case IrCmd::CEIL_NUM:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(ceil(function.doubleOp(inst.a))));
break;
case IrCmd::ROUND_NUM:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(round(function.doubleOp(inst.a))));
break;
case IrCmd::SQRT_NUM:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(sqrt(function.doubleOp(inst.a))));
break;
case IrCmd::ABS_NUM:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(fabs(function.doubleOp(inst.a))));
break;
case IrCmd::NOT_ANY:
if (inst.a.kind == IrOpKind::Constant)
{
uint8_t a = function.tagOp(inst.a);
if (a == LUA_TNIL)
substitute(function, inst, build.constInt(1));
else if (a != LUA_TBOOLEAN)
substitute(function, inst, build.constInt(0));
else if (inst.b.kind == IrOpKind::Constant)
substitute(function, inst, build.constInt(function.intOp(inst.b) == 1 ? 0 : 1));
}
break;
case IrCmd::JUMP_EQ_TAG:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
if (function.tagOp(inst.a) == function.tagOp(inst.b))
replace(function, block, index, {IrCmd::JUMP, inst.c});
else
replace(function, block, index, {IrCmd::JUMP, inst.d});
}
break;
case IrCmd::JUMP_EQ_INT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
if (function.intOp(inst.a) == function.intOp(inst.b))
replace(function, block, index, {IrCmd::JUMP, inst.c});
else
replace(function, block, index, {IrCmd::JUMP, inst.d});
}
break;
case IrCmd::JUMP_LT_INT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
if (function.intOp(inst.a) < function.intOp(inst.b))
replace(function, block, index, {IrCmd::JUMP, inst.c});
else
replace(function, block, index, {IrCmd::JUMP, inst.d});
}
break;
case IrCmd::JUMP_GE_UINT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
if (unsigned(function.intOp(inst.a)) >= unsigned(function.intOp(inst.b)))
replace(function, block, index, {IrCmd::JUMP, inst.c});
else
replace(function, block, index, {IrCmd::JUMP, inst.d});
}
break;
case IrCmd::JUMP_CMP_NUM:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
if (compare(function.doubleOp(inst.a), function.doubleOp(inst.b), conditionOp(inst.c)))
replace(function, block, index, {IrCmd::JUMP, inst.d});
else
replace(function, block, index, {IrCmd::JUMP, inst.e});
}
break;
case IrCmd::TRY_NUM_TO_INDEX:
if (inst.a.kind == IrOpKind::Constant)
{
double value = function.doubleOp(inst.a);
// To avoid undefined behavior of casting a value not representable in the target type, we check the range
if (value >= INT_MIN && value <= INT_MAX)
{
int arrIndex = int(value);
if (double(arrIndex) == value)
substitute(function, inst, build.constInt(arrIndex));
else
replace(function, block, index, {IrCmd::JUMP, inst.b});
}
else
{
replace(function, block, index, {IrCmd::JUMP, inst.b});
}
}
break;
case IrCmd::INT_TO_NUM:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(double(function.intOp(inst.a))));
break;
case IrCmd::UINT_TO_NUM:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(double(unsigned(function.intOp(inst.a)))));
break;
case IrCmd::NUM_TO_INT:
if (inst.a.kind == IrOpKind::Constant)
{
double value = function.doubleOp(inst.a);
// To avoid undefined behavior of casting a value not representable in the target type, we check the range
if (value >= INT_MIN && value <= INT_MAX)
substitute(function, inst, build.constInt(int(value)));
}
break;
case IrCmd::NUM_TO_UINT:
if (inst.a.kind == IrOpKind::Constant)
{
double value = function.doubleOp(inst.a);
// To avoid undefined behavior of casting a value not representable in the target type, we check the range
if (value >= 0 && value <= UINT_MAX)
substitute(function, inst, build.constInt(unsigned(value)));
}
break;
case IrCmd::CHECK_TAG:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
if (function.tagOp(inst.a) == function.tagOp(inst.b))
kill(function, inst);
else
replace(function, block, index, {IrCmd::JUMP, inst.c}); // Shows a conflict in assumptions on this path
}
break;
case IrCmd::BITAND_UINT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
unsigned op1 = unsigned(function.intOp(inst.a));
unsigned op2 = unsigned(function.intOp(inst.b));
substitute(function, inst, build.constInt(op1 & op2));
}
else
{
if (inst.a.kind == IrOpKind::Constant && function.intOp(inst.a) == 0) // (0 & b) -> 0
substitute(function, inst, build.constInt(0));
else if (inst.a.kind == IrOpKind::Constant && function.intOp(inst.a) == -1) // (-1 & b) -> b
substitute(function, inst, inst.b);
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0) // (a & 0) -> 0
substitute(function, inst, build.constInt(0));
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == -1) // (a & -1) -> a
substitute(function, inst, inst.a);
}
break;
case IrCmd::BITXOR_UINT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
unsigned op1 = unsigned(function.intOp(inst.a));
unsigned op2 = unsigned(function.intOp(inst.b));
substitute(function, inst, build.constInt(op1 ^ op2));
}
else
{
if (inst.a.kind == IrOpKind::Constant && function.intOp(inst.a) == 0) // (0 ^ b) -> b
substitute(function, inst, inst.b);
else if (inst.a.kind == IrOpKind::Constant && function.intOp(inst.a) == -1) // (-1 ^ b) -> ~b
replace(function, block, index, {IrCmd::BITNOT_UINT, inst.b});
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0) // (a ^ 0) -> a
substitute(function, inst, inst.a);
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == -1) // (a ^ -1) -> ~a
replace(function, block, index, {IrCmd::BITNOT_UINT, inst.a});
}
break;
case IrCmd::BITOR_UINT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
unsigned op1 = unsigned(function.intOp(inst.a));
unsigned op2 = unsigned(function.intOp(inst.b));
substitute(function, inst, build.constInt(op1 | op2));
}
else
{
if (inst.a.kind == IrOpKind::Constant && function.intOp(inst.a) == 0) // (0 | b) -> b
substitute(function, inst, inst.b);
else if (inst.a.kind == IrOpKind::Constant && function.intOp(inst.a) == -1) // (-1 | b) -> -1
substitute(function, inst, build.constInt(-1));
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0) // (a | 0) -> a
substitute(function, inst, inst.a);
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == -1) // (a | -1) -> -1
substitute(function, inst, build.constInt(-1));
}
break;
case IrCmd::BITNOT_UINT:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constInt(~unsigned(function.intOp(inst.a))));
break;
case IrCmd::BITLSHIFT_UINT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
unsigned op1 = unsigned(function.intOp(inst.a));
int op2 = function.intOp(inst.b);
if (unsigned(op2) < 32)
substitute(function, inst, build.constInt(op1 << op2));
}
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0)
{
substitute(function, inst, inst.a);
}
break;
case IrCmd::BITRSHIFT_UINT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
unsigned op1 = unsigned(function.intOp(inst.a));
int op2 = function.intOp(inst.b);
if (unsigned(op2) < 32)
substitute(function, inst, build.constInt(op1 >> op2));
}
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0)
{
substitute(function, inst, inst.a);
}
break;
case IrCmd::BITARSHIFT_UINT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
int op1 = function.intOp(inst.a);
int op2 = function.intOp(inst.b);
if (unsigned(op2) < 32)
{
// note: before C++20, right shift of negative values is implementation-defined rather than guaranteed arithmetic;
// C++20 defines it as an arithmetic shift, and all mainstream compilers already do the right (shift) thing.
substitute(function, inst, build.constInt(op1 >> op2));
}
}
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0)
{
substitute(function, inst, inst.a);
}
break;
case IrCmd::BITLROTATE_UINT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
substitute(function, inst, build.constInt(lrotate(unsigned(function.intOp(inst.a)), function.intOp(inst.b))));
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0)
substitute(function, inst, inst.a);
break;
case IrCmd::BITRROTATE_UINT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
substitute(function, inst, build.constInt(rrotate(unsigned(function.intOp(inst.a)), function.intOp(inst.b))));
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0)
substitute(function, inst, inst.a);
break;
case IrCmd::BITCOUNTLZ_UINT:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constInt(countlz(unsigned(function.intOp(inst.a)))));
break;
case IrCmd::BITCOUNTRZ_UINT:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constInt(countrz(unsigned(function.intOp(inst.a)))));
break;
default:
break;
}
}
uint32_t getNativeContextOffset(int bfid)
{
switch (bfid)
{
case LBF_MATH_ACOS:
return offsetof(NativeContext, libm_acos);
case LBF_MATH_ASIN:
return offsetof(NativeContext, libm_asin);
case LBF_MATH_ATAN2:
return offsetof(NativeContext, libm_atan2);
case LBF_MATH_ATAN:
return offsetof(NativeContext, libm_atan);
case LBF_MATH_COSH:
return offsetof(NativeContext, libm_cosh);
case LBF_MATH_COS:
return offsetof(NativeContext, libm_cos);
case LBF_MATH_EXP:
return offsetof(NativeContext, libm_exp);
case LBF_MATH_LOG10:
return offsetof(NativeContext, libm_log10);
case LBF_MATH_LOG:
return offsetof(NativeContext, libm_log);
case LBF_MATH_SINH:
return offsetof(NativeContext, libm_sinh);
case LBF_MATH_SIN:
return offsetof(NativeContext, libm_sin);
case LBF_MATH_TANH:
return offsetof(NativeContext, libm_tanh);
case LBF_MATH_TAN:
return offsetof(NativeContext, libm_tan);
case LBF_MATH_FMOD:
return offsetof(NativeContext, libm_fmod);
case LBF_MATH_POW:
return offsetof(NativeContext, libm_pow);
case LBF_IR_MATH_LOG2:
return offsetof(NativeContext, libm_log2);
case LBF_MATH_LDEXP:
return offsetof(NativeContext, libm_ldexp);
default:
LUAU_ASSERT(!"Unsupported bfid");
}
return 0;
}
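// Usage sketch (hypothetical lowering, names assumed): INVOKE_LIBM can call through the native
// context instead of embedding an absolute address, along the lines of
//   build.call(qword[rNativeContext + getNativeContextOffset(function.uintOp(inst.a))]);
// where rNativeContext is the register assumed to be pinned to the NativeContext pointer.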
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,222 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "IrValueLocationTracking.h"
namespace Luau
{
namespace CodeGen
{
IrValueLocationTracking::IrValueLocationTracking(IrFunction& function)
: function(function)
{
vmRegValue.fill(kInvalidInstIdx);
}
void IrValueLocationTracking::setRestoreCallback(void* context, void (*callback)(void* context, IrInst& inst))
{
restoreCallbackCtx = context;
restoreCallback = callback;
}
void IrValueLocationTracking::beforeInstLowering(IrInst& inst)
{
switch (inst.cmd)
{
case IrCmd::STORE_TAG:
case IrCmd::STORE_POINTER:
case IrCmd::STORE_DOUBLE:
case IrCmd::STORE_INT:
case IrCmd::STORE_VECTOR:
case IrCmd::STORE_TVALUE:
invalidateRestoreOp(inst.a);
break;
case IrCmd::ADJUST_STACK_TO_REG:
invalidateRestoreVmRegs(vmRegOp(inst.a), -1);
break;
case IrCmd::FASTCALL:
invalidateRestoreVmRegs(vmRegOp(inst.b), function.intOp(inst.f));
break;
case IrCmd::INVOKE_FASTCALL:
// Multiple return sequences (count == -1) are defined by ADJUST_STACK_TO_REG
if (int count = function.intOp(inst.f); count != -1)
invalidateRestoreVmRegs(vmRegOp(inst.b), count);
break;
case IrCmd::DO_ARITH:
case IrCmd::DO_LEN:
case IrCmd::GET_TABLE:
case IrCmd::GET_IMPORT:
invalidateRestoreOp(inst.a);
break;
case IrCmd::CONCAT:
invalidateRestoreVmRegs(vmRegOp(inst.a), function.uintOp(inst.b));
break;
case IrCmd::GET_UPVALUE:
invalidateRestoreOp(inst.a);
break;
case IrCmd::PREPARE_FORN:
invalidateRestoreOp(inst.a);
invalidateRestoreOp(inst.b);
invalidateRestoreOp(inst.c);
break;
case IrCmd::CALL:
// Even if result count is limited, all registers starting from function (ra) might be modified
invalidateRestoreVmRegs(vmRegOp(inst.a), -1);
break;
case IrCmd::FORGLOOP:
case IrCmd::FORGLOOP_FALLBACK:
// Even if result count is limited, all registers starting from iteration index (ra+2) might be modified
invalidateRestoreVmRegs(vmRegOp(inst.a) + 2, -1);
break;
case IrCmd::FALLBACK_GETGLOBAL:
case IrCmd::FALLBACK_GETTABLEKS:
invalidateRestoreOp(inst.b);
break;
case IrCmd::FALLBACK_NAMECALL:
invalidateRestoreVmRegs(vmRegOp(inst.b), 2);
break;
case IrCmd::FALLBACK_GETVARARGS:
invalidateRestoreVmRegs(vmRegOp(inst.b), function.intOp(inst.c));
break;
case IrCmd::FALLBACK_NEWCLOSURE:
case IrCmd::FALLBACK_DUPCLOSURE:
invalidateRestoreOp(inst.b);
break;
case IrCmd::FALLBACK_FORGPREP:
invalidateRestoreVmRegs(vmRegOp(inst.b), 3);
break;
// Make sure all VmReg referencing instructions are handled explicitly (only register reads here)
case IrCmd::LOAD_TAG:
case IrCmd::LOAD_POINTER:
case IrCmd::LOAD_DOUBLE:
case IrCmd::LOAD_INT:
case IrCmd::LOAD_TVALUE:
case IrCmd::JUMP_IF_TRUTHY:
case IrCmd::JUMP_IF_FALSY:
case IrCmd::JUMP_CMP_ANY:
case IrCmd::SET_TABLE:
case IrCmd::SET_UPVALUE:
case IrCmd::INTERRUPT:
case IrCmd::BARRIER_OBJ:
case IrCmd::BARRIER_TABLE_FORWARD:
case IrCmd::CLOSE_UPVALS:
case IrCmd::CAPTURE:
case IrCmd::SETLIST:
case IrCmd::RETURN:
case IrCmd::FORGPREP_XNEXT_FALLBACK:
case IrCmd::FALLBACK_SETGLOBAL:
case IrCmd::FALLBACK_SETTABLEKS:
case IrCmd::FALLBACK_PREPVARARGS:
case IrCmd::ADJUST_STACK_TO_TOP:
break;
// These instructions read VmReg only after optimizeMemoryOperandsX64
case IrCmd::CHECK_TAG:
case IrCmd::ADD_NUM:
case IrCmd::SUB_NUM:
case IrCmd::MUL_NUM:
case IrCmd::DIV_NUM:
case IrCmd::MOD_NUM:
case IrCmd::MIN_NUM:
case IrCmd::MAX_NUM:
case IrCmd::JUMP_EQ_TAG:
case IrCmd::JUMP_CMP_NUM:
break;
default:
// All instructions which reference registers have to be handled explicitly
LUAU_ASSERT(inst.a.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.b.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.c.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.d.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.e.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.f.kind != IrOpKind::VmReg);
break;
}
}
void IrValueLocationTracking::afterInstLowering(IrInst& inst, uint32_t instIdx)
{
switch (inst.cmd)
{
case IrCmd::LOAD_TAG:
case IrCmd::LOAD_POINTER:
case IrCmd::LOAD_DOUBLE:
case IrCmd::LOAD_INT:
case IrCmd::LOAD_TVALUE:
recordRestoreOp(instIdx, inst.a);
break;
case IrCmd::STORE_POINTER:
case IrCmd::STORE_DOUBLE:
case IrCmd::STORE_INT:
case IrCmd::STORE_TVALUE:
// If this is not the last use of the stored value, we can restore it from this new location
if (inst.b.kind == IrOpKind::Inst && function.instOp(inst.b).lastUse != instIdx)
recordRestoreOp(inst.b.index, inst.a);
break;
default:
break;
}
}
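// Example of the store forwarding above (a sketch): given
//   %4 = LOAD_DOUBLE R1
//   STORE_DOUBLE R2, %4   ; %4 has uses after this store
// the value of %4 is now also available in R2, so a later spill of %4 can be restored from register
// memory instead of a stack slot.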
void IrValueLocationTracking::recordRestoreOp(uint32_t instIdx, IrOp location)
{
if (location.kind == IrOpKind::VmReg)
{
int reg = vmRegOp(location);
if (reg > maxReg)
maxReg = reg;
// Record location in register memory only if register is not captured
if (!function.cfg.captured.regs.test(reg))
function.recordRestoreOp(instIdx, location);
vmRegValue[reg] = instIdx;
}
else if (location.kind == IrOpKind::VmConst)
{
function.recordRestoreOp(instIdx, location);
}
}
void IrValueLocationTracking::invalidateRestoreOp(IrOp location)
{
if (location.kind == IrOpKind::VmReg)
{
uint32_t& instIdx = vmRegValue[vmRegOp(location)];
if (instIdx != kInvalidInstIdx)
{
IrInst& inst = function.instructions[instIdx];
// If instruction value is spilled and memory location is about to be lost, it has to be restored immediately
if (inst.needsReload)
restoreCallback(restoreCallbackCtx, inst);
// Instruction loses its memory storage location
function.recordRestoreOp(instIdx, IrOp());
// Register loses link with instruction
instIdx = kInvalidInstIdx;
}
}
else if (location.kind == IrOpKind::VmConst)
{
LUAU_ASSERT(!"VM constants are immutable");
}
}
void IrValueLocationTracking::invalidateRestoreVmRegs(int start, int count)
{
int end = count == -1 ? 255 : start + count;
if (end > maxReg)
end = maxReg;
for (int reg = start; reg <= end; reg++)
invalidateRestoreOp(IrOp{IrOpKind::VmReg, uint8_t(reg)});
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,38 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/IrData.h"
#include <array>
namespace Luau
{
namespace CodeGen
{
struct IrValueLocationTracking
{
IrValueLocationTracking(IrFunction& function);
void setRestoreCallback(void* context, void (*callback)(void* context, IrInst& inst));
void beforeInstLowering(IrInst& inst);
void afterInstLowering(IrInst& inst, uint32_t instIdx);
void recordRestoreOp(uint32_t instIdx, IrOp location);
void invalidateRestoreOp(IrOp location);
void invalidateRestoreVmRegs(int start, int count);
IrFunction& function;
std::array<uint32_t, 256> vmRegValue;
// For range/full invalidations, we only want to visit the limited range of registers that we have recorded
int maxReg = 0;
void* restoreCallbackCtx = nullptr;
void (*restoreCallback)(void* context, IrInst& inst) = nullptr;
};
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,111 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "NativeState.h"
#include "Luau/UnwindBuilder.h"
#include "CodeGenUtils.h"
#include "CustomExecUtils.h"
#include "lbuiltins.h"
#include "lgc.h"
#include "ltable.h"
#include "lfunc.h"
#include "lvm.h"
#include <math.h>
#include <string.h>
namespace Luau
{
namespace CodeGen
{
constexpr unsigned kBlockSize = 4 * 1024 * 1024;
constexpr unsigned kMaxTotalSize = 256 * 1024 * 1024;
NativeState::NativeState()
: codeAllocator(kBlockSize, kMaxTotalSize)
{
}
NativeState::~NativeState() = default;
void initFunctions(NativeState& data)
{
static_assert(sizeof(data.context.luauF_table) == sizeof(luauF_table), "fastcall tables are not of the same length");
memcpy(data.context.luauF_table, luauF_table, sizeof(luauF_table));
data.context.luaV_lessthan = luaV_lessthan;
data.context.luaV_lessequal = luaV_lessequal;
data.context.luaV_equalval = luaV_equalval;
data.context.luaV_doarith = luaV_doarith;
data.context.luaV_dolen = luaV_dolen;
data.context.luaV_prepareFORN = luaV_prepareFORN;
data.context.luaV_gettable = luaV_gettable;
data.context.luaV_settable = luaV_settable;
data.context.luaV_getimport = luaV_getimport;
data.context.luaV_concat = luaV_concat;
data.context.luaH_getn = luaH_getn;
data.context.luaH_new = luaH_new;
data.context.luaH_clone = luaH_clone;
data.context.luaH_resizearray = luaH_resizearray;
data.context.luaC_barriertable = luaC_barriertable;
data.context.luaC_barrierf = luaC_barrierf;
data.context.luaC_barrierback = luaC_barrierback;
data.context.luaC_step = luaC_step;
data.context.luaF_close = luaF_close;
data.context.luaT_gettm = luaT_gettm;
data.context.luaT_objtypenamestr = luaT_objtypenamestr;
data.context.libm_exp = exp;
data.context.libm_pow = pow;
data.context.libm_fmod = fmod;
data.context.libm_log = log;
data.context.libm_log2 = log2;
data.context.libm_log10 = log10;
data.context.libm_ldexp = ldexp;
data.context.libm_round = round;
data.context.libm_frexp = frexp;
data.context.libm_modf = modf;
data.context.libm_asin = asin;
data.context.libm_sin = sin;
data.context.libm_sinh = sinh;
data.context.libm_acos = acos;
data.context.libm_cos = cos;
data.context.libm_cosh = cosh;
data.context.libm_atan = atan;
data.context.libm_atan2 = atan2;
data.context.libm_tan = tan;
data.context.libm_tanh = tanh;
data.context.forgLoopTableIter = forgLoopTableIter;
data.context.forgLoopNodeIter = forgLoopNodeIter;
data.context.forgLoopNonTableFallback = forgLoopNonTableFallback;
data.context.forgPrepXnextFallback = forgPrepXnextFallback;
data.context.callProlog = callProlog;
data.context.callEpilogC = callEpilogC;
data.context.callFallback = callFallback;
data.context.returnFallback = returnFallback;
data.context.executeGETGLOBAL = executeGETGLOBAL;
data.context.executeSETGLOBAL = executeSETGLOBAL;
data.context.executeGETTABLEKS = executeGETTABLEKS;
data.context.executeSETTABLEKS = executeSETTABLEKS;
data.context.executeNEWCLOSURE = executeNEWCLOSURE;
data.context.executeNAMECALL = executeNAMECALL;
data.context.executeFORGPREP = executeFORGPREP;
data.context.executeGETVARARGS = executeGETVARARGS;
data.context.executeDUPCLOSURE = executeDUPCLOSURE;
data.context.executePREPVARARGS = executePREPVARARGS;
data.context.executeSETLIST = executeSETLIST;
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,127 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Bytecode.h"
#include "Luau/CodeAllocator.h"
#include "Luau/Label.h"
#include <memory>
#include <stdint.h>
#include "ldebug.h"
#include "lobject.h"
#include "ltm.h"
#include "lstate.h"
typedef int (*luau_FastFunction)(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams);
namespace Luau
{
namespace CodeGen
{
class UnwindBuilder;
struct NativeContext
{
// Gateway (C => native transition) entry & exit, compiled at runtime
uint8_t* gateEntry = nullptr;
uint8_t* gateExit = nullptr;
// Helper functions, implemented in C
int (*luaV_lessthan)(lua_State* L, const TValue* l, const TValue* r) = nullptr;
int (*luaV_lessequal)(lua_State* L, const TValue* l, const TValue* r) = nullptr;
int (*luaV_equalval)(lua_State* L, const TValue* t1, const TValue* t2) = nullptr;
void (*luaV_doarith)(lua_State* L, StkId ra, const TValue* rb, const TValue* rc, TMS op) = nullptr;
void (*luaV_dolen)(lua_State* L, StkId ra, const TValue* rb) = nullptr;
void (*luaV_prepareFORN)(lua_State* L, StkId plimit, StkId pstep, StkId pinit) = nullptr;
void (*luaV_gettable)(lua_State* L, const TValue* t, TValue* key, StkId val) = nullptr;
void (*luaV_settable)(lua_State* L, const TValue* t, TValue* key, StkId val) = nullptr;
void (*luaV_getimport)(lua_State* L, Table* env, TValue* k, uint32_t id, bool propagatenil) = nullptr;
void (*luaV_concat)(lua_State* L, int total, int last) = nullptr;
int (*luaH_getn)(Table* t) = nullptr;
Table* (*luaH_new)(lua_State* L, int narray, int lnhash) = nullptr;
Table* (*luaH_clone)(lua_State* L, Table* tt) = nullptr;
void (*luaH_resizearray)(lua_State* L, Table* t, int nasize) = nullptr;
void (*luaC_barriertable)(lua_State* L, Table* t, GCObject* v) = nullptr;
void (*luaC_barrierf)(lua_State* L, GCObject* o, GCObject* v) = nullptr;
void (*luaC_barrierback)(lua_State* L, GCObject* o, GCObject** gclist) = nullptr;
size_t (*luaC_step)(lua_State* L, bool assist) = nullptr;
void (*luaF_close)(lua_State* L, StkId level) = nullptr;
const TValue* (*luaT_gettm)(Table* events, TMS event, TString* ename) = nullptr;
const TString* (*luaT_objtypenamestr)(lua_State* L, const TValue* o) = nullptr;
double (*libm_exp)(double) = nullptr;
double (*libm_pow)(double, double) = nullptr;
double (*libm_fmod)(double, double) = nullptr;
double (*libm_asin)(double) = nullptr;
double (*libm_sin)(double) = nullptr;
double (*libm_sinh)(double) = nullptr;
double (*libm_acos)(double) = nullptr;
double (*libm_cos)(double) = nullptr;
double (*libm_cosh)(double) = nullptr;
double (*libm_atan)(double) = nullptr;
double (*libm_atan2)(double, double) = nullptr;
double (*libm_tan)(double) = nullptr;
double (*libm_tanh)(double) = nullptr;
double (*libm_log)(double) = nullptr;
double (*libm_log2)(double) = nullptr;
double (*libm_log10)(double) = nullptr;
double (*libm_ldexp)(double, int) = nullptr;
double (*libm_round)(double) = nullptr;
double (*libm_frexp)(double, int*) = nullptr;
double (*libm_modf)(double, double*) = nullptr;
// Helper functions
bool (*forgLoopTableIter)(lua_State* L, Table* h, int index, TValue* ra) = nullptr;
bool (*forgLoopNodeIter)(lua_State* L, Table* h, int index, TValue* ra) = nullptr;
bool (*forgLoopNonTableFallback)(lua_State* L, int insnA, int aux) = nullptr;
void (*forgPrepXnextFallback)(lua_State* L, TValue* ra, int pc) = nullptr;
Closure* (*callProlog)(lua_State* L, TValue* ra, StkId argtop, int nresults) = nullptr;
void (*callEpilogC)(lua_State* L, int nresults, int n) = nullptr;
Closure* (*callFallback)(lua_State* L, StkId ra, StkId argtop, int nresults) = nullptr;
Closure* (*returnFallback)(lua_State* L, StkId ra, StkId valend) = nullptr;
// Opcode fallbacks, implemented in C
const Instruction* (*executeGETGLOBAL)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executeSETGLOBAL)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executeGETTABLEKS)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executeSETTABLEKS)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executeNEWCLOSURE)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executeNAMECALL)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executeSETLIST)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executeFORGPREP)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executeGETVARARGS)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executeDUPCLOSURE)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executePREPVARARGS)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
// Fast call methods, implemented in C
luau_FastFunction luauF_table[256] = {};
};
using GateFn = int (*)(lua_State*, Proto*, uintptr_t, NativeContext*);
struct NativeState
{
NativeState();
~NativeState();
CodeAllocator codeAllocator;
std::unique_ptr<UnwindBuilder> unwindBuilder;
uint8_t* gateData = nullptr;
size_t gateDataSize = 0;
NativeContext context;
};
void initFunctions(NativeState& data);
} // namespace CodeGen
} // namespace Luau

File diff suppressed because it is too large

View File

@ -0,0 +1,109 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/OptimizeFinalX64.h"
#include "Luau/IrUtils.h"
#include <utility>
namespace Luau
{
namespace CodeGen
{
// x64 assembly allows memory operands, but IR separates loads from uses
// To improve final x64 lowering, we try to 'inline' single-use register/constant loads into some of our instructions
// This pass might not be useful on different architectures
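// For example (illustrative IR notation, not the exact dump format):
//   %1 = LOAD_DOUBLE R2
//   %2 = ADD_NUM %0, %1      ; %1 has a single use
// becomes
//   %2 = ADD_NUM %0, R2
// allowing the x64 lowering to fold the load into a memory operand
// (e.g. 'addsd xmm0, qword ptr [rBase + offset]') instead of using a temporary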
static void optimizeMemoryOperandsX64(IrFunction& function, IrBlock& block)
{
LUAU_ASSERT(block.kind != IrBlockKind::Dead);
for (uint32_t index = block.start; index <= block.finish; index++)
{
LUAU_ASSERT(index < function.instructions.size());
IrInst& inst = function.instructions[index];
switch (inst.cmd)
{
case IrCmd::CHECK_TAG:
{
if (inst.a.kind == IrOpKind::Inst)
{
IrInst& tag = function.instOp(inst.a);
if (tag.useCount == 1 && tag.cmd == IrCmd::LOAD_TAG && (tag.a.kind == IrOpKind::VmReg || tag.a.kind == IrOpKind::VmConst))
replace(function, inst.a, tag.a);
}
break;
}
case IrCmd::ADD_NUM:
case IrCmd::SUB_NUM:
case IrCmd::MUL_NUM:
case IrCmd::DIV_NUM:
case IrCmd::MOD_NUM:
case IrCmd::MIN_NUM:
case IrCmd::MAX_NUM:
{
if (inst.b.kind == IrOpKind::Inst)
{
IrInst& rhs = function.instOp(inst.b);
if (rhs.useCount == 1 && rhs.cmd == IrCmd::LOAD_DOUBLE && (rhs.a.kind == IrOpKind::VmReg || rhs.a.kind == IrOpKind::VmConst))
replace(function, inst.b, rhs.a);
}
break;
}
case IrCmd::JUMP_EQ_TAG:
{
if (inst.a.kind == IrOpKind::Inst)
{
IrInst& tagA = function.instOp(inst.a);
if (tagA.useCount == 1 && tagA.cmd == IrCmd::LOAD_TAG && (tagA.a.kind == IrOpKind::VmReg || tagA.a.kind == IrOpKind::VmConst))
{
replace(function, inst.a, tagA.a);
break;
}
}
if (inst.b.kind == IrOpKind::Inst)
{
IrInst& tagB = function.instOp(inst.b);
if (tagB.useCount == 1 && tagB.cmd == IrCmd::LOAD_TAG && (tagB.a.kind == IrOpKind::VmReg || tagB.a.kind == IrOpKind::VmConst))
{
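// Tag equality is symmetric, so move the inlinable load into the second operand slot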
std::swap(inst.a, inst.b);
replace(function, inst.a, tagB.a);
}
}
break;
}
case IrCmd::JUMP_CMP_NUM:
{
if (inst.a.kind == IrOpKind::Inst)
{
IrInst& num = function.instOp(inst.a);
if (num.useCount == 1 && num.cmd == IrCmd::LOAD_DOUBLE)
replace(function, inst.a, num.a);
}
break;
}
default:
break;
}
}
}
void optimizeMemoryOperandsX64(IrFunction& function)
{
for (IrBlock& block : function.blocks)
{
if (block.kind == IrBlockKind::Dead)
continue;
optimizeMemoryOperandsX64(function, block);
}
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,299 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/UnwindBuilderDwarf2.h"
#include "ByteUtils.h"
#include <string.h>
// General information about Dwarf2 format can be found at:
// https://dwarfstd.org/doc/dwarf-2.0.0.pdf [DWARF Debugging Information Format]
// Main part for async exception unwinding is in section '6.4 Call Frame Information'
// Information about System V ABI (AMD64) can be found at:
// https://refspecs.linuxbase.org/elf/x86_64-abi-0.99.pdf [System V Application Binary Interface (AMD64 Architecture Processor Supplement)]
// Interaction between Dwarf2 and System V ABI can be found in sections '3.6.2 DWARF Register Number Mapping' and '4.2.4 EH_FRAME sections'
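// Byte emission below uses helpers from ByteUtils.h; as a rough sketch of the
// assumed ULEB128 behavior (the real helper may differ in details), writeuleb128
// emits 7 bits per byte, least significant first, with the high bit marking
// continuation:
//   static uint8_t* writeuleb128(uint8_t* pos, uint32_t value)
//   {
//       do
//       {
//           uint8_t byte = value & 0x7f;
//           value >>= 7;
//           *pos++ = byte | (value != 0 ? 0x80 : 0);
//       } while (value != 0);
//       return pos;
//   }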
// Call frame instruction opcodes (Dwarf2, page 78, ch. 7.23 figure 37)
#define DW_CFA_advance_loc 0x40
#define DW_CFA_offset 0x80
#define DW_CFA_restore 0xc0
#define DW_CFA_set_loc 0x01
#define DW_CFA_advance_loc1 0x02
#define DW_CFA_advance_loc2 0x03
#define DW_CFA_advance_loc4 0x04
#define DW_CFA_offset_extended 0x05
#define DW_CFA_restore_extended 0x06
#define DW_CFA_undefined 0x07
#define DW_CFA_same_value 0x08
#define DW_CFA_register 0x09
#define DW_CFA_remember_state 0x0a
#define DW_CFA_restore_state 0x0b
#define DW_CFA_def_cfa 0x0c
#define DW_CFA_def_cfa_register 0x0d
#define DW_CFA_def_cfa_offset 0x0e
#define DW_CFA_def_cfa_expression 0x0f
#define DW_CFA_nop 0x00
#define DW_CFA_lo_user 0x1c
#define DW_CFA_hi_user 0x3f
// Register numbers for X64 (System V ABI, page 57, ch. 3.7, figure 3.36)
#define DW_REG_X64_RAX 0
#define DW_REG_X64_RDX 1
#define DW_REG_X64_RCX 2
#define DW_REG_X64_RBX 3
#define DW_REG_X64_RSI 4
#define DW_REG_X64_RDI 5
#define DW_REG_X64_RBP 6
#define DW_REG_X64_RSP 7
#define DW_REG_X64_RA 16
// Register numbers for A64 (DWARF for the Arm 64-bit Architecture, ch. 4.1)
#define DW_REG_A64_FP 29
#define DW_REG_A64_LR 30
#define DW_REG_A64_SP 31
// X64 register mapping from real register index to DWARF2 (r8..r15 are mapped 1-1, but named registers aren't)
const int regIndexToDwRegX64[16] = {DW_REG_X64_RAX, DW_REG_X64_RCX, DW_REG_X64_RDX, DW_REG_X64_RBX, DW_REG_X64_RSP, DW_REG_X64_RBP, DW_REG_X64_RSI,
DW_REG_X64_RDI, 8, 9, 10, 11, 12, 13, 14, 15};
const int kCodeAlignFactor = 1;
const int kDataAlignFactor = 8;
const int kDwarfAlign = 8;
const int kFdeInitialLocationOffset = 8;
const int kFdeAddressRangeOffset = 16;
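// These two constants index into the FDE layout written by startFunction: a u32
// length and a u32 CIE pointer occupy the first 8 bytes, followed by a u64
// initial location and a u64 address range, which finalize() patches per function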
// Define canonical frame address expression as [reg + offset]
static uint8_t* defineCfaExpression(uint8_t* pos, int dwReg, uint32_t stackOffset)
{
pos = writeu8(pos, DW_CFA_def_cfa);
pos = writeuleb128(pos, dwReg);
pos = writeuleb128(pos, stackOffset);
return pos;
}
// Update offset value in canonical frame address expression
static uint8_t* defineCfaExpressionOffset(uint8_t* pos, uint32_t stackOffset)
{
pos = writeu8(pos, DW_CFA_def_cfa_offset);
pos = writeuleb128(pos, stackOffset);
return pos;
}
static uint8_t* defineSavedRegisterLocation(uint8_t* pos, int dwReg, uint32_t stackOffset)
{
LUAU_ASSERT(stackOffset % kDataAlignFactor == 0 && "stack offsets have to be measured in kDataAlignFactor units");
if (dwReg <= 0x3f)
{
pos = writeu8(pos, DW_CFA_offset + dwReg);
}
else
{
pos = writeu8(pos, DW_CFA_offset_extended);
pos = writeuleb128(pos, dwReg);
}
pos = writeuleb128(pos, stackOffset / kDataAlignFactor);
return pos;
}
static uint8_t* advanceLocation(uint8_t* pos, unsigned int offset)
{
LUAU_ASSERT(offset < 256);
pos = writeu8(pos, DW_CFA_advance_loc1);
pos = writeu8(pos, offset);
return pos;
}
static uint8_t* alignPosition(uint8_t* start, uint8_t* pos)
{
size_t size = pos - start;
size_t pad = ((size + kDwarfAlign - 1) & ~(kDwarfAlign - 1)) - size;
for (size_t i = 0; i < pad; i++)
pos = writeu8(pos, DW_CFA_nop);
return pos;
}
namespace Luau
{
namespace CodeGen
{
void UnwindBuilderDwarf2::setBeginOffset(size_t beginOffset)
{
this->beginOffset = beginOffset;
}
size_t UnwindBuilderDwarf2::getBeginOffset() const
{
return beginOffset;
}
void UnwindBuilderDwarf2::startInfo(Arch arch)
{
LUAU_ASSERT(arch == A64 || arch == X64);
uint8_t* cieLength = pos;
pos = writeu32(pos, 0); // Length (to be filled later)
pos = writeu32(pos, 0); // CIE id: 0 identifies this as a .eh_frame CIE
pos = writeu8(pos, 1); // Version
pos = writeu8(pos, 0); // CIE augmentation string "" (empty)
int ra = arch == A64 ? DW_REG_A64_LR : DW_REG_X64_RA;
pos = writeuleb128(pos, kCodeAlignFactor); // Code align factor
pos = writeuleb128(pos, -kDataAlignFactor & 0x7f); // Data align factor of -8, encoded as a signed LEB128
pos = writeu8(pos, ra); // Return address register
// Optional CIE augmentation section (not present)
// Call frame instructions (common for all FDEs)
if (arch == A64)
{
pos = defineCfaExpression(pos, DW_REG_A64_SP, 0); // Define CFA to be the sp
}
else
{
pos = defineCfaExpression(pos, DW_REG_X64_RSP, 8); // Define CFA to be the rsp + 8
pos = defineSavedRegisterLocation(pos, DW_REG_X64_RA, 8); // Define return address register (RA) to be located at CFA - 8
}
pos = alignPosition(cieLength, pos);
writeu32(cieLength, unsigned(pos - cieLength - 4)); // Length field itself is excluded from length
}
void UnwindBuilderDwarf2::startFunction()
{
// End offset is filled in later and everything gets adjusted at the end
UnwindFunctionDwarf2 func;
func.beginOffset = 0;
func.endOffset = 0;
func.fdeEntryStartPos = uint32_t(pos - rawData);
unwindFunctions.push_back(func);
fdeEntryStart = pos; // Will be written at the end
pos = writeu32(pos, 0); // Length (to be filled later)
pos = writeu32(pos, unsigned(pos - rawData)); // CIE pointer (offset from this field back to the CIE at the start of the section)
pos = writeu64(pos, 0); // Initial location (to be filled later)
pos = writeu64(pos, 0); // Address range (to be filled later)
// Optional CIE augmentation section (not present)
// Function call frame instructions to follow
}
void UnwindBuilderDwarf2::finishFunction(uint32_t beginOffset, uint32_t endOffset)
{
unwindFunctions.back().beginOffset = beginOffset;
unwindFunctions.back().endOffset = endOffset;
LUAU_ASSERT(fdeEntryStart != nullptr);
pos = alignPosition(fdeEntryStart, pos);
writeu32(fdeEntryStart, unsigned(pos - fdeEntryStart - 4)); // Length field itself is excluded from length
}
void UnwindBuilderDwarf2::finishInfo()
{
// Terminate section
pos = writeu32(pos, 0);
LUAU_ASSERT(getSize() <= kRawDataLimit);
}
void UnwindBuilderDwarf2::prologueA64(uint32_t prologueSize, uint32_t stackSize, std::initializer_list<A64::RegisterA64> regs)
{
LUAU_ASSERT(stackSize % 16 == 0);
LUAU_ASSERT(regs.size() >= 2 && regs.begin()[0] == A64::x29 && regs.begin()[1] == A64::x30);
LUAU_ASSERT(regs.size() * 8 <= stackSize);
// sub sp, sp, stackSize
pos = advanceLocation(pos, 4);
pos = defineCfaExpressionOffset(pos, stackSize);
// stp/str to store each register to stack in order
pos = advanceLocation(pos, prologueSize - 4);
for (size_t i = 0; i < regs.size(); ++i)
{
LUAU_ASSERT(regs.begin()[i].kind == A64::KindA64::x);
pos = defineSavedRegisterLocation(pos, regs.begin()[i].index, stackSize - unsigned(i * 8));
}
}
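// A worked example with hypothetical arguments: prologueA64(16, 96, {x29, x30, x19, x20})
// describes "sub sp, sp, #96" followed by stores of the four registers at [sp],
// [sp+8], [sp+16] and [sp+24]; the CFI above records CFA = sp + 96 with each
// register saved at CFA - (96 - i * 8)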
void UnwindBuilderDwarf2::prologueX64(uint32_t prologueSize, uint32_t stackSize, bool setupFrame, std::initializer_list<X64::RegisterX64> regs)
{
LUAU_ASSERT(stackSize > 0 && stackSize <= 128 && stackSize % 8 == 0);
unsigned int stackOffset = 8; // Return address was pushed by calling the function
unsigned int prologueOffset = 0;
if (setupFrame)
{
// push rbp
stackOffset += 8;
prologueOffset += 2;
pos = advanceLocation(pos, 2);
pos = defineCfaExpressionOffset(pos, stackOffset);
pos = defineSavedRegisterLocation(pos, DW_REG_X64_RBP, stackOffset);
// mov rbp, rsp
prologueOffset += 3;
pos = advanceLocation(pos, 3);
}
// push reg
for (X64::RegisterX64 reg : regs)
{
LUAU_ASSERT(reg.size == X64::SizeX64::qword);
stackOffset += 8;
prologueOffset += 2;
pos = advanceLocation(pos, 2);
pos = defineCfaExpressionOffset(pos, stackOffset);
pos = defineSavedRegisterLocation(pos, regIndexToDwRegX64[reg.index], stackOffset);
}
// sub rsp, stackSize
stackOffset += stackSize;
prologueOffset += 4;
pos = advanceLocation(pos, 4);
pos = defineCfaExpressionOffset(pos, stackOffset);
LUAU_ASSERT(stackOffset % 16 == 0);
LUAU_ASSERT(prologueOffset == prologueSize);
}
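// A worked example with hypothetical arguments: prologueX64(11, 24, true, {r12})
// describes "push rbp; mov rbp, rsp; push r12; sub rsp, 24" and records:
//   advance 2; def_cfa_offset 16; rbp saved at CFA-16
//   advance 3
//   advance 2; def_cfa_offset 24; r12 saved at CFA-24
//   advance 4; def_cfa_offset 48
// so the unwinder can restore the frame from any point inside the prologue
// (prologueSize = 11 matches the per-instruction advances above: 2 + 3 + 2 + 4)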
size_t UnwindBuilderDwarf2::getSize() const
{
return size_t(pos - rawData);
}
size_t UnwindBuilderDwarf2::getFunctionCount() const
{
return unwindFunctions.size();
}
void UnwindBuilderDwarf2::finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const
{
memcpy(target, rawData, getSize());
for (const UnwindFunctionDwarf2& func : unwindFunctions)
{
uint8_t* fdeEntry = (uint8_t*)target + func.fdeEntryStartPos;
writeu64(fdeEntry + kFdeInitialLocationOffset, uintptr_t(funcAddress) + offset + func.beginOffset);
if (func.endOffset == kFullBlockFuncton)
writeu64(fdeEntry + kFdeAddressRangeOffset, funcSize - offset);
else
writeu64(fdeEntry + kFdeAddressRangeOffset, func.endOffset - func.beginOffset);
}
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,190 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/UnwindBuilderWin.h"
#include <string.h>
// Information about the Windows x64 unwinding data setup can be found at:
// https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64 [x64 exception handling]
#define UWOP_PUSH_NONVOL 0
#define UWOP_ALLOC_LARGE 1
#define UWOP_ALLOC_SMALL 2
#define UWOP_SET_FPREG 3
#define UWOP_SAVE_NONVOL 4
#define UWOP_SAVE_NONVOL_FAR 5
#define UWOP_SAVE_XMM128 8
#define UWOP_SAVE_XMM128_FAR 9
#define UWOP_PUSH_MACHFRAME 10
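// Of these, only UWOP_PUSH_NONVOL (info = register number), UWOP_SET_FPREG and
// UWOP_ALLOC_SMALL (allocation size = info * 8 + 8, i.e. 8..128 bytes) are
// emitted by this builder; the rest are listed for reference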
namespace Luau
{
namespace CodeGen
{
void UnwindBuilderWin::setBeginOffset(size_t beginOffset)
{
this->beginOffset = beginOffset;
}
size_t UnwindBuilderWin::getBeginOffset() const
{
return beginOffset;
}
void UnwindBuilderWin::startInfo(Arch arch)
{
LUAU_ASSERT(arch == X64);
}
void UnwindBuilderWin::startFunction()
{
// End offset is filled in later and everything gets adjusted at the end
UnwindFunctionWin func;
func.beginOffset = 0;
func.endOffset = 0;
func.unwindInfoOffset = uint32_t(rawDataPos - rawData);
unwindFunctions.push_back(func);
unwindCodes.clear();
unwindCodes.reserve(16);
prologSize = 0;
// rax has register index 0, which in Windows unwind info means that frame register is not used
frameReg = X64::rax;
frameRegOffset = 0;
}
void UnwindBuilderWin::finishFunction(uint32_t beginOffset, uint32_t endOffset)
{
unwindFunctions.back().beginOffset = beginOffset;
unwindFunctions.back().endOffset = endOffset;
// Windows unwind code count is stored in uint8_t, so we can't have more
LUAU_ASSERT(unwindCodes.size() < 256);
UnwindInfoWin info;
info.version = 1;
info.flags = 0; // No EH
info.prologsize = prologSize;
info.unwindcodecount = uint8_t(unwindCodes.size());
LUAU_ASSERT(frameReg.index < 16);
info.framereg = frameReg.index;
LUAU_ASSERT(frameRegOffset < 16);
info.frameregoff = frameRegOffset;
LUAU_ASSERT(rawDataPos + sizeof(info) <= rawData + kRawDataLimit);
memcpy(rawDataPos, &info, sizeof(info));
rawDataPos += sizeof(info);
if (!unwindCodes.empty())
{
// Copy unwind codes in reverse order
// Some unwind codes take up two array slots, but we don't use those atm
uint8_t* unwindCodePos = rawDataPos + sizeof(UnwindCodeWin) * (unwindCodes.size() - 1);
LUAU_ASSERT(unwindCodePos <= rawData + kRawDataLimit);
for (size_t i = 0; i < unwindCodes.size(); i++)
{
memcpy(unwindCodePos, &unwindCodes[i], sizeof(UnwindCodeWin));
unwindCodePos -= sizeof(UnwindCodeWin);
}
}
rawDataPos += sizeof(UnwindCodeWin) * unwindCodes.size();
// Size of the unwind code array has to be even, but the code count doesn't have to be
if (unwindCodes.size() % 2 != 0)
rawDataPos += sizeof(UnwindCodeWin);
LUAU_ASSERT(rawDataPos <= rawData + kRawDataLimit);
}
void UnwindBuilderWin::finishInfo() {}
void UnwindBuilderWin::prologueA64(uint32_t prologueSize, uint32_t stackSize, std::initializer_list<A64::RegisterA64> regs)
{
LUAU_ASSERT(!"Not implemented");
}
void UnwindBuilderWin::prologueX64(uint32_t prologueSize, uint32_t stackSize, bool setupFrame, std::initializer_list<X64::RegisterX64> regs)
{
LUAU_ASSERT(stackSize > 0 && stackSize <= 128 && stackSize % 8 == 0);
LUAU_ASSERT(prologueSize < 256);
unsigned int stackOffset = 8; // Return address was pushed by calling the function
unsigned int prologueOffset = 0;
if (setupFrame)
{
// push rbp
stackOffset += 8;
prologueOffset += 2;
unwindCodes.push_back({uint8_t(prologueOffset), UWOP_PUSH_NONVOL, X64::rbp.index});
// mov rbp, rsp
prologueOffset += 3;
frameReg = X64::rbp;
frameRegOffset = 0;
unwindCodes.push_back({uint8_t(prologueOffset), UWOP_SET_FPREG, frameRegOffset});
}
// push reg
for (X64::RegisterX64 reg : regs)
{
LUAU_ASSERT(reg.size == X64::SizeX64::qword);
stackOffset += 8;
prologueOffset += 2;
unwindCodes.push_back({uint8_t(prologueOffset), UWOP_PUSH_NONVOL, reg.index});
}
// sub rsp, stackSize
stackOffset += stackSize;
prologueOffset += 4;
unwindCodes.push_back({uint8_t(prologueOffset), UWOP_ALLOC_SMALL, uint8_t((stackSize - 8) / 8)});
LUAU_ASSERT(stackOffset % 16 == 0);
LUAU_ASSERT(prologueOffset == prologueSize);
this->prologSize = prologueSize;
}
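// The same hypothetical prologue as in the DWARF builder, prologueX64(11, 24, true, {r12})
// for "push rbp; mov rbp, rsp; push r12; sub rsp, 24", records (before the
// reversed copy in finishFunction):
//   {2, UWOP_PUSH_NONVOL, rbp}, {5, UWOP_SET_FPREG, 0},
//   {7, UWOP_PUSH_NONVOL, r12}, {11, UWOP_ALLOC_SMALL, 2} // (2 + 1) * 8 = 24 bytes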
size_t UnwindBuilderWin::getSize() const
{
return sizeof(UnwindFunctionWin) * unwindFunctions.size() + size_t(rawDataPos - rawData);
}
size_t UnwindBuilderWin::getFunctionCount() const
{
return unwindFunctions.size();
}
void UnwindBuilderWin::finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const
{
// Copy adjusted function information
for (UnwindFunctionWin func : unwindFunctions)
{
// Code will start after the unwind info
func.beginOffset += uint32_t(offset);
// Whole block is a part of a 'single function'
if (func.endOffset == kFullBlockFuncton)
func.endOffset = uint32_t(funcSize);
else
func.endOffset += uint32_t(offset);
// Unwind data is placed right after the RUNTIME_FUNCTION data
func.unwindInfoOffset += uint32_t(sizeof(UnwindFunctionWin) * unwindFunctions.size());
memcpy(target, &func, sizeof(func));
target += sizeof(func);
}
// Copy unwind codes
memcpy(target, rawData, size_t(rawDataPos - rawData));
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,21 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "luacodegen.h"
#include "Luau/CodeGen.h"
#include "lapi.h"
int luau_codegen_supported()
{
return Luau::CodeGen::isSupported();
}
void luau_codegen_create(lua_State* L)
{
Luau::CodeGen::create(L);
}
void luau_codegen_compile(lua_State* L, int idx)
{
Luau::CodeGen::compile(L, idx);
}
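// A minimal usage sketch of this C API (hypothetical embedding code; create the
// codegen state once per lua_State, then compile individual loaded closures):
//   lua_State* L = luaL_newstate();
//   if (luau_codegen_supported())
//       luau_codegen_create(L);
//   // ... luau_compile + luau_load put a closure on top of the stack ...
//   if (luau_codegen_supported())
//       luau_codegen_compile(L, -1); // natively compile the closure at index -1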

View File

@ -6,6 +6,8 @@ pub struct Build {
out_dir: Option<PathBuf>,
target: Option<String>,
host: Option<String>,
// Enable code generator (jit)
enable_codegen: bool,
}
pub struct Artifacts {
@ -22,6 +24,7 @@ impl Build {
out_dir: env::var_os("OUT_DIR").map(|s| PathBuf::from(s).join("luau-build")),
target: env::var("TARGET").ok(),
host: env::var("HOST").ok(),
enable_codegen: false,
}
}
@ -40,6 +43,11 @@ impl Build {
self
}
pub fn enable_codegen(&mut self, enable: bool) -> &mut Build {
self.enable_codegen = enable;
self
}
pub fn build(&mut self) -> Artifacts {
let target = &self.target.as_ref().expect("TARGET not set")[..];
let host = &self.host.as_ref().expect("HOST not set")[..];
@ -51,6 +59,8 @@ impl Build {
let common_include_dir = source_dir_base.join("luau").join("Common").join("include");
let ast_source_dir = source_dir_base.join("luau").join("Ast").join("src");
let ast_include_dir = source_dir_base.join("luau").join("Ast").join("include");
let codegen_source_dir = source_dir_base.join("luau").join("CodeGen").join("src");
let codegen_include_dir = source_dir_base.join("luau").join("CodeGen").join("include");
let compiler_source_dir = source_dir_base.join("luau").join("Compiler").join("src");
let compiler_include_dir = source_dir_base
.join("luau")
@ -81,6 +91,10 @@ impl Build {
.flag_if_supported("/std:c++17") // MSVC
.cpp(true);
if self.enable_codegen {
config.define("LUA_CUSTOM_EXECUTION", None);
}
if cfg!(not(debug_assertions)) {
config.define("NDEBUG", None);
config.opt_level(2);
@ -98,6 +112,24 @@ impl Build {
.out_dir(&lib_dir)
.compile(ast_lib_name);
// Build CodeGen
let codegen_lib_name = "luaucodegen";
if self.enable_codegen {
config
.clone()
.include(&codegen_include_dir)
.include(&common_include_dir)
.include(&vm_include_dir)
.include(&vm_source_dir)
.define("LUACODEGEN_API", "extern \"C\"")
// Code generator uses Lua VM internals, so we need to provide the same defines used to build the VM
.define("LUA_API", "extern \"C\"")
.define("LUAI_MAXCSTACK", "100000")
.add_files_by_ext(&codegen_source_dir, "cpp")
.out_dir(&lib_dir)
.compile(codegen_lib_name);
}
// Build Compiler
let compiler_lib_name = "luaucompiler";
config
@ -130,7 +162,7 @@ impl Build {
fs::copy(compiler_include_dir.join(f), include_dir.join(f)).unwrap();
}
-Artifacts {
+let mut artifacts = Artifacts {
lib_dir,
include_dir,
libs: vec![
@ -139,7 +171,13 @@ impl Build {
vm_lib_name.to_string(),
],
cpp_stdlib: Self::get_cpp_link_stdlib(target),
};
if self.enable_codegen {
artifacts.libs.push(codegen_lib_name.to_string());
}
artifacts
}
fn get_cpp_link_stdlib(target: &str) -> Option<String> {

View File

@ -1,5 +1,5 @@
fn main() {
println!("cargo:rerun-if-changed=build.rs");
-let artifacts = luau0_src::Build::new().build();
+let artifacts = luau0_src::Build::new().enable_codegen(true).build();
artifacts.print_cargo_metadata();
}

View File

@ -15,9 +15,14 @@ extern "C" {
pub fn free(ptr: *mut c_void);
pub fn luaL_newstate() -> *mut c_void;
pub fn lua_close(state: *mut c_void);
pub fn luaL_openlibs(state: *mut c_void);
pub fn lua_getfield(state: *mut c_void, index: c_int, k: *const c_char) -> c_int;
pub fn lua_tolstring(state: *mut c_void, index: c_int, len: *mut c_long) -> *const c_char;
pub fn lua_call(state: *mut c_void, nargs: c_int, nresults: c_int);
pub fn lua_pushinteger(state: *mut c_void, n: c_int);
pub fn lua_tointegerx(state: *mut c_void, index: c_int, isnum: *mut c_int) -> c_int;
pub fn luau_compile(
source: *const c_char,
@ -32,6 +37,10 @@ extern "C" {
size: usize,
env: c_int,
) -> c_int;
pub fn luau_codegen_supported() -> c_int;
pub fn luau_codegen_create(state: *mut c_void);
pub fn luau_codegen_compile(state: *mut c_void, idx: c_int);
}
pub unsafe fn lua_getglobal(state: *mut c_void, k: *const c_char) {
@ -45,6 +54,11 @@ fn luau_works() {
let state = luaL_newstate();
assert!(state != ptr::null_mut());
// Enable JIT if supported
if luau_codegen_supported() != 0 {
luau_codegen_create(state);
}
luaL_openlibs(state);
let version = {
@ -56,7 +70,7 @@ fn luau_works() {
assert_eq!(version, "Luau".as_bytes());
let code = "function sum(a, b) return a + b end\0";
let code = "local a, b = ... return a + b\0";
let mut bytecode_size = 0;
let bytecode = luau_compile(
code.as_ptr().cast(),
@ -64,8 +78,21 @@ fn luau_works() {
ptr::null_mut(),
&mut bytecode_size,
);
let result = luau_load(state, "test\0".as_ptr().cast(), bytecode, bytecode_size, 0);
let result = luau_load(state, "sum\0".as_ptr().cast(), bytecode, bytecode_size, 0);
assert_eq!(result, 0);
free(bytecode.cast());
// Compile the function (JIT, if supported)
if luau_codegen_supported() != 0 {
luau_codegen_compile(state, -1);
}
// Call the loaded function
lua_pushinteger(state, 123);
lua_pushinteger(state, 321);
lua_call(state, 2, 1);
assert_eq!(lua_tointegerx(state, -1, ptr::null_mut()), 444);
lua_close(state);
}
}