Add Luau CodeGen (JIT implementation, currently experimental)

Alex Orlenko 2023-05-20 22:49:37 +01:00
parent deb042b940
commit 3bfe1afb96
75 changed files with 22454 additions and 4 deletions


@@ -0,0 +1,61 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/RegisterA64.h"
#include <stddef.h>
namespace Luau
{
namespace CodeGen
{
namespace A64
{
enum class AddressKindA64 : uint8_t
{
imm, // reg + imm
reg, // reg + reg
// TODO:
// reg + reg << shift
// reg + sext(reg) << shift
// reg + uext(reg) << shift
};
struct AddressA64
{
// This is a little misleading since AddressA64 can encode offsets up to 1023*size where size depends on the load/store size
// For example, ldr x0, [reg+imm] is limited to 8 KB offsets assuming imm is divisible by 8, but loading into w0 reduces the range to 4 KB
static constexpr size_t kMaxOffset = 1023;
constexpr AddressA64(RegisterA64 base, int off = 0)
: kind(AddressKindA64::imm)
, base(base)
, offset(xzr)
, data(off)
{
LUAU_ASSERT(base.kind == KindA64::x || base == sp);
}
constexpr AddressA64(RegisterA64 base, RegisterA64 offset)
: kind(AddressKindA64::reg)
, base(base)
, offset(offset)
, data(0)
{
LUAU_ASSERT(base.kind == KindA64::x);
LUAU_ASSERT(offset.kind == KindA64::x);
}
AddressKindA64 kind;
RegisterA64 base;
RegisterA64 offset;
int data;
};
using mem = AddressA64;
} // namespace A64
} // namespace CodeGen
} // namespace Luau
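
A usage sketch (not part of the diff): the two constructors map directly onto the two addressing kinds, with 'mem' as the alias declared above; the 'ldr' overload and register names come from the other headers in this commit.

// Sketch only; assumes an AssemblyBuilderA64 instance named 'build'
build.ldr(x0, mem(x1, 16)); // AddressKindA64::imm, loads from [x1 + 16]
build.ldr(x0, mem(x1, x2)); // AddressKindA64::reg, loads from [x1 + x2]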


@@ -0,0 +1,280 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/RegisterA64.h"
#include "Luau/AddressA64.h"
#include "Luau/ConditionA64.h"
#include "Luau/Label.h"
#include <string>
#include <vector>
namespace Luau
{
namespace CodeGen
{
namespace A64
{
enum FeaturesA64
{
Feature_JSCVT = 1 << 0,
};
class AssemblyBuilderA64
{
public:
explicit AssemblyBuilderA64(bool logText, unsigned int features = 0);
~AssemblyBuilderA64();
// Moves
void mov(RegisterA64 dst, RegisterA64 src);
void mov(RegisterA64 dst, int src); // macro
// Moves of 32-bit immediates get decomposed into one or more of these
void movz(RegisterA64 dst, uint16_t src, int shift = 0);
void movn(RegisterA64 dst, uint16_t src, int shift = 0);
void movk(RegisterA64 dst, uint16_t src, int shift = 0);
// Arithmetics
void add(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
void add(RegisterA64 dst, RegisterA64 src1, uint16_t src2);
void sub(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
void sub(RegisterA64 dst, RegisterA64 src1, uint16_t src2);
void neg(RegisterA64 dst, RegisterA64 src);
// Comparisons
// Note: some arithmetic instructions also have versions that update flags (ADDS etc) but we aren't using them atm
void cmp(RegisterA64 src1, RegisterA64 src2);
void cmp(RegisterA64 src1, uint16_t src2);
void csel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond);
void cset(RegisterA64 dst, ConditionA64 cond);
// Bitwise
void and_(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
void orr(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
void eor(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
void bic(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
void tst(RegisterA64 src1, RegisterA64 src2, int shift = 0);
void mvn_(RegisterA64 dst, RegisterA64 src);
// Bitwise with immediate
// Note: immediate must have a single contiguous sequence of 1 bits set of length 1..31
void and_(RegisterA64 dst, RegisterA64 src1, uint32_t src2);
void orr(RegisterA64 dst, RegisterA64 src1, uint32_t src2);
void eor(RegisterA64 dst, RegisterA64 src1, uint32_t src2);
void tst(RegisterA64 src1, uint32_t src2);
// Shifts
void lsl(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void lsr(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void asr(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void ror(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void clz(RegisterA64 dst, RegisterA64 src);
void rbit(RegisterA64 dst, RegisterA64 src);
// Shifts with immediates
// Note: immediate value must be in [0, 31] or [0, 63] range based on register type
void lsl(RegisterA64 dst, RegisterA64 src1, uint8_t src2);
void lsr(RegisterA64 dst, RegisterA64 src1, uint8_t src2);
void asr(RegisterA64 dst, RegisterA64 src1, uint8_t src2);
void ror(RegisterA64 dst, RegisterA64 src1, uint8_t src2);
// Bitfields
void ubfiz(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);
void ubfx(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);
void sbfiz(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);
void sbfx(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);
// Load
// Note: paired loads are currently omitted for simplicity
void ldr(RegisterA64 dst, AddressA64 src);
void ldrb(RegisterA64 dst, AddressA64 src);
void ldrh(RegisterA64 dst, AddressA64 src);
void ldrsb(RegisterA64 dst, AddressA64 src);
void ldrsh(RegisterA64 dst, AddressA64 src);
void ldrsw(RegisterA64 dst, AddressA64 src);
void ldp(RegisterA64 dst1, RegisterA64 dst2, AddressA64 src);
// Store
void str(RegisterA64 src, AddressA64 dst);
void strb(RegisterA64 src, AddressA64 dst);
void strh(RegisterA64 src, AddressA64 dst);
void stp(RegisterA64 src1, RegisterA64 src2, AddressA64 dst);
// Control flow
void b(Label& label);
void bl(Label& label);
void br(RegisterA64 src);
void blr(RegisterA64 src);
void ret();
// Conditional control flow
void b(ConditionA64 cond, Label& label);
void cbz(RegisterA64 src, Label& label);
void cbnz(RegisterA64 src, Label& label);
void tbz(RegisterA64 src, uint8_t bit, Label& label);
void tbnz(RegisterA64 src, uint8_t bit, Label& label);
// Address of embedded data
void adr(RegisterA64 dst, const void* ptr, size_t size);
void adr(RegisterA64 dst, uint64_t value);
void adr(RegisterA64 dst, double value);
// Address of code (label)
void adr(RegisterA64 dst, Label& label);
// Floating-point scalar moves
// Note: constant must be compatible with immediate floating point moves (see isFmovSupported)
void fmov(RegisterA64 dst, RegisterA64 src);
void fmov(RegisterA64 dst, double src);
// Floating-point scalar math
void fabs(RegisterA64 dst, RegisterA64 src);
void fadd(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void fdiv(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void fmul(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void fneg(RegisterA64 dst, RegisterA64 src);
void fsqrt(RegisterA64 dst, RegisterA64 src);
void fsub(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
// Floating-point rounding and conversions
void frinta(RegisterA64 dst, RegisterA64 src);
void frintm(RegisterA64 dst, RegisterA64 src);
void frintp(RegisterA64 dst, RegisterA64 src);
void fcvt(RegisterA64 dst, RegisterA64 src);
void fcvtzs(RegisterA64 dst, RegisterA64 src);
void fcvtzu(RegisterA64 dst, RegisterA64 src);
void scvtf(RegisterA64 dst, RegisterA64 src);
void ucvtf(RegisterA64 dst, RegisterA64 src);
// Floating-point conversion to integer using JS rules (wrap around 2^32) and set Z flag
// Note: this is part of ARMv8.3 (JSCVT feature); support for this instruction needs to be checked at runtime
void fjcvtzs(RegisterA64 dst, RegisterA64 src);
// Floating-point comparisons
void fcmp(RegisterA64 src1, RegisterA64 src2);
void fcmpz(RegisterA64 src);
void fcsel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond);
// Run final checks
bool finalize();
// Places a label at current location and returns it
Label setLabel();
// Assigns label position to the current location
void setLabel(Label& label);
// Extracts code offset (in bytes) from label
uint32_t getLabelOffset(const Label& label)
{
LUAU_ASSERT(label.location != ~0u);
return label.location * 4;
}
void logAppend(const char* fmt, ...) LUAU_PRINTF_ATTR(2, 3);
uint32_t getCodeSize() const;
// Resulting data and code that need to be copied over one after the other
// The *end* of 'data' has to be aligned to 16 bytes, this will also align 'code'
std::vector<uint8_t> data;
std::vector<uint32_t> code;
std::string text;
const bool logText = false;
const unsigned int features = 0;
// Maximum immediate argument to functions like add/sub/cmp
static constexpr size_t kMaxImmediate = (1 << 12) - 1;
// Check if immediate mode mask is supported for bitwise operations (and/or/xor)
static bool isMaskSupported(uint32_t mask);
// Check if fmov can be used to synthesize a constant
static bool isFmovSupported(double value);
private:
// Instruction archetypes
void place0(const char* name, uint32_t word);
void placeSR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, int shift = 0, int N = 0);
void placeSR2(const char* name, RegisterA64 dst, RegisterA64 src, uint8_t op, uint8_t op2 = 0);
void placeR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, uint8_t op2);
void placeR1(const char* name, RegisterA64 dst, RegisterA64 src, uint32_t op);
void placeI12(const char* name, RegisterA64 dst, RegisterA64 src1, int src2, uint8_t op);
void placeI16(const char* name, RegisterA64 dst, int src, uint8_t op, int shift = 0);
void placeA(const char* name, RegisterA64 dst, AddressA64 src, uint16_t opsize, int sizelog);
void placeB(const char* name, Label& label, uint8_t op);
void placeBC(const char* name, Label& label, uint8_t op, uint8_t cond);
void placeBCR(const char* name, Label& label, uint8_t op, RegisterA64 cond);
void placeBR(const char* name, RegisterA64 src, uint32_t op);
void placeBTR(const char* name, Label& label, uint8_t op, RegisterA64 cond, uint8_t bit);
void placeADR(const char* name, RegisterA64 src, uint8_t op);
void placeADR(const char* name, RegisterA64 src, uint8_t op, Label& label);
void placeP(const char* name, RegisterA64 dst1, RegisterA64 dst2, AddressA64 src, uint8_t op, uint8_t opc, int sizelog);
void placeCS(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond, uint8_t op, uint8_t opc, int invert = 0);
void placeFCMP(const char* name, RegisterA64 src1, RegisterA64 src2, uint8_t op, uint8_t opc);
void placeFMOV(const char* name, RegisterA64 dst, double src, uint32_t op);
void placeBM(const char* name, RegisterA64 dst, RegisterA64 src1, uint32_t src2, uint8_t op);
void placeBFM(const char* name, RegisterA64 dst, RegisterA64 src1, int src2, uint8_t op, int immr, int imms);
void place(uint32_t word);
struct Patch
{
enum Kind
{
Imm26,
Imm19,
Imm14,
};
Kind kind : 2;
uint32_t label : 30;
uint32_t location;
};
void patchLabel(Label& label, Patch::Kind kind);
void patchOffset(uint32_t location, int value, Patch::Kind kind);
void commit();
LUAU_NOINLINE void extend();
// Data
size_t allocateData(size_t size, size_t align);
// Logging of assembly in text form
LUAU_NOINLINE void log(const char* opcode);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src1, int src2);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, int src, int shift = 0);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, double src);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, AddressA64 src);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst1, RegisterA64 dst2, AddressA64 src);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 src, Label label, int imm = -1);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 src);
LUAU_NOINLINE void log(const char* opcode, Label label);
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond);
LUAU_NOINLINE void log(Label label);
LUAU_NOINLINE void log(RegisterA64 reg);
LUAU_NOINLINE void log(AddressA64 addr);
uint32_t nextLabel = 1;
std::vector<Patch> pendingLabels;
std::vector<uint32_t> labelLocations;
bool finalized = false;
bool overflowed = false;
size_t dataPos = 0;
uint32_t* codePos = nullptr;
uint32_t* codeEnd = nullptr;
};
} // namespace A64
} // namespace CodeGen
} // namespace Luau
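
A minimal end-to-end sketch of the builder workflow, assuming the AAPCS64 argument registers already hold the inputs:

// Sketch: emit 'w0 = w0 + w1; return' and run the final checks
AssemblyBuilderA64 build(/* logText= */ false);
build.add(w0, w0, w1);
build.ret();
bool ok = build.finalize(); // on success, 'data' and 'code' hold the result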


@@ -0,0 +1,266 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Common.h"
#include "Luau/DenseHash.h"
#include "Luau/Label.h"
#include "Luau/ConditionX64.h"
#include "Luau/OperandX64.h"
#include "Luau/RegisterX64.h"
#include <string>
#include <vector>
namespace Luau
{
namespace CodeGen
{
namespace X64
{
enum class RoundingModeX64
{
RoundToNearestEven = 0b00,
RoundToNegativeInfinity = 0b01,
RoundToPositiveInfinity = 0b10,
RoundToZero = 0b11,
};
enum class AlignmentDataX64
{
Nop,
Int3,
Ud2, // int3 will be used as a fall-back if it doesn't fit
};
enum class ABIX64
{
Windows,
SystemV,
};
class AssemblyBuilderX64
{
public:
explicit AssemblyBuilderX64(bool logText, ABIX64 abi);
explicit AssemblyBuilderX64(bool logText);
~AssemblyBuilderX64();
// Base two operand instructions with 9 opcode selection
void add(OperandX64 lhs, OperandX64 rhs);
void sub(OperandX64 lhs, OperandX64 rhs);
void cmp(OperandX64 lhs, OperandX64 rhs);
void and_(OperandX64 lhs, OperandX64 rhs);
void or_(OperandX64 lhs, OperandX64 rhs);
void xor_(OperandX64 lhs, OperandX64 rhs);
// Binary shift instructions with special rhs handling
void sal(OperandX64 lhs, OperandX64 rhs);
void sar(OperandX64 lhs, OperandX64 rhs);
void shl(OperandX64 lhs, OperandX64 rhs);
void shr(OperandX64 lhs, OperandX64 rhs);
void rol(OperandX64 lhs, OperandX64 rhs);
void ror(OperandX64 lhs, OperandX64 rhs);
// Two operand mov instruction has additional specialized encodings
void mov(OperandX64 lhs, OperandX64 rhs);
void mov64(RegisterX64 lhs, int64_t imm);
void movsx(RegisterX64 lhs, OperandX64 rhs);
void movzx(RegisterX64 lhs, OperandX64 rhs);
// Base one operand instruction with 2 opcode selection
void div(OperandX64 op);
void idiv(OperandX64 op);
void mul(OperandX64 op);
void imul(OperandX64 op);
void neg(OperandX64 op);
void not_(OperandX64 op);
void dec(OperandX64 op);
void inc(OperandX64 op);
// Additional forms of imul
void imul(OperandX64 lhs, OperandX64 rhs);
void imul(OperandX64 dst, OperandX64 lhs, int32_t rhs);
void test(OperandX64 lhs, OperandX64 rhs);
void lea(OperandX64 lhs, OperandX64 rhs);
void setcc(ConditionX64 cond, OperandX64 op);
void push(OperandX64 op);
void pop(OperandX64 op);
void ret();
// Control flow
void jcc(ConditionX64 cond, Label& label);
void jmp(Label& label);
void jmp(OperandX64 op);
void call(Label& label);
void call(OperandX64 op);
void int3();
void bsr(RegisterX64 dst, OperandX64 src);
void bsf(RegisterX64 dst, OperandX64 src);
// Code alignment
void nop(uint32_t length = 1);
void align(uint32_t alignment, AlignmentDataX64 data = AlignmentDataX64::Nop);
// AVX
void vaddpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vaddps(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vaddsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vaddss(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vsubsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vmulsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vdivsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vandpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vandnpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vxorpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vorpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vucomisd(OperandX64 src1, OperandX64 src2);
void vcvttsd2si(OperandX64 dst, OperandX64 src);
void vcvtsi2sd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vcvtsd2ss(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vroundsd(OperandX64 dst, OperandX64 src1, OperandX64 src2, RoundingModeX64 roundingMode); // inexact
void vsqrtpd(OperandX64 dst, OperandX64 src);
void vsqrtps(OperandX64 dst, OperandX64 src);
void vsqrtsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vsqrtss(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vmovsd(OperandX64 dst, OperandX64 src);
void vmovsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vmovss(OperandX64 dst, OperandX64 src);
void vmovss(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vmovapd(OperandX64 dst, OperandX64 src);
void vmovaps(OperandX64 dst, OperandX64 src);
void vmovupd(OperandX64 dst, OperandX64 src);
void vmovups(OperandX64 dst, OperandX64 src);
void vmovq(OperandX64 lhs, OperandX64 rhs);
void vmaxsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vminsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vcmpltsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
void vblendvpd(RegisterX64 dst, RegisterX64 src1, OperandX64 mask, RegisterX64 src3);
// Run final checks
bool finalize();
// Places a label at current location and returns it
Label setLabel();
// Assigns label position to the current location
void setLabel(Label& label);
// Extracts code offset (in bytes) from label
uint32_t getLabelOffset(const Label& label)
{
LUAU_ASSERT(label.location != ~0u);
return label.location;
}
// Constant allocation (uses rip-relative addressing)
OperandX64 i64(int64_t value);
OperandX64 f32(float value);
OperandX64 f64(double value);
OperandX64 f32x4(float x, float y, float z, float w);
OperandX64 f64x2(double x, double y);
OperandX64 bytes(const void* ptr, size_t size, size_t align = 8);
void logAppend(const char* fmt, ...) LUAU_PRINTF_ATTR(2, 3);
uint32_t getCodeSize() const;
// Resulting data and code that need to be copied over one after the other
// The *end* of 'data' has to be aligned to 16 bytes, this will also align 'code'
std::vector<uint8_t> data;
std::vector<uint8_t> code;
std::string text;
const bool logText = false;
const ABIX64 abi;
private:
// Instruction archetypes
void placeBinary(const char* name, OperandX64 lhs, OperandX64 rhs, uint8_t codeimm8, uint8_t codeimm, uint8_t codeimmImm8, uint8_t code8rev,
uint8_t coderev, uint8_t code8, uint8_t code, uint8_t opreg);
void placeBinaryRegMemAndImm(OperandX64 lhs, OperandX64 rhs, uint8_t code8, uint8_t code, uint8_t codeImm8, uint8_t opreg);
void placeBinaryRegAndRegMem(OperandX64 lhs, OperandX64 rhs, uint8_t code8, uint8_t code);
void placeBinaryRegMemAndReg(OperandX64 lhs, OperandX64 rhs, uint8_t code8, uint8_t code);
void placeUnaryModRegMem(const char* name, OperandX64 op, uint8_t code8, uint8_t code, uint8_t opreg);
void placeShift(const char* name, OperandX64 lhs, OperandX64 rhs, uint8_t opreg);
void placeJcc(const char* name, Label& label, uint8_t cc);
void placeAvx(const char* name, OperandX64 dst, OperandX64 src, uint8_t code, bool setW, uint8_t mode, uint8_t prefix);
void placeAvx(const char* name, OperandX64 dst, OperandX64 src, uint8_t code, uint8_t coderev, bool setW, uint8_t mode, uint8_t prefix);
void placeAvx(const char* name, OperandX64 dst, OperandX64 src1, OperandX64 src2, uint8_t code, bool setW, uint8_t mode, uint8_t prefix);
void placeAvx(
const char* name, OperandX64 dst, OperandX64 src1, OperandX64 src2, uint8_t imm8, uint8_t code, bool setW, uint8_t mode, uint8_t prefix);
// Instruction components
void placeRegAndModRegMem(OperandX64 lhs, OperandX64 rhs, int32_t extraCodeBytes = 0);
void placeModRegMem(OperandX64 rhs, uint8_t regop, int32_t extraCodeBytes = 0);
void placeRex(RegisterX64 op);
void placeRex(OperandX64 op);
void placeRexNoW(OperandX64 op);
void placeRex(RegisterX64 lhs, OperandX64 rhs);
void placeVex(OperandX64 dst, OperandX64 src1, OperandX64 src2, bool setW, uint8_t mode, uint8_t prefix);
void placeImm8Or32(int32_t imm);
void placeImm8(int32_t imm);
void placeImm32(int32_t imm);
void placeImm64(int64_t imm);
void placeLabel(Label& label);
void place(uint8_t byte);
void commit();
LUAU_NOINLINE void extend();
// Data
size_t allocateData(size_t size, size_t align);
// Logging of assembly in text form (Intel asm with VS disassembly formatting)
LUAU_NOINLINE void log(const char* opcode);
LUAU_NOINLINE void log(const char* opcode, OperandX64 op);
LUAU_NOINLINE void log(const char* opcode, OperandX64 op1, OperandX64 op2);
LUAU_NOINLINE void log(const char* opcode, OperandX64 op1, OperandX64 op2, OperandX64 op3);
LUAU_NOINLINE void log(const char* opcode, OperandX64 op1, OperandX64 op2, OperandX64 op3, OperandX64 op4);
LUAU_NOINLINE void log(Label label);
LUAU_NOINLINE void log(const char* opcode, Label label);
void log(OperandX64 op);
const char* getSizeName(SizeX64 size) const;
const char* getRegisterName(RegisterX64 reg) const;
uint32_t nextLabel = 1;
std::vector<Label> pendingLabels;
std::vector<uint32_t> labelLocations;
DenseHashMap<uint64_t, int32_t> constCache64;
bool finalized = false;
size_t dataPos = 0;
uint8_t* codePos = nullptr;
uint8_t* codeEnd = nullptr;
};
} // namespace X64
} // namespace CodeGen
} // namespace Luau
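
A minimal sketch of the same workflow on x64, using the operand DSL declared in OperandX64.h later in this commit:

// Sketch: emit 'lea rax, [rdx+rcx*2]; ret' and run the final checks
AssemblyBuilderX64 build(/* logText= */ false);
build.lea(rax, addr[rdx + rcx * 2]);
build.ret();
bool ok = build.finalize();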


@@ -0,0 +1,56 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <vector>
#include <stddef.h>
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
constexpr uint32_t kCodeAlignment = 32;
struct CodeAllocator
{
CodeAllocator(size_t blockSize, size_t maxTotalSize);
~CodeAllocator();
// Places data and code into the executable page area
// To allow allocation while previously allocated code is already running, allocation has page granularity
// It's important to group functions together so that page alignment won't result in a lot of wasted space
bool allocate(
const uint8_t* data, size_t dataSize, const uint8_t* code, size_t codeSize, uint8_t*& result, size_t& resultSize, uint8_t*& resultCodeStart);
// Provided to callbacks
void* context = nullptr;
// Called when new block is created to create and setup the unwinding information for all the code in the block
// 'startOffset' reserves space for data at the beginning of the page
void* (*createBlockUnwindInfo)(void* context, uint8_t* block, size_t blockSize, size_t& startOffset) = nullptr;
// Called to destroy unwinding information returned by 'createBlockUnwindInfo'
void (*destroyBlockUnwindInfo)(void* context, void* unwindData) = nullptr;
// Unwind information can be placed inside the block with some implementation-specific reservations at the beginning
// But to simplify block space checks, we limit the max size of all that data
static const size_t kMaxReservedDataSize = 256;
bool allocateNewBlock(size_t& unwindInfoSize);
// Current block we use for allocations
uint8_t* blockPos = nullptr;
uint8_t* blockEnd = nullptr;
// All allocated blocks
std::vector<uint8_t*> blocks;
std::vector<void*> unwindInfos;
size_t blockSize = 0;
size_t maxTotalSize = 0;
};
} // namespace CodeGen
} // namespace Luau
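
A hedged sketch of handing a finalized builder's output to the allocator ('build' is an AssemblyBuilderX64 as above; the block and total sizes are illustrative):

// Sketch: copy data+code into an executable page range
CodeAllocator allocator(/* blockSize= */ 1 << 20, /* maxTotalSize= */ 1 << 24);
uint8_t* nativeData = nullptr;
size_t sizeNativeData = 0;
uint8_t* codeStart = nullptr;
bool ok = allocator.allocate(build.data.data(), build.data.size(), build.code.data(), build.code.size(),
    nativeData, sizeNativeData, codeStart); // codeStart points at the copied, executable code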


@@ -0,0 +1,19 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <stddef.h>
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
// context must be an UnwindBuilder
void* createBlockUnwindInfo(void* context, uint8_t* block, size_t blockSize, size_t& startOffset);
void destroyBlockUnwindInfo(void* context, void* unwindData);
bool isUnwindSupported();
} // namespace CodeGen
} // namespace Luau


@@ -0,0 +1,45 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <string>
#include <stdint.h>
struct lua_State;
namespace Luau
{
namespace CodeGen
{
bool isSupported();
void create(lua_State* L);
// Builds target function and all inner functions
void compile(lua_State* L, int idx);
using AnnotatorFn = void (*)(void* context, std::string& result, int fid, int instpos);
struct AssemblyOptions
{
bool outputBinary = false;
bool includeAssembly = false;
bool includeIr = false;
bool includeOutlinedCode = false;
// An optional annotator function can be provided to describe each instruction; it takes a function id and a sequential instruction id
AnnotatorFn annotator = nullptr;
void* annotatorContext = nullptr;
};
// Generates assembly for target function and all inner functions
std::string getAssembly(lua_State* L, int idx, AssemblyOptions options = {});
using PerfLogFn = void (*)(void* context, uintptr_t addr, unsigned size, const char* symbol);
void setPerfLog(void* context, PerfLogFn logFn);
} // namespace CodeGen
} // namespace Luau
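
The intended call sequence, sketched (luau_load is the standard Luau C API for loading bytecode):

// Sketch: JIT-compile the function on top of the stack when supported
if (Luau::CodeGen::isSupported())
{
    Luau::CodeGen::create(L); // attach codegen state to this lua_State
    // ... luau_load(L, ...) pushes a function ...
    Luau::CodeGen::compile(L, -1); // compile it and all inner functions
}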


@@ -0,0 +1,57 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
namespace Luau
{
namespace CodeGen
{
namespace A64
{
// See Table C1-1 on page C1-229 of Arm ARM for A-profile architecture
enum class ConditionA64
{
// EQ: integer (equal), floating-point (equal)
Equal,
// NE: integer (not equal), floating-point (not equal or unordered)
NotEqual,
// CS: integer (carry set), unsigned integer (greater than, equal), floating-point (greater than, equal or unordered)
CarrySet,
// CC: integer (carry clear), unsigned integer (less than), floating-point (less than)
CarryClear,
// MI: integer (negative), floating-point (less than)
Minus,
// PL: integer (positive or zero), floating-point (greater than, equal or unordered)
Plus,
// VS: integer (overflow), floating-point (unordered)
Overflow,
// VC: integer (no overflow), floating-point (ordered)
NoOverflow,
// HI: integer (unsigned higher), floating-point (greater than, or unordered)
UnsignedGreater,
// LS: integer (unsigned lower or same), floating-point (less than or equal)
UnsignedLessEqual,
// GE: integer (signed greater than or equal), floating-point (greater than or equal)
GreaterEqual,
// LT: integer (signed less than), floating-point (less than, or unordered)
Less,
// GT: integer (signed greater than), floating-point (greater than)
Greater,
// LE: integer (signed less than or equal), floating-point (less than, equal or unordered)
LessEqual,
// AL: always
Always,
Count
};
} // namespace A64
} // namespace CodeGen
} // namespace Luau


@@ -0,0 +1,47 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
enum class ConditionX64 : uint8_t
{
Overflow,
NoOverflow,
Carry,
NoCarry,
Below,
BelowEqual,
Above,
AboveEqual,
Equal,
Less,
LessEqual,
Greater,
GreaterEqual,
NotBelow,
NotBelowEqual,
NotAbove,
NotAboveEqual,
NotEqual,
NotLess,
NotLessEqual,
NotGreater,
NotGreaterEqual,
Zero,
NotZero,
Parity,
NotParity,
Count
};
} // namespace CodeGen
} // namespace Luau


@@ -0,0 +1,99 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <bitset>
#include <utility>
#include <vector>
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
struct IrBlock;
struct IrFunction;
void updateUseCounts(IrFunction& function);
void updateLastUseLocations(IrFunction& function);
uint32_t getNextInstUse(IrFunction& function, uint32_t targetInstIdx, uint32_t startInstIdx);
// Returns how many values are coming into the block (live in) and how many are coming out of the block (live out)
std::pair<uint32_t, uint32_t> getLiveInOutValueCount(IrFunction& function, IrBlock& block);
uint32_t getLiveInValueCount(IrFunction& function, IrBlock& block);
uint32_t getLiveOutValueCount(IrFunction& function, IrBlock& block);
struct RegisterSet
{
std::bitset<256> regs;
// If variadic sequence is active, we track register from which it starts
bool varargSeq = false;
uint8_t varargStart = 0;
};
void requireVariadicSequence(RegisterSet& sourceRs, const RegisterSet& defRs, uint8_t varargStart);
struct CfgInfo
{
std::vector<uint32_t> predecessors;
std::vector<uint32_t> predecessorsOffsets;
std::vector<uint32_t> successors;
std::vector<uint32_t> successorsOffsets;
// VM registers that are live when the block is entered
// Additionally, an active variadic sequence can exist at the entry of the block
std::vector<RegisterSet> in;
// VM registers that are defined inside the block
// It can also contain a variadic sequence definition if that hasn't been consumed inside the block
// Note that this means that checking 'def' set might not be enough to say that register has not been written to
std::vector<RegisterSet> def;
// VM registers that are coming out from the block
// These might be registers that are defined inside the block or have been defined at the entry of the block
// Additionally, an active variadic sequence can exist at the exit of the block
std::vector<RegisterSet> out;
// VM registers captured by nested closures
// This set can never have an active variadic sequence
RegisterSet captured;
};
void computeCfgInfo(IrFunction& function);
struct BlockIteratorWrapper
{
const uint32_t* itBegin = nullptr;
const uint32_t* itEnd = nullptr;
bool empty() const
{
return itBegin == itEnd;
}
size_t size() const
{
return size_t(itEnd - itBegin);
}
const uint32_t* begin() const
{
return itBegin;
}
const uint32_t* end() const
{
return itEnd;
}
};
BlockIteratorWrapper predecessors(const CfgInfo& cfg, uint32_t blockIdx);
BlockIteratorWrapper successors(const CfgInfo& cfg, uint32_t blockIdx);
} // namespace CodeGen
} // namespace Luau
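
A sketch of consuming the CFG; IrFunction is declared in IrData.h (whose diff is suppressed below), so the 'cfg' and 'blocks' member names here are assumptions based on this API:

// Sketch: visit all predecessors of block 'blockIdx'
computeCfgInfo(function);
for (uint32_t predIdx : predecessors(function.cfg, blockIdx))
    visit(function.blocks[predIdx]); // 'visit' is a placeholder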


@@ -0,0 +1,117 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Bytecode.h"
#include "Luau/Common.h"
#include "Luau/DenseHash.h"
#include "Luau/IrData.h"
#include <vector>
struct Proto;
typedef uint32_t Instruction;
namespace Luau
{
namespace CodeGen
{
struct AssemblyOptions;
struct IrBuilder
{
IrBuilder();
void buildFunctionIr(Proto* proto);
void rebuildBytecodeBasicBlocks(Proto* proto);
void translateInst(LuauOpcode op, const Instruction* pc, int i);
bool isInternalBlock(IrOp block);
void beginBlock(IrOp block);
void loadAndCheckTag(IrOp loc, uint8_t tag, IrOp fallback);
// Clones all instructions into the current block
// Source block that is cloned cannot use values coming in from a predecessor
void clone(const IrBlock& source, bool removeCurrentTerminator);
IrOp undef();
IrOp constBool(bool value);
IrOp constInt(int value);
IrOp constUint(unsigned value);
IrOp constDouble(double value);
IrOp constTag(uint8_t value);
IrOp constAny(IrConst constant, uint64_t asCommonKey);
IrOp cond(IrCondition cond);
IrOp inst(IrCmd cmd);
IrOp inst(IrCmd cmd, IrOp a);
IrOp inst(IrCmd cmd, IrOp a, IrOp b);
IrOp inst(IrCmd cmd, IrOp a, IrOp b, IrOp c);
IrOp inst(IrCmd cmd, IrOp a, IrOp b, IrOp c, IrOp d);
IrOp inst(IrCmd cmd, IrOp a, IrOp b, IrOp c, IrOp d, IrOp e);
IrOp inst(IrCmd cmd, IrOp a, IrOp b, IrOp c, IrOp d, IrOp e, IrOp f);
IrOp block(IrBlockKind kind); // Requested kind can be ignored if we are in an outlined sequence
IrOp blockAtInst(uint32_t index);
IrOp vmReg(uint8_t index);
IrOp vmConst(uint32_t index);
IrOp vmUpvalue(uint8_t index);
bool inTerminatedBlock = false;
bool activeFastcallFallback = false;
IrOp fastcallFallbackReturn;
IrFunction function;
uint32_t activeBlockIdx = ~0u;
std::vector<uint32_t> instIndexToBlock; // Block index at the bytecode instruction
// Similar to BytecodeBuilder, duplicate constants are removed using the same method
struct ConstantKey
{
IrConstKind kind;
// Note: this stores value* from IrConst; when kind is Double, this stores the same bits as double does but in uint64_t.
uint64_t value;
bool operator==(const ConstantKey& key) const
{
return kind == key.kind && value == key.value;
}
};
struct ConstantKeyHash
{
size_t operator()(const ConstantKey& key) const
{
// finalizer from MurmurHash64B
const uint32_t m = 0x5bd1e995;
uint32_t h1 = uint32_t(key.value);
uint32_t h2 = uint32_t(key.value >> 32) ^ (int(key.kind) * m);
h1 ^= h2 >> 18;
h1 *= m;
h2 ^= h1 >> 22;
h2 *= m;
h1 ^= h2 >> 17;
h1 *= m;
h2 ^= h1 >> 19;
h2 *= m;
// ... truncated to 32-bit output (normally hash is equal to (uint64_t(h1) << 32) | h2, but we only really need the lower 32-bit half)
return size_t(h2);
}
};
DenseHashMap<ConstantKey, uint32_t, ConstantKeyHash> constantMap;
};
} // namespace CodeGen
} // namespace Luau
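
A sketch of the deduplication this enables (IrCmd::ADD_NUM comes from IrData.h):

// Sketch: identical constants should map to the same operand via constantMap
IrBuilder build;
IrOp a = build.constDouble(1.0);
IrOp b = build.constDouble(1.0); // expected to be the same operand as 'a'
IrOp sum = build.inst(IrCmd::ADD_NUM, a, b);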


@@ -0,0 +1,84 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/IrData.h"
#include "Luau/OperandX64.h"
#include "Luau/RegisterX64.h"
#include <array>
// TODO: call wrapper can be used to suggest target registers for ScopedRegX64 to compute data into argument registers directly
namespace Luau
{
namespace CodeGen
{
namespace X64
{
struct IrRegAllocX64;
struct ScopedRegX64;
struct CallArgument
{
SizeX64 targetSize = SizeX64::none;
OperandX64 source = noreg;
IrOp sourceOp;
OperandX64 target = noreg;
bool candidate = true;
};
class IrCallWrapperX64
{
public:
IrCallWrapperX64(IrRegAllocX64& regs, AssemblyBuilderX64& build, uint32_t instIdx = kInvalidInstIdx);
void addArgument(SizeX64 targetSize, OperandX64 source, IrOp sourceOp = {});
void addArgument(SizeX64 targetSize, ScopedRegX64& scopedReg);
void call(const OperandX64& func);
RegisterX64 suggestNextArgumentRegister(SizeX64 size) const;
IrRegAllocX64& regs;
AssemblyBuilderX64& build;
uint32_t instIdx = ~0u;
private:
OperandX64 getNextArgumentTarget(SizeX64 size) const;
void countRegisterUses();
CallArgument* findNonInterferingArgument();
bool interferesWithOperand(const OperandX64& op, RegisterX64 reg) const;
bool interferesWithActiveSources(const CallArgument& targetArg, int targetArgIndex) const;
bool interferesWithActiveTarget(RegisterX64 sourceReg) const;
void moveToTarget(CallArgument& arg);
void freeSourceRegisters(CallArgument& arg);
void renameRegister(RegisterX64& target, RegisterX64 reg, RegisterX64 replacement);
void renameSourceRegisters(RegisterX64 reg, RegisterX64 replacement);
RegisterX64 findConflictingTarget() const;
void renameConflictingRegister(RegisterX64 conflict);
int getRegisterUses(RegisterX64 reg) const;
void addRegisterUse(RegisterX64 reg);
void removeRegisterUse(RegisterX64 reg);
static const int kMaxCallArguments = 6;
std::array<CallArgument, kMaxCallArguments> args;
int argCount = 0;
int gprPos = 0;
int xmmPos = 0;
OperandX64 funcOp;
// Internal counters for remaining register use counts
std::array<uint8_t, 16> gprUses;
std::array<uint8_t, 16> xmmUses;
};
} // namespace X64
} // namespace CodeGen
} // namespace Luau
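
A hedged sketch of the call-marshalling flow; 'regs', 'build', 'instIdx', and 'helperAddress' are placeholders, not names from this commit:

// Sketch: stage two arguments into ABI registers, then call
IrCallWrapperX64 callWrap(regs, build, instIdx);
callWrap.addArgument(SizeX64::qword, rdi);         // placeholder source register
callWrap.addArgument(SizeX64::dword, int32_t(42)); // immediate argument
callWrap.call(helperAddress);                      // OperandX64 naming the target function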

File diff suppressed because it is too large


@@ -0,0 +1,45 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/IrData.h"
#include <string>
#include <vector>
namespace Luau
{
namespace CodeGen
{
struct CfgInfo;
const char* getCmdName(IrCmd cmd);
const char* getBlockKindName(IrBlockKind kind);
struct IrToStringContext
{
std::string& result;
const std::vector<IrBlock>& blocks;
const std::vector<IrConst>& constants;
const CfgInfo& cfg;
};
void toString(IrToStringContext& ctx, const IrInst& inst, uint32_t index);
void toString(IrToStringContext& ctx, const IrBlock& block, uint32_t index); // Block title
void toString(IrToStringContext& ctx, IrOp op);
void toString(std::string& result, IrConst constant);
void toStringDetailed(IrToStringContext& ctx, const IrBlock& block, uint32_t blockIdx, const IrInst& inst, uint32_t instIdx, bool includeUseInfo);
void toStringDetailed(IrToStringContext& ctx, const IrBlock& block, uint32_t index, bool includeUseInfo); // Block title
std::string toString(const IrFunction& function, bool includeUseInfo);
std::string dump(const IrFunction& function);
std::string toDot(const IrFunction& function, bool includeInst);
std::string dumpDot(const IrFunction& function, bool includeInst);
} // namespace CodeGen
} // namespace Luau


@@ -0,0 +1,121 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/IrData.h"
#include "Luau/RegisterX64.h"
#include <array>
#include <initializer_list>
namespace Luau
{
namespace CodeGen
{
namespace X64
{
constexpr uint8_t kNoStackSlot = 0xff;
struct IrSpillX64
{
uint32_t instIdx = 0;
IrValueKind valueKind = IrValueKind::Unknown;
unsigned spillId = 0;
// Spill location can be a stack location or be empty
// When it's empty, it means that instruction value can be rematerialized
uint8_t stackSlot = kNoStackSlot;
RegisterX64 originalLoc = noreg;
};
struct IrRegAllocX64
{
IrRegAllocX64(AssemblyBuilderX64& build, IrFunction& function);
RegisterX64 allocReg(SizeX64 size, uint32_t instIdx);
RegisterX64 allocRegOrReuse(SizeX64 size, uint32_t instIdx, std::initializer_list<IrOp> oprefs);
RegisterX64 takeReg(RegisterX64 reg, uint32_t instIdx);
void freeReg(RegisterX64 reg);
void freeLastUseReg(IrInst& target, uint32_t instIdx);
void freeLastUseRegs(const IrInst& inst, uint32_t instIdx);
bool isLastUseReg(const IrInst& target, uint32_t instIdx) const;
bool shouldFreeGpr(RegisterX64 reg) const;
unsigned findSpillStackSlot(IrValueKind valueKind);
IrOp getRestoreOp(const IrInst& inst) const;
bool hasRestoreOp(const IrInst& inst) const;
OperandX64 getRestoreAddress(const IrInst& inst, IrOp restoreOp);
// Register used by instruction is about to be freed, have to find a way to restore value later
void preserve(IrInst& inst);
void restore(IrInst& inst, bool intoOriginalLocation);
void preserveAndFreeInstValues();
uint32_t findInstructionWithFurthestNextUse(const std::array<uint32_t, 16>& regInstUsers) const;
void assertFree(RegisterX64 reg) const;
void assertAllFree() const;
void assertNoSpills() const;
AssemblyBuilderX64& build;
IrFunction& function;
uint32_t currInstIdx = ~0u;
std::array<bool, 16> freeGprMap;
std::array<uint32_t, 16> gprInstUsers;
std::array<bool, 16> freeXmmMap;
std::array<uint32_t, 16> xmmInstUsers;
std::bitset<256> usedSpillSlots;
unsigned maxUsedSlot = 0;
unsigned nextSpillId = 1;
std::vector<IrSpillX64> spills;
};
struct ScopedRegX64
{
explicit ScopedRegX64(IrRegAllocX64& owner);
ScopedRegX64(IrRegAllocX64& owner, SizeX64 size);
ScopedRegX64(IrRegAllocX64& owner, RegisterX64 reg);
~ScopedRegX64();
ScopedRegX64(const ScopedRegX64&) = delete;
ScopedRegX64& operator=(const ScopedRegX64&) = delete;
void alloc(SizeX64 size);
void free();
RegisterX64 release();
IrRegAllocX64& owner;
RegisterX64 reg;
};
// When IR instruction makes a call under a condition that's not reflected as a real branch in IR,
// spilled values have to be restored to their exact original locations, so that both after a call
// and after the skip, values are found in the same place
struct ScopedSpills
{
explicit ScopedSpills(IrRegAllocX64& owner);
~ScopedSpills();
ScopedSpills(const ScopedSpills&) = delete;
ScopedSpills& operator=(const ScopedSpills&) = delete;
IrRegAllocX64& owner;
unsigned startSpillId = 0;
};
} // namespace X64
} // namespace CodeGen
} // namespace Luau
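
A sketch of the RAII usage ScopedRegX64 enables ('regs', 'build', and 'rBase' are placeholders):

// Sketch: borrow a temporary qword register for one computation
{
    ScopedRegX64 tmp{regs, SizeX64::qword};
    build.mov(tmp.reg, qword[rBase + 8]); // 'rBase' is a placeholder base register
} // destructor frees tmp.reg back to the allocator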


@@ -0,0 +1,258 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Bytecode.h"
#include "Luau/Common.h"
#include "Luau/IrData.h"
namespace Luau
{
namespace CodeGen
{
struct IrBuilder;
inline bool isJumpD(LuauOpcode op)
{
switch (op)
{
case LOP_JUMP:
case LOP_JUMPIF:
case LOP_JUMPIFNOT:
case LOP_JUMPIFEQ:
case LOP_JUMPIFLE:
case LOP_JUMPIFLT:
case LOP_JUMPIFNOTEQ:
case LOP_JUMPIFNOTLE:
case LOP_JUMPIFNOTLT:
case LOP_FORNPREP:
case LOP_FORNLOOP:
case LOP_FORGPREP:
case LOP_FORGLOOP:
case LOP_FORGPREP_INEXT:
case LOP_FORGPREP_NEXT:
case LOP_JUMPBACK:
case LOP_JUMPXEQKNIL:
case LOP_JUMPXEQKB:
case LOP_JUMPXEQKN:
case LOP_JUMPXEQKS:
return true;
default:
return false;
}
}
inline bool isSkipC(LuauOpcode op)
{
switch (op)
{
case LOP_LOADB:
return true;
default:
return false;
}
}
inline bool isFastCall(LuauOpcode op)
{
switch (op)
{
case LOP_FASTCALL:
case LOP_FASTCALL1:
case LOP_FASTCALL2:
case LOP_FASTCALL2K:
return true;
default:
return false;
}
}
inline int getJumpTarget(uint32_t insn, uint32_t pc)
{
LuauOpcode op = LuauOpcode(LUAU_INSN_OP(insn));
if (isJumpD(op))
return int(pc + LUAU_INSN_D(insn) + 1);
else if (isFastCall(op))
return int(pc + LUAU_INSN_C(insn) + 2);
else if (isSkipC(op) && LUAU_INSN_C(insn))
return int(pc + LUAU_INSN_C(insn) + 1);
else if (op == LOP_JUMPX)
return int(pc + LUAU_INSN_E(insn) + 1);
else
return -1;
}
inline bool isBlockTerminator(IrCmd cmd)
{
switch (cmd)
{
case IrCmd::JUMP:
case IrCmd::JUMP_IF_TRUTHY:
case IrCmd::JUMP_IF_FALSY:
case IrCmd::JUMP_EQ_TAG:
case IrCmd::JUMP_EQ_INT:
case IrCmd::JUMP_LT_INT:
case IrCmd::JUMP_GE_UINT:
case IrCmd::JUMP_EQ_POINTER:
case IrCmd::JUMP_CMP_NUM:
case IrCmd::JUMP_CMP_ANY:
case IrCmd::JUMP_SLOT_MATCH:
case IrCmd::RETURN:
case IrCmd::FORGLOOP:
case IrCmd::FORGLOOP_FALLBACK:
case IrCmd::FORGPREP_XNEXT_FALLBACK:
case IrCmd::FALLBACK_FORGPREP:
return true;
default:
break;
}
return false;
}
inline bool isNonTerminatingJump(IrCmd cmd)
{
switch (cmd)
{
case IrCmd::TRY_NUM_TO_INDEX:
case IrCmd::TRY_CALL_FASTGETTM:
case IrCmd::CHECK_FASTCALL_RES:
case IrCmd::CHECK_TAG:
case IrCmd::CHECK_READONLY:
case IrCmd::CHECK_NO_METATABLE:
case IrCmd::CHECK_SAFE_ENV:
case IrCmd::CHECK_ARRAY_SIZE:
case IrCmd::CHECK_SLOT_MATCH:
case IrCmd::CHECK_NODE_NO_NEXT:
return true;
default:
break;
}
return false;
}
inline bool hasResult(IrCmd cmd)
{
switch (cmd)
{
case IrCmd::LOAD_TAG:
case IrCmd::LOAD_POINTER:
case IrCmd::LOAD_DOUBLE:
case IrCmd::LOAD_INT:
case IrCmd::LOAD_TVALUE:
case IrCmd::LOAD_NODE_VALUE_TV:
case IrCmd::LOAD_ENV:
case IrCmd::GET_ARR_ADDR:
case IrCmd::GET_SLOT_NODE_ADDR:
case IrCmd::GET_HASH_NODE_ADDR:
case IrCmd::ADD_INT:
case IrCmd::SUB_INT:
case IrCmd::ADD_NUM:
case IrCmd::SUB_NUM:
case IrCmd::MUL_NUM:
case IrCmd::DIV_NUM:
case IrCmd::MOD_NUM:
case IrCmd::MIN_NUM:
case IrCmd::MAX_NUM:
case IrCmd::UNM_NUM:
case IrCmd::FLOOR_NUM:
case IrCmd::CEIL_NUM:
case IrCmd::ROUND_NUM:
case IrCmd::SQRT_NUM:
case IrCmd::ABS_NUM:
case IrCmd::NOT_ANY:
case IrCmd::TABLE_LEN:
case IrCmd::NEW_TABLE:
case IrCmd::DUP_TABLE:
case IrCmd::TRY_NUM_TO_INDEX:
case IrCmd::TRY_CALL_FASTGETTM:
case IrCmd::INT_TO_NUM:
case IrCmd::UINT_TO_NUM:
case IrCmd::NUM_TO_INT:
case IrCmd::NUM_TO_UINT:
case IrCmd::SUBSTITUTE:
case IrCmd::INVOKE_FASTCALL:
case IrCmd::BITAND_UINT:
case IrCmd::BITXOR_UINT:
case IrCmd::BITOR_UINT:
case IrCmd::BITNOT_UINT:
case IrCmd::BITLSHIFT_UINT:
case IrCmd::BITRSHIFT_UINT:
case IrCmd::BITARSHIFT_UINT:
case IrCmd::BITLROTATE_UINT:
case IrCmd::BITRROTATE_UINT:
case IrCmd::BITCOUNTLZ_UINT:
case IrCmd::BITCOUNTRZ_UINT:
case IrCmd::INVOKE_LIBM:
return true;
default:
break;
}
return false;
}
inline bool hasSideEffects(IrCmd cmd)
{
if (cmd == IrCmd::INVOKE_FASTCALL)
return true;
// Instructions that don't produce a result most likely have other side-effects to make them useful
// Right now, a full switch would mirror the 'hasResult' function, so we use this simple condition
return !hasResult(cmd);
}
inline bool isPseudo(IrCmd cmd)
{
// Instructions that are used for internal needs and are not a part of final lowering
return cmd == IrCmd::NOP || cmd == IrCmd::SUBSTITUTE;
}
IrValueKind getCmdValueKind(IrCmd cmd);
bool isGCO(uint8_t tag);
// Manually add or remove use of an operand
void addUse(IrFunction& function, IrOp op);
void removeUse(IrFunction& function, IrOp op);
// Remove a single instruction
void kill(IrFunction& function, IrInst& inst);
// Remove a range of instructions
void kill(IrFunction& function, uint32_t start, uint32_t end);
// Remove a block, including all instructions inside
void kill(IrFunction& function, IrBlock& block);
// Replace a single operand and update use counts (can cause chain removal of dead code)
void replace(IrFunction& function, IrOp& original, IrOp replacement);
// Replace a single instruction
// Target instruction index instead of reference is used to handle introduction of a new block terminator
void replace(IrFunction& function, IrBlock& block, uint32_t instIdx, IrInst replacement);
// Replace instruction with a different value (using IrCmd::SUBSTITUTE)
void substitute(IrFunction& function, IrInst& inst, IrOp replacement);
// Replace instruction arguments that point to substitutions with target values
void applySubstitutions(IrFunction& function, IrOp& op);
void applySubstitutions(IrFunction& function, IrInst& inst);
// Compare numbers using IR condition value
bool compare(double a, double b, IrCondition cond);
// Perform constant folding on instruction at index
// For most instructions, successful folding results in a IrCmd::SUBSTITUTE
// But it can also be successful on conditional control-flow, replacing it with an unconditional IrCmd::JUMP
void foldConstants(IrBuilder& build, IrFunction& function, IrBlock& block, uint32_t instIdx);
uint32_t getNativeContextOffset(int bfid);
} // namespace CodeGen
} // namespace Luau


@@ -0,0 +1,18 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
struct Label
{
uint32_t id = 0;
uint32_t location = ~0u;
};
} // namespace CodeGen
} // namespace Luau


@@ -0,0 +1,145 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Common.h"
#include "Luau/RegisterX64.h"
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
namespace X64
{
enum class CategoryX64 : uint8_t
{
reg,
mem,
imm,
};
struct OperandX64
{
constexpr OperandX64(RegisterX64 reg)
: cat(CategoryX64::reg)
, index(noreg)
, base(reg)
, memSize(SizeX64::none)
, scale(1)
, imm(0)
{
}
constexpr OperandX64(int32_t imm)
: cat(CategoryX64::imm)
, index(noreg)
, base(noreg)
, memSize(SizeX64::none)
, scale(1)
, imm(imm)
{
}
constexpr explicit OperandX64(SizeX64 size, RegisterX64 index, uint8_t scale, RegisterX64 base, int32_t disp)
: cat(CategoryX64::mem)
, index(index)
, base(base)
, memSize(size)
, scale(scale)
, imm(disp)
{
}
// Fields are carefully placed to make this struct fit into an 8 byte register
CategoryX64 cat;
RegisterX64 index;
RegisterX64 base;
SizeX64 memSize : 4;
uint8_t scale : 4;
int32_t imm;
constexpr OperandX64 operator[](OperandX64&& addr) const
{
LUAU_ASSERT(cat == CategoryX64::mem);
LUAU_ASSERT(index == noreg && scale == 1 && base == noreg && imm == 0);
LUAU_ASSERT(addr.memSize == SizeX64::none);
addr.cat = CategoryX64::mem;
addr.memSize = memSize;
return addr;
}
};
constexpr OperandX64 addr{SizeX64::none, noreg, 1, noreg, 0};
constexpr OperandX64 byte{SizeX64::byte, noreg, 1, noreg, 0};
constexpr OperandX64 word{SizeX64::word, noreg, 1, noreg, 0};
constexpr OperandX64 dword{SizeX64::dword, noreg, 1, noreg, 0};
constexpr OperandX64 qword{SizeX64::qword, noreg, 1, noreg, 0};
constexpr OperandX64 xmmword{SizeX64::xmmword, noreg, 1, noreg, 0};
constexpr OperandX64 ymmword{SizeX64::ymmword, noreg, 1, noreg, 0};
constexpr OperandX64 operator*(RegisterX64 reg, uint8_t scale)
{
if (scale == 1)
return OperandX64(reg);
LUAU_ASSERT(scale == 1 || scale == 2 || scale == 4 || scale == 8);
LUAU_ASSERT(reg.index != 0b100 && "can't scale SP");
return OperandX64(SizeX64::none, reg, scale, noreg, 0);
}
constexpr OperandX64 operator+(RegisterX64 reg, int32_t disp)
{
return OperandX64(SizeX64::none, noreg, 1, reg, disp);
}
constexpr OperandX64 operator-(RegisterX64 reg, int32_t disp)
{
return OperandX64(SizeX64::none, noreg, 1, reg, -disp);
}
constexpr OperandX64 operator+(RegisterX64 base, RegisterX64 index)
{
LUAU_ASSERT(index.index != 4 && "sp cannot be used as index");
LUAU_ASSERT(base.size == index.size);
return OperandX64(SizeX64::none, index, 1, base, 0);
}
constexpr OperandX64 operator+(OperandX64 op, int32_t disp)
{
LUAU_ASSERT(op.cat == CategoryX64::mem);
LUAU_ASSERT(op.memSize == SizeX64::none);
op.imm += disp;
return op;
}
constexpr OperandX64 operator+(OperandX64 op, RegisterX64 base)
{
LUAU_ASSERT(op.cat == CategoryX64::mem);
LUAU_ASSERT(op.memSize == SizeX64::none);
LUAU_ASSERT(op.base == noreg);
LUAU_ASSERT(op.index == noreg || op.index.size == base.size);
op.base = base;
return op;
}
constexpr OperandX64 operator+(RegisterX64 base, OperandX64 op)
{
LUAU_ASSERT(op.cat == CategoryX64::mem);
LUAU_ASSERT(op.memSize == SizeX64::none);
LUAU_ASSERT(op.base == noreg);
LUAU_ASSERT(op.index == noreg || op.index.size == base.size);
op.base = base;
return op;
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau
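
A sketch of how these overloads compose addressing modes (register constants come from RegisterX64.h):

// Sketch: build '[rdx + rcx*8 + 16]' as a qword-sized memory operand
OperandX64 slot = qword[rdx + rcx * 8 + 16];
// Sketch: rip-relative qword, as used by the constant allocation helpers
OperandX64 data = qword[rip + 0];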


@@ -0,0 +1,17 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/IrData.h"
namespace Luau
{
namespace CodeGen
{
struct IrBuilder;
void constPropInBlockChains(IrBuilder& build, bool useValueNumbering);
void createLinearBlocks(IrBuilder& build, bool useValueNumbering);
} // namespace CodeGen
} // namespace Luau


@@ -0,0 +1,14 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/IrData.h"
namespace Luau
{
namespace CodeGen
{
void optimizeMemoryOperandsX64(IrFunction& function);
} // namespace CodeGen
} // namespace Luau


@@ -0,0 +1,233 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Common.h"
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
namespace A64
{
enum class KindA64 : uint8_t
{
none,
w, // 32-bit GPR
x, // 64-bit GPR
s, // 32-bit SIMD&FP scalar
d, // 64-bit SIMD&FP scalar
q, // 128-bit SIMD&FP vector
};
struct RegisterA64
{
KindA64 kind : 3;
uint8_t index : 5;
constexpr bool operator==(RegisterA64 rhs) const
{
return kind == rhs.kind && index == rhs.index;
}
constexpr bool operator!=(RegisterA64 rhs) const
{
return !(*this == rhs);
}
};
constexpr RegisterA64 castReg(KindA64 kind, RegisterA64 reg)
{
LUAU_ASSERT(kind != reg.kind);
LUAU_ASSERT(kind != KindA64::none && reg.kind != KindA64::none);
LUAU_ASSERT((kind == KindA64::w || kind == KindA64::x) == (reg.kind == KindA64::w || reg.kind == KindA64::x));
return RegisterA64{kind, reg.index};
}
// This is equivalent to castReg(KindA64::x, reg), but is separate because it implies different semantics
// Specifically, there are cases when it's useful to treat a wN register as an xN register *after* it has been assigned a value
// Since all A64 instructions that write to wN implicitly zero the top half, this works when we need zero extension semantics
// Crucially, this is *not* safe on an ABI boundary - an int parameter in wN register may have anything in its top half in certain cases
// However, as long as our codegen doesn't use 32-bit truncation by using castReg x=>w, we can safely rely on this.
constexpr RegisterA64 zextReg(RegisterA64 reg)
{
LUAU_ASSERT(reg.kind == KindA64::w);
return RegisterA64{KindA64::x, reg.index};
}
constexpr RegisterA64 noreg{KindA64::none, 0};
constexpr RegisterA64 w0{KindA64::w, 0};
constexpr RegisterA64 w1{KindA64::w, 1};
constexpr RegisterA64 w2{KindA64::w, 2};
constexpr RegisterA64 w3{KindA64::w, 3};
constexpr RegisterA64 w4{KindA64::w, 4};
constexpr RegisterA64 w5{KindA64::w, 5};
constexpr RegisterA64 w6{KindA64::w, 6};
constexpr RegisterA64 w7{KindA64::w, 7};
constexpr RegisterA64 w8{KindA64::w, 8};
constexpr RegisterA64 w9{KindA64::w, 9};
constexpr RegisterA64 w10{KindA64::w, 10};
constexpr RegisterA64 w11{KindA64::w, 11};
constexpr RegisterA64 w12{KindA64::w, 12};
constexpr RegisterA64 w13{KindA64::w, 13};
constexpr RegisterA64 w14{KindA64::w, 14};
constexpr RegisterA64 w15{KindA64::w, 15};
constexpr RegisterA64 w16{KindA64::w, 16};
constexpr RegisterA64 w17{KindA64::w, 17};
constexpr RegisterA64 w18{KindA64::w, 18};
constexpr RegisterA64 w19{KindA64::w, 19};
constexpr RegisterA64 w20{KindA64::w, 20};
constexpr RegisterA64 w21{KindA64::w, 21};
constexpr RegisterA64 w22{KindA64::w, 22};
constexpr RegisterA64 w23{KindA64::w, 23};
constexpr RegisterA64 w24{KindA64::w, 24};
constexpr RegisterA64 w25{KindA64::w, 25};
constexpr RegisterA64 w26{KindA64::w, 26};
constexpr RegisterA64 w27{KindA64::w, 27};
constexpr RegisterA64 w28{KindA64::w, 28};
constexpr RegisterA64 w29{KindA64::w, 29};
constexpr RegisterA64 w30{KindA64::w, 30};
constexpr RegisterA64 wzr{KindA64::w, 31};
constexpr RegisterA64 x0{KindA64::x, 0};
constexpr RegisterA64 x1{KindA64::x, 1};
constexpr RegisterA64 x2{KindA64::x, 2};
constexpr RegisterA64 x3{KindA64::x, 3};
constexpr RegisterA64 x4{KindA64::x, 4};
constexpr RegisterA64 x5{KindA64::x, 5};
constexpr RegisterA64 x6{KindA64::x, 6};
constexpr RegisterA64 x7{KindA64::x, 7};
constexpr RegisterA64 x8{KindA64::x, 8};
constexpr RegisterA64 x9{KindA64::x, 9};
constexpr RegisterA64 x10{KindA64::x, 10};
constexpr RegisterA64 x11{KindA64::x, 11};
constexpr RegisterA64 x12{KindA64::x, 12};
constexpr RegisterA64 x13{KindA64::x, 13};
constexpr RegisterA64 x14{KindA64::x, 14};
constexpr RegisterA64 x15{KindA64::x, 15};
constexpr RegisterA64 x16{KindA64::x, 16};
constexpr RegisterA64 x17{KindA64::x, 17};
constexpr RegisterA64 x18{KindA64::x, 18};
constexpr RegisterA64 x19{KindA64::x, 19};
constexpr RegisterA64 x20{KindA64::x, 20};
constexpr RegisterA64 x21{KindA64::x, 21};
constexpr RegisterA64 x22{KindA64::x, 22};
constexpr RegisterA64 x23{KindA64::x, 23};
constexpr RegisterA64 x24{KindA64::x, 24};
constexpr RegisterA64 x25{KindA64::x, 25};
constexpr RegisterA64 x26{KindA64::x, 26};
constexpr RegisterA64 x27{KindA64::x, 27};
constexpr RegisterA64 x28{KindA64::x, 28};
constexpr RegisterA64 x29{KindA64::x, 29};
constexpr RegisterA64 x30{KindA64::x, 30};
constexpr RegisterA64 xzr{KindA64::x, 31};
constexpr RegisterA64 sp{KindA64::none, 31};
constexpr RegisterA64 s0{KindA64::s, 0};
constexpr RegisterA64 s1{KindA64::s, 1};
constexpr RegisterA64 s2{KindA64::s, 2};
constexpr RegisterA64 s3{KindA64::s, 3};
constexpr RegisterA64 s4{KindA64::s, 4};
constexpr RegisterA64 s5{KindA64::s, 5};
constexpr RegisterA64 s6{KindA64::s, 6};
constexpr RegisterA64 s7{KindA64::s, 7};
constexpr RegisterA64 s8{KindA64::s, 8};
constexpr RegisterA64 s9{KindA64::s, 9};
constexpr RegisterA64 s10{KindA64::s, 10};
constexpr RegisterA64 s11{KindA64::s, 11};
constexpr RegisterA64 s12{KindA64::s, 12};
constexpr RegisterA64 s13{KindA64::s, 13};
constexpr RegisterA64 s14{KindA64::s, 14};
constexpr RegisterA64 s15{KindA64::s, 15};
constexpr RegisterA64 s16{KindA64::s, 16};
constexpr RegisterA64 s17{KindA64::s, 17};
constexpr RegisterA64 s18{KindA64::s, 18};
constexpr RegisterA64 s19{KindA64::s, 19};
constexpr RegisterA64 s20{KindA64::s, 20};
constexpr RegisterA64 s21{KindA64::s, 21};
constexpr RegisterA64 s22{KindA64::s, 22};
constexpr RegisterA64 s23{KindA64::s, 23};
constexpr RegisterA64 s24{KindA64::s, 24};
constexpr RegisterA64 s25{KindA64::s, 25};
constexpr RegisterA64 s26{KindA64::s, 26};
constexpr RegisterA64 s27{KindA64::s, 27};
constexpr RegisterA64 s28{KindA64::s, 28};
constexpr RegisterA64 s29{KindA64::s, 29};
constexpr RegisterA64 s30{KindA64::s, 30};
constexpr RegisterA64 s31{KindA64::s, 31};
constexpr RegisterA64 d0{KindA64::d, 0};
constexpr RegisterA64 d1{KindA64::d, 1};
constexpr RegisterA64 d2{KindA64::d, 2};
constexpr RegisterA64 d3{KindA64::d, 3};
constexpr RegisterA64 d4{KindA64::d, 4};
constexpr RegisterA64 d5{KindA64::d, 5};
constexpr RegisterA64 d6{KindA64::d, 6};
constexpr RegisterA64 d7{KindA64::d, 7};
constexpr RegisterA64 d8{KindA64::d, 8};
constexpr RegisterA64 d9{KindA64::d, 9};
constexpr RegisterA64 d10{KindA64::d, 10};
constexpr RegisterA64 d11{KindA64::d, 11};
constexpr RegisterA64 d12{KindA64::d, 12};
constexpr RegisterA64 d13{KindA64::d, 13};
constexpr RegisterA64 d14{KindA64::d, 14};
constexpr RegisterA64 d15{KindA64::d, 15};
constexpr RegisterA64 d16{KindA64::d, 16};
constexpr RegisterA64 d17{KindA64::d, 17};
constexpr RegisterA64 d18{KindA64::d, 18};
constexpr RegisterA64 d19{KindA64::d, 19};
constexpr RegisterA64 d20{KindA64::d, 20};
constexpr RegisterA64 d21{KindA64::d, 21};
constexpr RegisterA64 d22{KindA64::d, 22};
constexpr RegisterA64 d23{KindA64::d, 23};
constexpr RegisterA64 d24{KindA64::d, 24};
constexpr RegisterA64 d25{KindA64::d, 25};
constexpr RegisterA64 d26{KindA64::d, 26};
constexpr RegisterA64 d27{KindA64::d, 27};
constexpr RegisterA64 d28{KindA64::d, 28};
constexpr RegisterA64 d29{KindA64::d, 29};
constexpr RegisterA64 d30{KindA64::d, 30};
constexpr RegisterA64 d31{KindA64::d, 31};
constexpr RegisterA64 q0{KindA64::q, 0};
constexpr RegisterA64 q1{KindA64::q, 1};
constexpr RegisterA64 q2{KindA64::q, 2};
constexpr RegisterA64 q3{KindA64::q, 3};
constexpr RegisterA64 q4{KindA64::q, 4};
constexpr RegisterA64 q5{KindA64::q, 5};
constexpr RegisterA64 q6{KindA64::q, 6};
constexpr RegisterA64 q7{KindA64::q, 7};
constexpr RegisterA64 q8{KindA64::q, 8};
constexpr RegisterA64 q9{KindA64::q, 9};
constexpr RegisterA64 q10{KindA64::q, 10};
constexpr RegisterA64 q11{KindA64::q, 11};
constexpr RegisterA64 q12{KindA64::q, 12};
constexpr RegisterA64 q13{KindA64::q, 13};
constexpr RegisterA64 q14{KindA64::q, 14};
constexpr RegisterA64 q15{KindA64::q, 15};
constexpr RegisterA64 q16{KindA64::q, 16};
constexpr RegisterA64 q17{KindA64::q, 17};
constexpr RegisterA64 q18{KindA64::q, 18};
constexpr RegisterA64 q19{KindA64::q, 19};
constexpr RegisterA64 q20{KindA64::q, 20};
constexpr RegisterA64 q21{KindA64::q, 21};
constexpr RegisterA64 q22{KindA64::q, 22};
constexpr RegisterA64 q23{KindA64::q, 23};
constexpr RegisterA64 q24{KindA64::q, 24};
constexpr RegisterA64 q25{KindA64::q, 25};
constexpr RegisterA64 q26{KindA64::q, 26};
constexpr RegisterA64 q27{KindA64::q, 27};
constexpr RegisterA64 q28{KindA64::q, 28};
constexpr RegisterA64 q29{KindA64::q, 29};
constexpr RegisterA64 q30{KindA64::q, 30};
constexpr RegisterA64 q31{KindA64::q, 31};
} // namespace A64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,152 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Common.h"
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
namespace X64
{
enum class SizeX64 : uint8_t
{
none,
byte,
word,
dword,
qword,
xmmword,
ymmword,
};
struct RegisterX64
{
SizeX64 size : 3;
uint8_t index : 5;
constexpr bool operator==(RegisterX64 rhs) const
{
return size == rhs.size && index == rhs.index;
}
constexpr bool operator!=(RegisterX64 rhs) const
{
return !(*this == rhs);
}
};
constexpr RegisterX64 noreg{SizeX64::none, 16};
constexpr RegisterX64 rip{SizeX64::none, 0};
constexpr RegisterX64 al{SizeX64::byte, 0};
constexpr RegisterX64 cl{SizeX64::byte, 1};
constexpr RegisterX64 dl{SizeX64::byte, 2};
constexpr RegisterX64 bl{SizeX64::byte, 3};
constexpr RegisterX64 spl{SizeX64::byte, 4};
constexpr RegisterX64 bpl{SizeX64::byte, 5};
constexpr RegisterX64 sil{SizeX64::byte, 6};
constexpr RegisterX64 dil{SizeX64::byte, 7};
constexpr RegisterX64 r8b{SizeX64::byte, 8};
constexpr RegisterX64 r9b{SizeX64::byte, 9};
constexpr RegisterX64 r10b{SizeX64::byte, 10};
constexpr RegisterX64 r11b{SizeX64::byte, 11};
constexpr RegisterX64 r12b{SizeX64::byte, 12};
constexpr RegisterX64 r13b{SizeX64::byte, 13};
constexpr RegisterX64 r14b{SizeX64::byte, 14};
constexpr RegisterX64 r15b{SizeX64::byte, 15};
constexpr RegisterX64 eax{SizeX64::dword, 0};
constexpr RegisterX64 ecx{SizeX64::dword, 1};
constexpr RegisterX64 edx{SizeX64::dword, 2};
constexpr RegisterX64 ebx{SizeX64::dword, 3};
constexpr RegisterX64 esp{SizeX64::dword, 4};
constexpr RegisterX64 ebp{SizeX64::dword, 5};
constexpr RegisterX64 esi{SizeX64::dword, 6};
constexpr RegisterX64 edi{SizeX64::dword, 7};
constexpr RegisterX64 r8d{SizeX64::dword, 8};
constexpr RegisterX64 r9d{SizeX64::dword, 9};
constexpr RegisterX64 r10d{SizeX64::dword, 10};
constexpr RegisterX64 r11d{SizeX64::dword, 11};
constexpr RegisterX64 r12d{SizeX64::dword, 12};
constexpr RegisterX64 r13d{SizeX64::dword, 13};
constexpr RegisterX64 r14d{SizeX64::dword, 14};
constexpr RegisterX64 r15d{SizeX64::dword, 15};
constexpr RegisterX64 rax{SizeX64::qword, 0};
constexpr RegisterX64 rcx{SizeX64::qword, 1};
constexpr RegisterX64 rdx{SizeX64::qword, 2};
constexpr RegisterX64 rbx{SizeX64::qword, 3};
constexpr RegisterX64 rsp{SizeX64::qword, 4};
constexpr RegisterX64 rbp{SizeX64::qword, 5};
constexpr RegisterX64 rsi{SizeX64::qword, 6};
constexpr RegisterX64 rdi{SizeX64::qword, 7};
constexpr RegisterX64 r8{SizeX64::qword, 8};
constexpr RegisterX64 r9{SizeX64::qword, 9};
constexpr RegisterX64 r10{SizeX64::qword, 10};
constexpr RegisterX64 r11{SizeX64::qword, 11};
constexpr RegisterX64 r12{SizeX64::qword, 12};
constexpr RegisterX64 r13{SizeX64::qword, 13};
constexpr RegisterX64 r14{SizeX64::qword, 14};
constexpr RegisterX64 r15{SizeX64::qword, 15};
constexpr RegisterX64 xmm0{SizeX64::xmmword, 0};
constexpr RegisterX64 xmm1{SizeX64::xmmword, 1};
constexpr RegisterX64 xmm2{SizeX64::xmmword, 2};
constexpr RegisterX64 xmm3{SizeX64::xmmword, 3};
constexpr RegisterX64 xmm4{SizeX64::xmmword, 4};
constexpr RegisterX64 xmm5{SizeX64::xmmword, 5};
constexpr RegisterX64 xmm6{SizeX64::xmmword, 6};
constexpr RegisterX64 xmm7{SizeX64::xmmword, 7};
constexpr RegisterX64 xmm8{SizeX64::xmmword, 8};
constexpr RegisterX64 xmm9{SizeX64::xmmword, 9};
constexpr RegisterX64 xmm10{SizeX64::xmmword, 10};
constexpr RegisterX64 xmm11{SizeX64::xmmword, 11};
constexpr RegisterX64 xmm12{SizeX64::xmmword, 12};
constexpr RegisterX64 xmm13{SizeX64::xmmword, 13};
constexpr RegisterX64 xmm14{SizeX64::xmmword, 14};
constexpr RegisterX64 xmm15{SizeX64::xmmword, 15};
constexpr RegisterX64 ymm0{SizeX64::ymmword, 0};
constexpr RegisterX64 ymm1{SizeX64::ymmword, 1};
constexpr RegisterX64 ymm2{SizeX64::ymmword, 2};
constexpr RegisterX64 ymm3{SizeX64::ymmword, 3};
constexpr RegisterX64 ymm4{SizeX64::ymmword, 4};
constexpr RegisterX64 ymm5{SizeX64::ymmword, 5};
constexpr RegisterX64 ymm6{SizeX64::ymmword, 6};
constexpr RegisterX64 ymm7{SizeX64::ymmword, 7};
constexpr RegisterX64 ymm8{SizeX64::ymmword, 8};
constexpr RegisterX64 ymm9{SizeX64::ymmword, 9};
constexpr RegisterX64 ymm10{SizeX64::ymmword, 10};
constexpr RegisterX64 ymm11{SizeX64::ymmword, 11};
constexpr RegisterX64 ymm12{SizeX64::ymmword, 12};
constexpr RegisterX64 ymm13{SizeX64::ymmword, 13};
constexpr RegisterX64 ymm14{SizeX64::ymmword, 14};
constexpr RegisterX64 ymm15{SizeX64::ymmword, 15};
constexpr RegisterX64 byteReg(RegisterX64 reg)
{
return RegisterX64{SizeX64::byte, reg.index};
}
constexpr RegisterX64 wordReg(RegisterX64 reg)
{
return RegisterX64{SizeX64::word, reg.index};
}
constexpr RegisterX64 dwordReg(RegisterX64 reg)
{
return RegisterX64{SizeX64::dword, reg.index};
}
constexpr RegisterX64 qwordReg(RegisterX64 reg)
{
return RegisterX64{SizeX64::qword, reg.index};
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau
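The byteReg/wordReg/dwordReg/qwordReg helpers above derive a differently sized alias of the same physical register by keeping the index and swapping the size. A minimal illustrative sketch (hypothetical usage, assuming Luau/RegisterX64.h is included; not part of this commit):
using namespace Luau::CodeGen::X64;
// same register index (0 or 1), different operand widths
static_assert(byteReg(rax) == al, "al is the 8-bit alias of rax");
static_assert(dwordReg(rax) == eax, "eax is the 32-bit alias of rax");
static_assert(qwordReg(ecx) == rcx, "rcx is the 64-bit alias of ecx");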

View File

@ -0,0 +1,61 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/RegisterA64.h"
#include "Luau/RegisterX64.h"
#include <initializer_list>
#include <stddef.h>
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
// This value is used in 'finishFunction' to mark the function that spans to the end of the whole code block
static uint32_t kFullBlockFunction = ~0u;
class UnwindBuilder
{
public:
enum Arch
{
X64,
A64
};
virtual ~UnwindBuilder() = default;
virtual void setBeginOffset(size_t beginOffset) = 0;
virtual size_t getBeginOffset() const = 0;
virtual void startInfo(Arch arch) = 0;
virtual void startFunction() = 0;
virtual void finishFunction(uint32_t beginOffset, uint32_t endOffset) = 0;
virtual void finishInfo() = 0;
// A64-specific; prologue must look like this:
// sub sp, sp, stackSize
// store sequence that saves regs to [sp..sp+regs.size*8) in the order specified in regs; regs should start with x29, x30 (fp, lr)
// mov x29, sp
virtual void prologueA64(uint32_t prologueSize, uint32_t stackSize, std::initializer_list<A64::RegisterA64> regs) = 0;
// X64-specific; prologue must look like this:
// optional, indicated by setupFrame:
// push rbp
// mov rbp, rsp
// push reg in the order specified in regs
// sub rsp, stackSize
virtual void prologueX64(uint32_t prologueSize, uint32_t stackSize, bool setupFrame, std::initializer_list<X64::RegisterX64> regs) = 0;
virtual size_t getSize() const = 0;
virtual size_t getFunctionCount() const = 0;
// This will place the unwinding data at the target address and might update values of some fields
virtual void finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const = 0;
};
} // namespace CodeGen
} // namespace Luau
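To illustrate the prologue contracts documented above, here is a hedged sketch of how an emitted x64 prologue would pair with its unwind registration (hypothetical register choices and sizes; 'unwind' and 'prologueSize' are assumed to exist in the surrounding code):
// emitted prologue, with setupFrame == true, two saved registers and 32 bytes of locals:
//   push rbp
//   mov rbp, rsp
//   push rbx
//   push r12
//   sub rsp, 32
// matching unwind description; prologueSize is the byte length of the code above:
unwind->prologueX64(prologueSize, /* stackSize */ 32, /* setupFrame */ true, {X64::rbx, X64::r12});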

View File

@ -0,0 +1,54 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/RegisterX64.h"
#include "UnwindBuilder.h"
#include <vector>
namespace Luau
{
namespace CodeGen
{
struct UnwindFunctionDwarf2
{
uint32_t beginOffset;
uint32_t endOffset;
uint32_t fdeEntryStartPos;
};
class UnwindBuilderDwarf2 : public UnwindBuilder
{
public:
void setBeginOffset(size_t beginOffset) override;
size_t getBeginOffset() const override;
void startInfo(Arch arch) override;
void startFunction() override;
void finishFunction(uint32_t beginOffset, uint32_t endOffset) override;
void finishInfo() override;
void prologueA64(uint32_t prologueSize, uint32_t stackSize, std::initializer_list<A64::RegisterA64> regs) override;
void prologueX64(uint32_t prologueSize, uint32_t stackSize, bool setupFrame, std::initializer_list<X64::RegisterX64> regs) override;
size_t getSize() const override;
size_t getFunctionCount() const override;
void finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const override;
private:
size_t beginOffset = 0;
std::vector<UnwindFunctionDwarf2> unwindFunctions;
static const unsigned kRawDataLimit = 1024;
uint8_t rawData[kRawDataLimit];
uint8_t* pos = rawData;
// We will remember the FDE location to write some of the fields like entry length, function start and size later
uint8_t* fdeEntryStart = nullptr;
};
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,78 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/RegisterX64.h"
#include "UnwindBuilder.h"
#include <vector>
namespace Luau
{
namespace CodeGen
{
// This struct matches the layout of x64 RUNTIME_FUNCTION from winnt.h
struct UnwindFunctionWin
{
uint32_t beginOffset;
uint32_t endOffset;
uint32_t unwindInfoOffset;
};
// This struct matches the layout of x64 UNWIND_INFO from ehdata.h
struct UnwindInfoWin
{
uint8_t version : 3;
uint8_t flags : 5;
uint8_t prologsize;
uint8_t unwindcodecount;
uint8_t framereg : 4;
uint8_t frameregoff : 4;
};
// This struct matches the layout of UNWIND_CODE from ehdata.h
struct UnwindCodeWin
{
uint8_t offset;
uint8_t opcode : 4;
uint8_t opinfo : 4;
};
class UnwindBuilderWin : public UnwindBuilder
{
public:
void setBeginOffset(size_t beginOffset) override;
size_t getBeginOffset() const override;
void startInfo(Arch arch) override;
void startFunction() override;
void finishFunction(uint32_t beginOffset, uint32_t endOffset) override;
void finishInfo() override;
void prologueA64(uint32_t prologueSize, uint32_t stackSize, std::initializer_list<A64::RegisterA64> regs) override;
void prologueX64(uint32_t prologueSize, uint32_t stackSize, bool setupFrame, std::initializer_list<X64::RegisterX64> regs) override;
size_t getSize() const override;
size_t getFunctionCount() const override;
void finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const override;
private:
size_t beginOffset = 0;
static const unsigned kRawDataLimit = 1024;
uint8_t rawData[kRawDataLimit];
uint8_t* rawDataPos = rawData;
std::vector<UnwindFunctionWin> unwindFunctions;
// Windows unwind codes are written in reverse, so we have to collect them all first
std::vector<UnwindCodeWin> unwindCodes;
uint8_t prologSize = 0;
X64::RegisterX64 frameReg = X64::noreg;
uint8_t frameRegOffset = 0;
};
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,18 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
// Can be used to reconfigure visibility/exports for public APIs
#ifndef LUACODEGEN_API
#define LUACODEGEN_API extern
#endif
struct lua_State;
// returns 1 if Luau code generator is supported, 0 otherwise
LUACODEGEN_API int luau_codegen_supported(void);
// create an instance of Luau code generator. you must check that this feature is supported using luau_codegen_supported().
LUACODEGEN_API void luau_codegen_create(lua_State* L);
// build target function and all inner functions
LUACODEGEN_API void luau_codegen_compile(lua_State* L, int idx);
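A minimal usage sketch of this C API (hypothetical host code; assumes a Luau function has already been pushed onto the stack):
if (luau_codegen_supported())
{
    luau_codegen_create(L);      // attach the code generator to this lua_State
    luau_codegen_compile(L, -1); // natively compile the function at the stack top and its inner functions
}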

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,56 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <stdint.h>
#ifdef _MSC_VER
#include <intrin.h>
#endif
namespace Luau
{
namespace CodeGen
{
inline int countlz(uint32_t n)
{
#ifdef _MSC_VER
unsigned long rl;
return _BitScanReverse(&rl, n) ? 31 - int(rl) : 32;
#else
return n == 0 ? 32 : __builtin_clz(n);
#endif
}
inline int countrz(uint32_t n)
{
#ifdef _MSC_VER
unsigned long rl;
return _BitScanForward(&rl, n) ? int(rl) : 32;
#else
return n == 0 ? 32 : __builtin_ctz(n);
#endif
}
inline uint32_t lrotate(uint32_t u, int s)
{
// MSVC doesn't recognize the rotate form that is UB-safe
#ifdef _MSC_VER
return _rotl(u, s);
#else
return (u << (s & 31)) | (u >> ((32 - s) & 31));
#endif
}
inline uint32_t rrotate(uint32_t u, int s)
{
// MSVC doesn't recognize the rotate form that is UB-safe
#ifdef _MSC_VER
return _rotr(u, s);
#else
return (u >> (s & 31)) | (u << ((32 - s) & 31));
#endif
}
} // namespace CodeGen
} // namespace Luau
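A few illustrative expectations for these helpers (a sanity-check sketch, not part of the original file; assumes <assert.h>):
static void bitUtilsSanityChecks()
{
    using namespace Luau::CodeGen;
    assert(countlz(0u) == 32 && countlz(1u) == 31 && countlz(0x80000000u) == 0);
    assert(countrz(0u) == 32 && countrz(8u) == 3 && countrz(0x80000000u) == 31);
    assert(lrotate(0x80000001u, 1) == 0x00000003u); // the top bit wraps around to bit 0
    assert(rrotate(0x00000003u, 1) == 0x80000001u); // bit 0 wraps around to the top bit
}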

View File

@ -0,0 +1,80 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Common.h"
#if defined(LUAU_BIG_ENDIAN)
#include <endian.h>
#endif
#include <string.h>
inline uint8_t* writeu8(uint8_t* target, uint8_t value)
{
*target = value;
return target + sizeof(value);
}
inline uint8_t* writeu32(uint8_t* target, uint32_t value)
{
#if defined(LUAU_BIG_ENDIAN)
value = htole32(value);
#endif
memcpy(target, &value, sizeof(value));
return target + sizeof(value);
}
inline uint8_t* writeu64(uint8_t* target, uint64_t value)
{
#if defined(LUAU_BIG_ENDIAN)
value = htole64(value);
#endif
memcpy(target, &value, sizeof(value));
return target + sizeof(value);
}
inline uint8_t* writeuleb128(uint8_t* target, uint64_t value)
{
do
{
uint8_t byte = value & 0x7f;
value >>= 7;
if (value)
byte |= 0x80;
*target++ = byte;
} while (value);
return target;
}
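writeuleb128 emits 7 bits per byte, least significant group first, with the high bit of each byte acting as a continuation flag. A worked example using the classic DWARF test value (an illustrative sketch, not part of the original file):
static void ulebExample()
{
    // 624485 == 0x98765; the 7-bit groups from the least significant end are
    // 0x65, 0x0E, 0x26, so the encoded bytes are 0xE5 0x8E 0x26
    uint8_t buf[10];
    uint8_t* end = writeuleb128(buf, 624485);
    LUAU_ASSERT(end - buf == 3 && buf[0] == 0xE5 && buf[1] == 0x8E && buf[2] == 0x26);
}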
inline uint8_t* writef32(uint8_t* target, float value)
{
#if defined(LUAU_BIG_ENDIAN)
static_assert(sizeof(float) == sizeof(uint32_t), "type size must match to reinterpret data");
uint32_t data;
memcpy(&data, &value, sizeof(value));
writeu32(target, data);
#else
memcpy(target, &value, sizeof(value));
#endif
return target + sizeof(value);
}
inline uint8_t* writef64(uint8_t* target, double value)
{
#if defined(LUAU_BIG_ENDIAN)
static_assert(sizeof(double) == sizeof(uint64_t), "type size must match to reinterpret data");
uint64_t data;
memcpy(&data, &value, sizeof(value));
writeu64(target, data);
#else
memcpy(target, &value, sizeof(value));
#endif
return target + sizeof(value);
}

View File

@ -0,0 +1,209 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/CodeAllocator.h"
#include "Luau/Common.h"
#include <string.h>
#if defined(_WIN32)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <Windows.h>
const size_t kPageSize = 4096;
#else
#include <sys/mman.h>
#include <unistd.h>
#if defined(__FreeBSD__) && !(_POSIX_C_SOURCE >= 200112L)
const size_t kPageSize = getpagesize();
#else
const size_t kPageSize = sysconf(_SC_PAGESIZE);
#endif
#endif
static size_t alignToPageSize(size_t size)
{
return (size + kPageSize - 1) & ~(kPageSize - 1);
}
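For instance, with 4 KiB pages the mask arithmetic above rounds any size up to the next page boundary (illustrative values):
// alignToPageSize(1) == 4096, alignToPageSize(4096) == 4096, alignToPageSize(4097) == 8192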
#if defined(_WIN32)
static uint8_t* allocatePages(size_t size)
{
return (uint8_t*)VirtualAlloc(nullptr, alignToPageSize(size), MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
}
static void freePages(uint8_t* mem, size_t size)
{
if (VirtualFree(mem, 0, MEM_RELEASE) == 0)
LUAU_ASSERT(!"failed to deallocate block memory");
}
static void makePagesExecutable(uint8_t* mem, size_t size)
{
LUAU_ASSERT((uintptr_t(mem) & (kPageSize - 1)) == 0);
LUAU_ASSERT(size == alignToPageSize(size));
DWORD oldProtect;
if (VirtualProtect(mem, size, PAGE_EXECUTE_READ, &oldProtect) == 0)
LUAU_ASSERT(!"failed to change page protection");
}
static void flushInstructionCache(uint8_t* mem, size_t size)
{
if (FlushInstructionCache(GetCurrentProcess(), mem, size) == 0)
LUAU_ASSERT(!"failed to flush instruction cache");
}
#else
static uint8_t* allocatePages(size_t size)
{
return (uint8_t*)mmap(nullptr, alignToPageSize(size), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
}
static void freePages(uint8_t* mem, size_t size)
{
if (munmap(mem, alignToPageSize(size)) != 0)
LUAU_ASSERT(!"failed to deallocate block memory");
}
static void makePagesExecutable(uint8_t* mem, size_t size)
{
LUAU_ASSERT((uintptr_t(mem) & (kPageSize - 1)) == 0);
LUAU_ASSERT(size == alignToPageSize(size));
if (mprotect(mem, size, PROT_READ | PROT_EXEC) != 0)
LUAU_ASSERT(!"failed to change page protection");
}
static void flushInstructionCache(uint8_t* mem, size_t size)
{
__builtin___clear_cache((char*)mem, (char*)mem + size);
}
#endif
namespace Luau
{
namespace CodeGen
{
CodeAllocator::CodeAllocator(size_t blockSize, size_t maxTotalSize)
: blockSize(blockSize)
, maxTotalSize(maxTotalSize)
{
LUAU_ASSERT(blockSize > kMaxReservedDataSize);
LUAU_ASSERT(maxTotalSize >= blockSize);
}
CodeAllocator::~CodeAllocator()
{
if (destroyBlockUnwindInfo)
{
for (void* unwindInfo : unwindInfos)
destroyBlockUnwindInfo(context, unwindInfo);
}
for (uint8_t* block : blocks)
freePages(block, blockSize);
}
bool CodeAllocator::allocate(
const uint8_t* data, size_t dataSize, const uint8_t* code, size_t codeSize, uint8_t*& result, size_t& resultSize, uint8_t*& resultCodeStart)
{
// 'Round up' to preserve code alignment
size_t alignedDataSize = (dataSize + (kCodeAlignment - 1)) & ~(kCodeAlignment - 1);
size_t totalSize = alignedDataSize + codeSize;
// Function has to fit into a single block with unwinding information
if (totalSize > blockSize - kMaxReservedDataSize)
return false;
size_t startOffset = 0;
// We might need a new block
if (totalSize > size_t(blockEnd - blockPos))
{
if (!allocateNewBlock(startOffset))
return false;
LUAU_ASSERT(totalSize <= size_t(blockEnd - blockPos));
}
LUAU_ASSERT((uintptr_t(blockPos) & (kPageSize - 1)) == 0); // Allocation starts on page boundary
size_t dataOffset = startOffset + alignedDataSize - dataSize;
size_t codeOffset = startOffset + alignedDataSize;
if (dataSize)
memcpy(blockPos + dataOffset, data, dataSize);
if (codeSize)
memcpy(blockPos + codeOffset, code, codeSize);
size_t pageAlignedSize = alignToPageSize(startOffset + totalSize);
makePagesExecutable(blockPos, pageAlignedSize);
flushInstructionCache(blockPos + codeOffset, codeSize);
result = blockPos + startOffset;
resultSize = totalSize;
resultCodeStart = blockPos + codeOffset;
// Ensure that future allocations from the block start from a page boundary.
// This is important since we use W^X, and writing to the previous page would require briefly removing
// executable bit from it, which may result in access violations if that code is being executed concurrently.
if (pageAlignedSize <= size_t(blockEnd - blockPos))
{
blockPos += pageAlignedSize;
LUAU_ASSERT((uintptr_t(blockPos) & (kPageSize - 1)) == 0);
LUAU_ASSERT(blockPos <= blockEnd);
}
else
{
// Future allocations will need to allocate fresh blocks
blockPos = blockEnd;
}
return true;
}
bool CodeAllocator::allocateNewBlock(size_t& unwindInfoSize)
{
// Stop allocating once we reach a global limit
if ((blocks.size() + 1) * blockSize > maxTotalSize)
return false;
uint8_t* block = allocatePages(blockSize);
if (!block)
return false;
blockPos = block;
blockEnd = block + blockSize;
blocks.push_back(block);
if (createBlockUnwindInfo)
{
void* unwindInfo = createBlockUnwindInfo(context, block, blockSize, unwindInfoSize);
// 'Round up' to preserve alignment of the following data and code
unwindInfoSize = (unwindInfoSize + (kCodeAlignment - 1)) & ~(kCodeAlignment - 1);
LUAU_ASSERT(unwindInfoSize <= kMaxReservedDataSize);
if (!unwindInfo)
return false;
unwindInfos.push_back(unwindInfo);
}
return true;
}
} // namespace CodeGen
} // namespace Luau
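A hedged usage sketch of the allocator (hypothetical 'data'/'code' buffers and block sizes; mirrors how CodeGen::compile drives it later in this commit):
void exampleAllocate(const std::vector<uint8_t>& data, const std::vector<uint8_t>& code)
{
    Luau::CodeGen::CodeAllocator allocator(/* blockSize */ 1 << 20, /* maxTotalSize */ 1 << 28);
    uint8_t* nativeData = nullptr;
    size_t sizeNativeData = 0;
    uint8_t* codeStart = nullptr;
    if (allocator.allocate(data.data(), data.size(), code.data(), code.size(), nativeData, sizeNativeData, codeStart))
    {
        // codeStart now points into a page-aligned executable block holding 'code',
        // with 'data' placed immediately before it at kCodeAlignment
    }
}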

View File

@ -0,0 +1,121 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/CodeBlockUnwind.h"
#include "Luau/CodeAllocator.h"
#include "Luau/UnwindBuilder.h"
#include <string.h>
#include <stdlib.h>
#if defined(_WIN32) && defined(_M_X64)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <Windows.h>
#elif defined(__linux__) || defined(__APPLE__)
// Defined in unwind.h which may not be easily discoverable on various platforms
extern "C" void __register_frame(const void*);
extern "C" void __deregister_frame(const void*);
extern "C" void __unw_add_dynamic_fde() __attribute__((weak));
#endif
#if defined(__APPLE__) && defined(__aarch64__)
#include <sys/sysctl.h>
#endif
namespace Luau
{
namespace CodeGen
{
#if defined(__linux__) || defined(__APPLE__)
static void visitFdeEntries(char* pos, void (*cb)(const void*))
{
// When using glibc++ unwinder, we need to call __register_frame/__deregister_frame on the entire .eh_frame data
// When using libc++ unwinder (libunwind), each FDE has to be handled separately
// libc++ unwinder is the macOS unwinder, but on Linux the unwinder depends on the library the executable is linked with
// __unw_add_dynamic_fde is specific to libc++ unwinder, as such we determine the library based on its existence
if (__unw_add_dynamic_fde == nullptr)
return cb(pos);
for (;;)
{
unsigned partLength;
memcpy(&partLength, pos, sizeof(partLength));
if (partLength == 0) // Zero-length section signals completion
break;
unsigned partId;
memcpy(&partId, pos + 4, sizeof(partId));
if (partId != 0) // Skip CIE part
cb(pos); // CIE is found using an offset in FDE
pos += partLength + 4;
}
}
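For reference, the .eh_frame layout that the loop above walks looks like this (schematic; entry sizes are illustrative):
// [u32 length][u32 id == 0][CIE data...]  <- id == 0 marks the CIE, skipped (FDEs reference it by offset)
// [u32 length][u32 id != 0][FDE data...]  <- cb() is invoked on the start of each FDE entry
// ...more FDEs...
// [u32 0]                                 <- zero-length terminator ends the walk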
#endif
void* createBlockUnwindInfo(void* context, uint8_t* block, size_t blockSize, size_t& beginOffset)
{
UnwindBuilder* unwind = (UnwindBuilder*)context;
// All unwinding related data is placed together at the start of the block
size_t unwindSize = unwind->getSize();
unwindSize = (unwindSize + (kCodeAlignment - 1)) & ~(kCodeAlignment - 1); // Match code allocator alignment
LUAU_ASSERT(blockSize >= unwindSize);
char* unwindData = (char*)block;
unwind->finalize(unwindData, unwindSize, block, blockSize);
#if defined(_WIN32) && defined(_M_X64)
if (!RtlAddFunctionTable((RUNTIME_FUNCTION*)block, uint32_t(unwind->getFunctionCount()), uintptr_t(block)))
{
LUAU_ASSERT(!"failed to allocate function table");
return nullptr;
}
#elif defined(__linux__) || defined(__APPLE__)
visitFdeEntries(unwindData, __register_frame);
#endif
beginOffset = unwindSize + unwind->getBeginOffset();
return block;
}
void destroyBlockUnwindInfo(void* context, void* unwindData)
{
#if defined(_WIN32) && defined(_M_X64)
if (!RtlDeleteFunctionTable((RUNTIME_FUNCTION*)unwindData))
LUAU_ASSERT(!"failed to deallocate function table");
#elif defined(__linux__) || defined(__APPLE__)
visitFdeEntries((char*)unwindData, __deregister_frame);
#endif
}
bool isUnwindSupported()
{
#if defined(_WIN32) && defined(_M_X64)
return true;
#elif defined(__APPLE__) && defined(__aarch64__)
char ver[256];
size_t verLength = sizeof(ver);
// libunwind on macOS 12 and earlier (which maps to osrelease 21) assumes JIT frames use pointer authentication without a way to override that
return sysctlbyname("kern.osrelease", ver, &verLength, NULL, 0) == 0 && atoi(ver) >= 22;
#elif defined(__linux__) || defined(__APPLE__)
return true;
#else
return false;
#endif
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,616 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/CodeGen.h"
#include "Luau/Common.h"
#include "Luau/CodeAllocator.h"
#include "Luau/CodeBlockUnwind.h"
#include "Luau/IrAnalysis.h"
#include "Luau/IrBuilder.h"
#include "Luau/IrDump.h"
#include "Luau/IrUtils.h"
#include "Luau/OptimizeConstProp.h"
#include "Luau/OptimizeFinalX64.h"
#include "Luau/UnwindBuilder.h"
#include "Luau/UnwindBuilderDwarf2.h"
#include "Luau/UnwindBuilderWin.h"
#include "Luau/AssemblyBuilderA64.h"
#include "Luau/AssemblyBuilderX64.h"
#include "CustomExecUtils.h"
#include "NativeState.h"
#include "CodeGenA64.h"
#include "EmitCommonA64.h"
#include "IrLoweringA64.h"
#include "CodeGenX64.h"
#include "EmitCommonX64.h"
#include "EmitInstructionX64.h"
#include "IrLoweringX64.h"
#include "lapi.h"
#include <algorithm>
#include <memory>
#include <optional>
#if defined(__x86_64__) || defined(_M_X64)
#ifdef _MSC_VER
#include <intrin.h> // __cpuid
#else
#include <cpuid.h> // __cpuid
#endif
#endif
#if defined(__aarch64__)
#ifdef __APPLE__
#include <sys/sysctl.h>
#endif
#endif
LUAU_FASTFLAGVARIABLE(DebugCodegenNoOpt, false)
LUAU_FASTFLAGVARIABLE(DebugCodegenOptSize, false)
LUAU_FASTFLAGVARIABLE(DebugCodegenSkipNumbering, false)
namespace Luau
{
namespace CodeGen
{
static void* gPerfLogContext = nullptr;
static PerfLogFn gPerfLogFn = nullptr;
struct NativeProto
{
Proto* p;
void* execdata;
uintptr_t exectarget;
};
static NativeProto createNativeProto(Proto* proto, const IrBuilder& ir)
{
int sizecode = proto->sizecode;
uint32_t* instOffsets = new uint32_t[sizecode];
uint32_t instTarget = ir.function.bcMapping[0].asmLocation;
for (int i = 0; i < sizecode; i++)
{
LUAU_ASSERT(ir.function.bcMapping[i].asmLocation >= instTarget);
instOffsets[i] = ir.function.bcMapping[i].asmLocation - instTarget;
}
// entry target will be relocated when assembly is finalized
return {proto, instOffsets, instTarget};
}
static void destroyExecData(void* execdata)
{
delete[] static_cast<uint32_t*>(execdata);
}
static void logPerfFunction(Proto* p, uintptr_t addr, unsigned size)
{
LUAU_ASSERT(p->source);
const char* source = getstr(p->source);
source = (source[0] == '=' || source[0] == '@') ? source + 1 : "[string]";
char name[256];
snprintf(name, sizeof(name), "<luau> %s:%d %s", source, p->linedefined, p->debugname ? getstr(p->debugname) : "");
if (gPerfLogFn)
gPerfLogFn(gPerfLogContext, addr, size, name);
}
template<typename AssemblyBuilder, typename IrLowering>
static bool lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction& function, int bytecodeid, AssemblyOptions options)
{
// While we will need a better block ordering in the future, right now we want to mostly preserve build order with fallbacks outlined
std::vector<uint32_t> sortedBlocks;
sortedBlocks.reserve(function.blocks.size());
for (uint32_t i = 0; i < function.blocks.size(); i++)
sortedBlocks.push_back(i);
std::sort(sortedBlocks.begin(), sortedBlocks.end(), [&](uint32_t idxA, uint32_t idxB) {
const IrBlock& a = function.blocks[idxA];
const IrBlock& b = function.blocks[idxB];
// Place fallback blocks at the end
if ((a.kind == IrBlockKind::Fallback) != (b.kind == IrBlockKind::Fallback))
return (a.kind == IrBlockKind::Fallback) < (b.kind == IrBlockKind::Fallback);
// Try to order by instruction order
return a.start < b.start;
});
// For each IR instruction that begins a bytecode instruction, which bytecode instruction is it?
std::vector<uint32_t> bcLocations(function.instructions.size() + 1, ~0u);
for (size_t i = 0; i < function.bcMapping.size(); ++i)
{
uint32_t irLocation = function.bcMapping[i].irLocation;
if (irLocation != ~0u)
bcLocations[irLocation] = uint32_t(i);
}
bool outputEnabled = options.includeAssembly || options.includeIr;
IrToStringContext ctx{build.text, function.blocks, function.constants, function.cfg};
// We use this to skip outlined fallback blocks from IR/asm text output
size_t textSize = build.text.length();
uint32_t codeSize = build.getCodeSize();
bool seenFallback = false;
IrBlock dummy;
dummy.start = ~0u;
for (size_t i = 0; i < sortedBlocks.size(); ++i)
{
uint32_t blockIndex = sortedBlocks[i];
IrBlock& block = function.blocks[blockIndex];
if (block.kind == IrBlockKind::Dead)
continue;
LUAU_ASSERT(block.start != ~0u);
LUAU_ASSERT(block.finish != ~0u);
// If we want to skip fallback code IR/asm, we'll record when those blocks start once we see them
if (block.kind == IrBlockKind::Fallback && !seenFallback)
{
textSize = build.text.length();
codeSize = build.getCodeSize();
seenFallback = true;
}
if (options.includeIr)
{
build.logAppend("# ");
toStringDetailed(ctx, block, blockIndex, /* includeUseInfo */ true);
}
// Values can only reference restore operands in the current block
function.validRestoreOpBlockIdx = blockIndex;
build.setLabel(block.label);
for (uint32_t index = block.start; index <= block.finish; index++)
{
LUAU_ASSERT(index < function.instructions.size());
uint32_t bcLocation = bcLocations[index];
// If IR instruction is the first one for the original bytecode, we can annotate it with source code text
if (outputEnabled && options.annotator && bcLocation != ~0u)
{
options.annotator(options.annotatorContext, build.text, bytecodeid, bcLocation);
}
// If bytecode needs the location of this instruction for jumps, record it
if (bcLocation != ~0u)
{
Label label = (index == block.start) ? block.label : build.setLabel();
function.bcMapping[bcLocation].asmLocation = build.getLabelOffset(label);
}
IrInst& inst = function.instructions[index];
// Skip pseudo instructions, but make sure they are not used at this stage
// This also prevents them from getting into text output when that's enabled
if (isPseudo(inst.cmd))
{
LUAU_ASSERT(inst.useCount == 0);
continue;
}
// Either instruction result value is not referenced or the use count is not zero
LUAU_ASSERT(inst.lastUse == 0 || inst.useCount != 0);
if (options.includeIr)
{
build.logAppend("# ");
toStringDetailed(ctx, block, blockIndex, inst, index, /* includeUseInfo */ true);
}
IrBlock& next = i + 1 < sortedBlocks.size() ? function.blocks[sortedBlocks[i + 1]] : dummy;
lowering.lowerInst(inst, index, next);
if (lowering.hasError())
{
// Place labels for all blocks that we're skipping
// This is needed to avoid AssemblyBuilder assertions about jumps in earlier blocks with unplaced labels
for (size_t j = i + 1; j < sortedBlocks.size(); ++j)
{
IrBlock& abandoned = function.blocks[sortedBlocks[j]];
build.setLabel(abandoned.label);
}
return false;
}
}
lowering.finishBlock();
if (options.includeIr)
build.logAppend("#\n");
}
if (outputEnabled && !options.includeOutlinedCode && seenFallback)
{
build.text.resize(textSize);
if (options.includeAssembly)
build.logAppend("; skipping %u bytes of outlined code\n", unsigned((build.getCodeSize() - codeSize) * sizeof(build.code[0])));
}
return true;
}
[[maybe_unused]] static bool lowerIr(
X64::AssemblyBuilderX64& build, IrBuilder& ir, NativeState& data, ModuleHelpers& helpers, Proto* proto, AssemblyOptions options)
{
optimizeMemoryOperandsX64(ir.function);
X64::IrLoweringX64 lowering(build, helpers, data, ir.function);
return lowerImpl(build, lowering, ir.function, proto->bytecodeid, options);
}
[[maybe_unused]] static bool lowerIr(
A64::AssemblyBuilderA64& build, IrBuilder& ir, NativeState& data, ModuleHelpers& helpers, Proto* proto, AssemblyOptions options)
{
A64::IrLoweringA64 lowering(build, helpers, data, proto, ir.function);
return lowerImpl(build, lowering, ir.function, proto->bytecodeid, options);
}
template<typename AssemblyBuilder>
static std::optional<NativeProto> assembleFunction(AssemblyBuilder& build, NativeState& data, ModuleHelpers& helpers, Proto* proto, AssemblyOptions options)
{
if (options.includeAssembly || options.includeIr)
{
if (proto->debugname)
build.logAppend("; function %s(", getstr(proto->debugname));
else
build.logAppend("; function(");
for (int i = 0; i < proto->numparams; i++)
{
LocVar* var = proto->locvars ? &proto->locvars[proto->sizelocvars - proto->numparams + i] : nullptr;
if (var && var->varname)
build.logAppend("%s%s", i == 0 ? "" : ", ", getstr(var->varname));
else
build.logAppend("%s$arg%d", i == 0 ? "" : ", ", i);
}
if (proto->numparams != 0 && proto->is_vararg)
build.logAppend(", ...)");
else
build.logAppend(")");
if (proto->linedefined >= 0)
build.logAppend(" line %d\n", proto->linedefined);
else
build.logAppend("\n");
}
IrBuilder ir;
ir.buildFunctionIr(proto);
computeCfgInfo(ir.function);
if (!FFlag::DebugCodegenNoOpt)
{
bool useValueNumbering = !FFlag::DebugCodegenSkipNumbering;
constPropInBlockChains(ir, useValueNumbering);
if (!FFlag::DebugCodegenOptSize)
createLinearBlocks(ir, useValueNumbering);
}
if (!lowerIr(build, ir, data, helpers, proto, options))
{
if (build.logText)
build.logAppend("; skipping (can't lower)\n\n");
return std::nullopt;
}
if (build.logText)
build.logAppend("\n");
return createNativeProto(proto, ir);
}
static void onCloseState(lua_State* L)
{
destroyNativeState(L);
}
static void onDestroyFunction(lua_State* L, Proto* proto)
{
destroyExecData(proto->execdata);
proto->execdata = nullptr;
proto->exectarget = 0;
}
static int onEnter(lua_State* L, Proto* proto)
{
NativeState* data = getNativeState(L);
LUAU_ASSERT(proto->execdata);
LUAU_ASSERT(L->ci->savedpc >= proto->code && L->ci->savedpc < proto->code + proto->sizecode);
uintptr_t target = proto->exectarget + static_cast<uint32_t*>(proto->execdata)[L->ci->savedpc - proto->code];
// Returns 1 to finish the function in the VM
return GateFn(data->context.gateEntry)(L, proto, target, &data->context);
}
static void onSetBreakpoint(lua_State* L, Proto* proto, int instruction)
{
if (!proto->execdata)
return;
LUAU_ASSERT(!"native breakpoints are not implemented");
}
#if defined(__aarch64__)
static unsigned int getCpuFeaturesA64()
{
unsigned int result = 0;
#ifdef __APPLE__
int jscvt = 0;
size_t jscvtLen = sizeof(jscvt);
if (sysctlbyname("hw.optional.arm.FEAT_JSCVT", &jscvt, &jscvtLen, nullptr, 0) == 0 && jscvt == 1)
result |= A64::Feature_JSCVT;
#endif
return result;
}
#endif
bool isSupported()
{
if (!LUA_CUSTOM_EXECUTION)
return false;
if (LUA_EXTRA_SIZE != 1)
return false;
if (sizeof(TValue) != 16)
return false;
if (sizeof(LuaNode) != 32)
return false;
// Windows CRT uses stack unwinding in longjmp so we have to use unwind data; on other platforms, it's only necessary for C++ EH.
#if defined(_WIN32)
if (!isUnwindSupported())
return false;
#else
if (!LUA_USE_LONGJMP && !isUnwindSupported())
return false;
#endif
#if defined(__x86_64__) || defined(_M_X64)
int cpuinfo[4] = {};
#ifdef _MSC_VER
__cpuid(cpuinfo, 1);
#else
__cpuid(1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);
#endif
// We require AVX1 support for VEX encoded XMM operations
// We also require SSE4.1 support for ROUNDSD, but the AVX check below covers it
// https://en.wikipedia.org/wiki/CPUID#EAX=1:_Processor_Info_and_Feature_Bits
if ((cpuinfo[2] & (1 << 28)) == 0)
return false;
return true;
#elif defined(__aarch64__)
return true;
#else
return false;
#endif
}
void create(lua_State* L)
{
LUAU_ASSERT(isSupported());
NativeState& data = *createNativeState(L);
#if defined(_WIN32)
data.unwindBuilder = std::make_unique<UnwindBuilderWin>();
#else
data.unwindBuilder = std::make_unique<UnwindBuilderDwarf2>();
#endif
data.codeAllocator.context = data.unwindBuilder.get();
data.codeAllocator.createBlockUnwindInfo = createBlockUnwindInfo;
data.codeAllocator.destroyBlockUnwindInfo = destroyBlockUnwindInfo;
initFunctions(data);
#if defined(__x86_64__) || defined(_M_X64)
if (!X64::initHeaderFunctions(data))
{
destroyNativeState(L);
return;
}
#elif defined(__aarch64__)
if (!A64::initHeaderFunctions(data))
{
destroyNativeState(L);
return;
}
#endif
if (gPerfLogFn)
gPerfLogFn(gPerfLogContext, uintptr_t(data.context.gateEntry), 4096, "<luau gate>");
lua_ExecutionCallbacks* ecb = getExecutionCallbacks(L);
ecb->close = onCloseState;
ecb->destroy = onDestroyFunction;
ecb->enter = onEnter;
ecb->setbreakpoint = onSetBreakpoint;
}
static void gatherFunctions(std::vector<Proto*>& results, Proto* proto)
{
if (results.size() <= size_t(proto->bytecodeid))
results.resize(proto->bytecodeid + 1);
// Skip protos that we've already compiled in this run: this happens because at -O2, inlined functions get their protos reused
if (results[proto->bytecodeid])
return;
results[proto->bytecodeid] = proto;
for (int i = 0; i < proto->sizep; i++)
gatherFunctions(results, proto->p[i]);
}
void compile(lua_State* L, int idx)
{
LUAU_ASSERT(lua_isLfunction(L, idx));
const TValue* func = luaA_toobject(L, idx);
// If initialization has failed, do not compile any functions
if (!getNativeState(L))
return;
#if defined(__aarch64__)
A64::AssemblyBuilderA64 build(/* logText= */ false, getCpuFeaturesA64());
#else
X64::AssemblyBuilderX64 build(/* logText= */ false);
#endif
NativeState* data = getNativeState(L);
std::vector<Proto*> protos;
gatherFunctions(protos, clvalue(func)->l.p);
ModuleHelpers helpers;
#if defined(__aarch64__)
A64::assembleHelpers(build, helpers);
#else
X64::assembleHelpers(build, helpers);
#endif
std::vector<NativeProto> results;
results.reserve(protos.size());
// Skip protos that have been compiled during previous invocations of CodeGen::compile
for (Proto* p : protos)
if (p && p->execdata == nullptr)
if (std::optional<NativeProto> np = assembleFunction(build, *data, helpers, p, {}))
results.push_back(*np);
// Very large modules might result in overflowing a jump offset; in this case we currently abandon the entire module
if (!build.finalize())
{
for (NativeProto result : results)
destroyExecData(result.execdata);
return;
}
// If no functions were assembled, we don't need to allocate/copy executable pages for helpers
if (results.empty())
return;
uint8_t* nativeData = nullptr;
size_t sizeNativeData = 0;
uint8_t* codeStart = nullptr;
if (!data->codeAllocator.allocate(build.data.data(), int(build.data.size()), reinterpret_cast<const uint8_t*>(build.code.data()),
int(build.code.size() * sizeof(build.code[0])), nativeData, sizeNativeData, codeStart))
{
for (NativeProto result : results)
destroyExecData(result.execdata);
return;
}
if (gPerfLogFn && results.size() > 0)
{
gPerfLogFn(gPerfLogContext, uintptr_t(codeStart), uint32_t(results[0].exectarget), "<luau helpers>");
for (size_t i = 0; i < results.size(); ++i)
{
uint32_t begin = uint32_t(results[i].exectarget);
uint32_t end = i + 1 < results.size() ? uint32_t(results[i + 1].exectarget) : uint32_t(build.code.size() * sizeof(build.code[0]));
LUAU_ASSERT(begin < end);
logPerfFunction(results[i].p, uintptr_t(codeStart) + begin, end - begin);
}
}
for (NativeProto result : results)
{
// the memory is now managed by VM and will be freed via onDestroyFunction
result.p->execdata = result.execdata;
result.p->exectarget = uintptr_t(codeStart) + result.exectarget;
}
}
std::string getAssembly(lua_State* L, int idx, AssemblyOptions options)
{
LUAU_ASSERT(lua_isLfunction(L, idx));
const TValue* func = luaA_toobject(L, idx);
#if defined(__aarch64__)
A64::AssemblyBuilderA64 build(/* logText= */ options.includeAssembly, getCpuFeaturesA64());
#else
X64::AssemblyBuilderX64 build(/* logText= */ options.includeAssembly);
#endif
NativeState data;
initFunctions(data);
std::vector<Proto*> protos;
gatherFunctions(protos, clvalue(func)->l.p);
ModuleHelpers helpers;
#if defined(__aarch64__)
A64::assembleHelpers(build, helpers);
#else
X64::assembleHelpers(build, helpers);
#endif
for (Proto* p : protos)
if (p)
if (std::optional<NativeProto> np = assembleFunction(build, data, helpers, p, options))
destroyExecData(np->execdata);
if (!build.finalize())
return std::string();
if (options.outputBinary)
return std::string(reinterpret_cast<const char*>(build.code.data()), reinterpret_cast<const char*>(build.code.data() + build.code.size())) +
std::string(build.data.begin(), build.data.end());
else
return build.text;
}
void setPerfLog(void* context, PerfLogFn logFn)
{
gPerfLogContext = context;
gPerfLogFn = logFn;
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,236 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "CodeGenA64.h"
#include "Luau/AssemblyBuilderA64.h"
#include "Luau/UnwindBuilder.h"
#include "BitUtils.h"
#include "CustomExecUtils.h"
#include "NativeState.h"
#include "EmitCommonA64.h"
#include "lstate.h"
namespace Luau
{
namespace CodeGen
{
namespace A64
{
struct EntryLocations
{
Label start;
Label prologueEnd;
Label epilogueStart;
};
static void emitExit(AssemblyBuilderA64& build, bool continueInVm)
{
build.mov(x0, continueInVm);
build.ldr(x1, mem(rNativeContext, offsetof(NativeContext, gateExit)));
build.br(x1);
}
static void emitInterrupt(AssemblyBuilderA64& build)
{
// x0 = pc offset
// x1 = return address in native code
Label skip;
// Stash return address in rBase; we need to reload rBase anyway
build.mov(rBase, x1);
// Load interrupt handler; it may be nullptr in case the update raced with the check before we got here
build.ldr(x2, mem(rState, offsetof(lua_State, global)));
build.ldr(x2, mem(x2, offsetof(global_State, cb.interrupt)));
build.cbz(x2, skip);
// Update savedpc; required in case interrupt errors
build.add(x0, rCode, x0);
build.ldr(x1, mem(rState, offsetof(lua_State, ci)));
build.str(x0, mem(x1, offsetof(CallInfo, savedpc)));
// Call interrupt
build.mov(x0, rState);
build.mov(w1, -1);
build.blr(x2);
// Check if we need to exit
build.ldrb(w0, mem(rState, offsetof(lua_State, status)));
build.cbz(w0, skip);
// L->ci->savedpc--
// note: recomputing this avoids having to stash x0
build.ldr(x1, mem(rState, offsetof(lua_State, ci)));
build.ldr(x0, mem(x1, offsetof(CallInfo, savedpc)));
build.sub(x0, x0, sizeof(Instruction));
build.str(x0, mem(x1, offsetof(CallInfo, savedpc)));
emitExit(build, /* continueInVm */ false);
build.setLabel(skip);
// Return back to caller; rBase has stashed return address
build.mov(x0, rBase);
emitUpdateBase(build); // interrupt may have reallocated stack
build.br(x0);
}
static void emitReentry(AssemblyBuilderA64& build, ModuleHelpers& helpers)
{
// x0 = closure object to re-enter (equal to clvalue(L->ci->func))
// If the fallback requested an exit, we need to do this right away
build.cbz(x0, helpers.exitNoContinueVm);
emitUpdateBase(build);
// Need to update state of the current function before we jump away
build.ldr(x1, mem(x0, offsetof(Closure, l.p))); // cl->l.p aka proto
build.ldr(x2, mem(rState, offsetof(lua_State, ci))); // L->ci
// We need to check if the new frame can be executed natively
// TODO: .flags and .savedpc loads below can be fused with ldp
build.ldr(w3, mem(x2, offsetof(CallInfo, flags)));
build.tbz(x3, countrz(LUA_CALLINFO_CUSTOM), helpers.exitContinueVm);
build.mov(rClosure, x0);
build.ldr(rConstants, mem(x1, offsetof(Proto, k))); // proto->k
build.ldr(rCode, mem(x1, offsetof(Proto, code))); // proto->code
// Get instruction index from instruction pointer
// To get instruction index from instruction pointer, we need to divide byte offset by 4
// But we will actually need to scale instruction index by 4 back to byte offset later so it cancels out
build.ldr(x2, mem(x2, offsetof(CallInfo, savedpc))); // L->ci->savedpc
build.sub(x2, x2, rCode);
// Get new instruction location and jump to it
LUAU_ASSERT(offsetof(Proto, exectarget) == offsetof(Proto, execdata) + 8);
build.ldp(x3, x4, mem(x1, offsetof(Proto, execdata)));
build.ldr(w2, mem(x3, x2));
build.add(x4, x4, x2);
build.br(x4);
}
static EntryLocations buildEntryFunction(AssemblyBuilderA64& build, UnwindBuilder& unwind)
{
EntryLocations locations;
// Arguments: x0 = lua_State*, x1 = Proto*, x2 = native code pointer to jump to, x3 = NativeContext*
locations.start = build.setLabel();
// prologue
build.sub(sp, sp, kStackSize);
build.stp(x29, x30, mem(sp)); // fp, lr
// stash non-volatile registers used for execution environment
build.stp(x19, x20, mem(sp, 16));
build.stp(x21, x22, mem(sp, 32));
build.stp(x23, x24, mem(sp, 48));
build.mov(x29, sp); // this is only necessary if we maintain frame pointers, which we do in the JIT for now
locations.prologueEnd = build.setLabel();
uint32_t prologueSize = build.getLabelOffset(locations.prologueEnd) - build.getLabelOffset(locations.start);
// Setup native execution environment
build.mov(rState, x0);
build.mov(rNativeContext, x3);
build.ldr(rBase, mem(x0, offsetof(lua_State, base))); // L->base
build.ldr(rConstants, mem(x1, offsetof(Proto, k))); // proto->k
build.ldr(rCode, mem(x1, offsetof(Proto, code))); // proto->code
build.ldr(x9, mem(x0, offsetof(lua_State, ci))); // L->ci
build.ldr(x9, mem(x9, offsetof(CallInfo, func))); // L->ci->func
build.ldr(rClosure, mem(x9, offsetof(TValue, value.gc))); // L->ci->func->value.gc aka cl
// Jump to the specified instruction; further control flow will be handled with custom ABI with register setup from EmitCommonA64.h
build.br(x2);
// Even though we jumped away, we will return here in the end
locations.epilogueStart = build.setLabel();
// Cleanup and exit
build.ldp(x23, x24, mem(sp, 48));
build.ldp(x21, x22, mem(sp, 32));
build.ldp(x19, x20, mem(sp, 16));
build.ldp(x29, x30, mem(sp)); // fp, lr
build.add(sp, sp, kStackSize);
build.ret();
// Our entry function is special: it spans the whole remaining code area
unwind.startFunction();
unwind.prologueA64(prologueSize, kStackSize, {x29, x30, x19, x20, x21, x22, x23, x24});
unwind.finishFunction(build.getLabelOffset(locations.start), kFullBlockFunction);
return locations;
}
bool initHeaderFunctions(NativeState& data)
{
AssemblyBuilderA64 build(/* logText= */ false);
UnwindBuilder& unwind = *data.unwindBuilder.get();
unwind.startInfo(UnwindBuilder::A64);
EntryLocations entryLocations = buildEntryFunction(build, unwind);
build.finalize();
unwind.finishInfo();
LUAU_ASSERT(build.data.empty());
uint8_t* codeStart = nullptr;
if (!data.codeAllocator.allocate(build.data.data(), int(build.data.size()), reinterpret_cast<const uint8_t*>(build.code.data()),
int(build.code.size() * sizeof(build.code[0])), data.gateData, data.gateDataSize, codeStart))
{
LUAU_ASSERT(!"failed to create entry function");
return false;
}
// Set the offset at the beginning so that functions in new blocks will not overlap the locations
// specified by the unwind information of the entry function
unwind.setBeginOffset(build.getLabelOffset(entryLocations.prologueEnd));
data.context.gateEntry = codeStart + build.getLabelOffset(entryLocations.start);
data.context.gateExit = codeStart + build.getLabelOffset(entryLocations.epilogueStart);
return true;
}
void assembleHelpers(AssemblyBuilderA64& build, ModuleHelpers& helpers)
{
if (build.logText)
build.logAppend("; exitContinueVm\n");
helpers.exitContinueVm = build.setLabel();
emitExit(build, /* continueInVm */ true);
if (build.logText)
build.logAppend("; exitNoContinueVm\n");
helpers.exitNoContinueVm = build.setLabel();
emitExit(build, /* continueInVm */ false);
if (build.logText)
build.logAppend("; reentry\n");
helpers.reentry = build.setLabel();
emitReentry(build, helpers);
if (build.logText)
build.logAppend("; interrupt\n");
helpers.interrupt = build.setLabel();
emitInterrupt(build);
}
} // namespace A64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,22 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
namespace Luau
{
namespace CodeGen
{
struct NativeState;
struct ModuleHelpers;
namespace A64
{
class AssemblyBuilderA64;
bool initHeaderFunctions(NativeState& data);
void assembleHelpers(AssemblyBuilderA64& build, ModuleHelpers& helpers);
} // namespace A64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,959 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "CodeGenUtils.h"
#include "CustomExecUtils.h"
#include "lvm.h"
#include "lbuiltins.h"
#include "lbytecode.h"
#include "ldebug.h"
#include "ldo.h"
#include "lfunc.h"
#include "lgc.h"
#include "lmem.h"
#include "lnumutils.h"
#include "lstate.h"
#include "lstring.h"
#include "ltable.h"
#include <string.h>
LUAU_FASTFLAG(LuauUniformTopHandling)
// All external function calls that can cause stack realloc or Lua calls have to be wrapped in VM_PROTECT
// This makes sure that we save the pc (in case the Lua call needs to generate a backtrace) before the call,
// and restores the stack pointer after in case stack gets reallocated
// Should only be used on the slow paths.
#define VM_PROTECT(x) \
{ \
L->ci->savedpc = pc; \
{ \
x; \
}; \
base = L->base; \
}
// Some external functions can cause an error, but never reallocate the stack; for these, VM_PROTECT_PC() is
// a cheaper version of VM_PROTECT that can be called before the external call.
#define VM_PROTECT_PC() L->ci->savedpc = pc
#define VM_REG(i) (LUAU_ASSERT(unsigned(i) < unsigned(L->top - base)), &base[i])
#define VM_KV(i) (LUAU_ASSERT(unsigned(i) < unsigned(cl->l.p->sizek)), &k[i])
#define VM_UV(i) (LUAU_ASSERT(unsigned(i) < unsigned(cl->nupvalues)), &cl->l.uprefs[i])
#define VM_PATCH_C(pc, slot) *const_cast<Instruction*>(pc) = ((uint8_t(slot) << 24) | (0x00ffffffu & *(pc)))
#define VM_PATCH_E(pc, slot) *const_cast<Instruction*>(pc) = ((uint32_t(slot) << 8) | (0x000000ffu & *(pc)))
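VM_PATCH_C overwrites the C operand stored in the top byte of the 32-bit instruction word, while VM_PATCH_E overwrites the upper 24 bits holding the E operand; a worked example with illustrative values:
// given *pc == 0x12345678:
//   VM_PATCH_C(pc, 0xAB) leaves *pc == 0xAB345678
//   VM_PATCH_E(pc, 0xABCDEF) leaves *pc == 0xABCDEF78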
#define VM_INTERRUPT() \
{ \
void (*interrupt)(lua_State*, int) = L->global->cb.interrupt; \
if (LUAU_UNLIKELY(!!interrupt)) \
{ /* the interrupt hook is called right before we advance pc */ \
VM_PROTECT(L->ci->savedpc++; interrupt(L, -1)); \
if (L->status != 0) \
{ \
L->ci->savedpc--; \
return NULL; \
} \
} \
}
namespace Luau
{
namespace CodeGen
{
bool forgLoopTableIter(lua_State* L, Table* h, int index, TValue* ra)
{
int sizearray = h->sizearray;
// first we advance index through the array portion
while (unsigned(index) < unsigned(sizearray))
{
TValue* e = &h->array[index];
if (!ttisnil(e))
{
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
setnvalue(ra + 3, double(index + 1));
setobj2s(L, ra + 4, e);
return true;
}
index++;
}
int sizenode = 1 << h->lsizenode;
// then we advance index through the hash portion
while (unsigned(index - h->sizearray) < unsigned(sizenode))
{
LuaNode* n = &h->node[index - sizearray];
if (!ttisnil(gval(n)))
{
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
getnodekey(L, ra + 3, n);
setobj(L, ra + 4, gval(n));
return true;
}
index++;
}
return false;
}
bool forgLoopNodeIter(lua_State* L, Table* h, int index, TValue* ra)
{
int sizearray = h->sizearray;
int sizenode = 1 << h->lsizenode;
// we advance index through the hash portion
while (unsigned(index - sizearray) < unsigned(sizenode))
{
LuaNode* n = &h->node[index - sizearray];
if (!ttisnil(gval(n)))
{
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
getnodekey(L, ra + 3, n);
setobj(L, ra + 4, gval(n));
return true;
}
index++;
}
return false;
}
bool forgLoopNonTableFallback(lua_State* L, int insnA, int aux)
{
TValue* base = L->base;
TValue* ra = VM_REG(insnA);
// note: it's safe to push arguments past top for complicated reasons (see lvmexecute.cpp)
setobj2s(L, ra + 3 + 2, ra + 2);
setobj2s(L, ra + 3 + 1, ra + 1);
setobj2s(L, ra + 3, ra);
L->top = ra + 3 + 3; // func + 2 args (state and index)
LUAU_ASSERT(L->top <= L->stack_last);
luaD_call(L, ra + 3, uint8_t(aux));
L->top = L->ci->top;
// recompute ra since stack might have been reallocated
base = L->base;
ra = VM_REG(insnA);
// copy first variable back into the iteration index
setobj2s(L, ra + 2, ra + 3);
return !ttisnil(ra + 3);
}
void forgPrepXnextFallback(lua_State* L, TValue* ra, int pc)
{
if (!ttisfunction(ra))
{
Closure* cl = clvalue(L->ci->func);
L->ci->savedpc = cl->l.p->code + pc;
luaG_typeerror(L, ra, "iterate over");
}
}
Closure* callProlog(lua_State* L, TValue* ra, StkId argtop, int nresults)
{
// slow-path: not a function call
if (LUAU_UNLIKELY(!ttisfunction(ra)))
{
luaV_tryfuncTM(L, ra);
argtop++; // __call adds an extra self
}
Closure* ccl = clvalue(ra);
CallInfo* ci = incr_ci(L);
ci->func = ra;
ci->base = ra + 1;
ci->top = argtop + ccl->stacksize; // note: technically UB since we haven't reallocated the stack yet
ci->savedpc = NULL;
ci->flags = 0;
ci->nresults = nresults;
L->base = ci->base;
L->top = argtop;
// note: this reallocs stack, but we don't need to VM_PROTECT this
// this is because we're going to modify base/savedpc manually anyhow
// crucially, we can't use ra/argtop after this line
luaD_checkstack(L, ccl->stacksize);
return ccl;
}
void callEpilogC(lua_State* L, int nresults, int n)
{
// ci is our callinfo, cip is our parent
CallInfo* ci = L->ci;
CallInfo* cip = ci - 1;
// copy return values into parent stack (but only up to nresults!), fill the rest with nil
// note: in MULTRET context nresults starts as -1 so i != 0 condition never activates intentionally
StkId res = ci->func;
StkId vali = L->top - n;
StkId valend = L->top;
int i;
for (i = nresults; i != 0 && vali < valend; i--)
setobj2s(L, res++, vali++);
while (i-- > 0)
setnilvalue(res++);
// pop the stack frame
L->ci = cip;
L->base = cip->base;
L->top = (nresults == LUA_MULTRET) ? res : cip->top;
}
// Extracted as-is from lvmexecute.cpp with the exception of control flow (reentry) and removed interrupts/savedpc
Closure* callFallback(lua_State* L, StkId ra, StkId argtop, int nresults)
{
// slow-path: not a function call
if (LUAU_UNLIKELY(!ttisfunction(ra)))
{
luaV_tryfuncTM(L, ra);
argtop++; // __call adds an extra self
}
Closure* ccl = clvalue(ra);
CallInfo* ci = incr_ci(L);
ci->func = ra;
ci->base = ra + 1;
ci->top = argtop + ccl->stacksize; // note: technically UB since we haven't reallocated the stack yet
ci->savedpc = NULL;
ci->flags = 0;
ci->nresults = nresults;
L->base = ci->base;
L->top = argtop;
// note: this reallocs stack, but we don't need to VM_PROTECT this
// this is because we're going to modify base/savedpc manually anyhow
// crucially, we can't use ra/argtop after this line
luaD_checkstack(L, ccl->stacksize);
LUAU_ASSERT(ci->top <= L->stack_last);
if (!ccl->isC)
{
Proto* p = ccl->l.p;
// fill unused parameters with nil
StkId argi = L->top;
StkId argend = L->base + p->numparams;
while (argi < argend)
setnilvalue(argi++); // complete missing arguments
L->top = p->is_vararg ? argi : ci->top;
// keep executing new function
ci->savedpc = p->code;
if (LUAU_LIKELY(p->execdata != NULL))
ci->flags = LUA_CALLINFO_CUSTOM;
return ccl;
}
else
{
lua_CFunction func = ccl->c.f;
int n = func(L);
// yield
if (n < 0)
return NULL;
// ci is our callinfo, cip is our parent
CallInfo* ci = L->ci;
CallInfo* cip = ci - 1;
// copy return values into parent stack (but only up to nresults!), fill the rest with nil
// note: in MULTRET context nresults starts as -1 so i != 0 condition never activates intentionally
StkId res = ci->func;
StkId vali = L->top - n;
StkId valend = L->top;
int i;
for (i = nresults; i != 0 && vali < valend; i--)
setobj2s(L, res++, vali++);
while (i-- > 0)
setnilvalue(res++);
// pop the stack frame
L->ci = cip;
L->base = cip->base;
L->top = (nresults == LUA_MULTRET) ? res : cip->top;
// keep executing current function
LUAU_ASSERT(isLua(cip));
return clvalue(cip->func);
}
}
// Extracted as-is from lvmexecute.cpp with the exception of control flow (reentry) and removed interrupts
Closure* returnFallback(lua_State* L, StkId ra, StkId valend)
{
// ci is our callinfo, cip is our parent
CallInfo* ci = L->ci;
CallInfo* cip = ci - 1;
StkId res = ci->func; // note: we assume CALL always puts func+args and expects results to start at func
StkId vali = ra;
int nresults = ci->nresults;
// copy return values into parent stack (but only up to nresults!), fill the rest with nil
// note: in MULTRET context nresults starts as -1 so i != 0 condition never activates intentionally
int i;
for (i = nresults; i != 0 && vali < valend; i--)
setobj2s(L, res++, vali++);
while (i-- > 0)
setnilvalue(res++);
// pop the stack frame
L->ci = cip;
L->base = cip->base;
L->top = (nresults == LUA_MULTRET) ? res : cip->top;
// we're done!
if (LUAU_UNLIKELY(ci->flags & LUA_CALLINFO_RETURN))
{
if (!FFlag::LuauUniformTopHandling)
L->top = res;
return NULL;
}
// keep executing new function
LUAU_ASSERT(isLua(cip));
return clvalue(cip->func);
}
const Instruction* executeGETGLOBAL(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
uint32_t aux = *pc++;
TValue* kv = VM_KV(aux);
LUAU_ASSERT(ttisstring(kv));
// fast-path should already have been checked, so we skip checking for it here
Table* h = cl->env;
int slot = LUAU_INSN_C(insn) & h->nodemask8;
// slow-path, may invoke Lua calls via __index metamethod
TValue g;
sethvalue(L, &g, h);
L->cachedslot = slot;
VM_PROTECT(luaV_gettable(L, &g, kv, ra));
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, L->cachedslot);
return pc;
}
const Instruction* executeSETGLOBAL(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
uint32_t aux = *pc++;
TValue* kv = VM_KV(aux);
LUAU_ASSERT(ttisstring(kv));
// fast-path should already have been checked, so we skip checking for it here
Table* h = cl->env;
int slot = LUAU_INSN_C(insn) & h->nodemask8;
// slow-path, may invoke Lua calls via __newindex metamethod
TValue g;
sethvalue(L, &g, h);
L->cachedslot = slot;
VM_PROTECT(luaV_settable(L, &g, kv, ra));
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, L->cachedslot);
return pc;
}
const Instruction* executeGETTABLEKS(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
StkId rb = VM_REG(LUAU_INSN_B(insn));
uint32_t aux = *pc++;
TValue* kv = VM_KV(aux);
LUAU_ASSERT(ttisstring(kv));
// fast-path: built-in table
if (ttistable(rb))
{
Table* h = hvalue(rb);
int slot = LUAU_INSN_C(insn) & h->nodemask8;
LuaNode* n = &h->node[slot];
// fast-path: value is in expected slot
if (LUAU_LIKELY(ttisstring(gkey(n)) && tsvalue(gkey(n)) == tsvalue(kv) && !ttisnil(gval(n))))
{
setobj2s(L, ra, gval(n));
return pc;
}
else if (!h->metatable)
{
// fast-path: value is not in expected slot, but the table lookup doesn't involve metatable
const TValue* res = luaH_getstr(h, tsvalue(kv));
if (res != luaO_nilobject)
{
int cachedslot = gval2slot(h, res);
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, cachedslot);
}
setobj2s(L, ra, res);
return pc;
}
else
{
// slow-path, may invoke Lua calls via __index metamethod
L->cachedslot = slot;
VM_PROTECT(luaV_gettable(L, rb, kv, ra));
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, L->cachedslot);
return pc;
}
}
else
{
// fast-path: user data with C __index TM
const TValue* fn = 0;
if (ttisuserdata(rb) && (fn = fasttm(L, uvalue(rb)->metatable, TM_INDEX)) && ttisfunction(fn) && clvalue(fn)->isC)
{
// note: it's safe to push arguments past top for complicated reasons (see top of the file)
LUAU_ASSERT(L->top + 3 < L->stack + L->stacksize);
StkId top = L->top;
setobj2s(L, top + 0, fn);
setobj2s(L, top + 1, rb);
setobj2s(L, top + 2, kv);
L->top = top + 3;
L->cachedslot = LUAU_INSN_C(insn);
VM_PROTECT(luaV_callTM(L, 2, LUAU_INSN_A(insn)));
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, L->cachedslot);
return pc;
}
else if (ttisvector(rb))
{
// fast-path: quick case-insensitive comparison with "X"/"Y"/"Z"
const char* name = getstr(tsvalue(kv));
int ic = (name[0] | ' ') - 'x';
#if LUA_VECTOR_SIZE == 4
// 'w' is before 'x' in ASCII, so ic is -1 when indexing with 'w'
if (ic == -1)
ic = 3;
#endif
if (unsigned(ic) < LUA_VECTOR_SIZE && name[1] == '\0')
{
const float* v = rb->value.v; // silences ubsan when indexing v[]
setnvalue(ra, v[ic]);
return pc;
}
fn = fasttm(L, L->global->mt[LUA_TVECTOR], TM_INDEX);
if (fn && ttisfunction(fn) && clvalue(fn)->isC)
{
// note: it's safe to push arguments past top for complicated reasons (see top of the file)
LUAU_ASSERT(L->top + 3 < L->stack + L->stacksize);
StkId top = L->top;
setobj2s(L, top + 0, fn);
setobj2s(L, top + 1, rb);
setobj2s(L, top + 2, kv);
L->top = top + 3;
L->cachedslot = LUAU_INSN_C(insn);
VM_PROTECT(luaV_callTM(L, 2, LUAU_INSN_A(insn)));
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, L->cachedslot);
return pc;
}
// fall through to slow path
}
// fall through to slow path
}
// slow-path, may invoke Lua calls via __index metamethod
VM_PROTECT(luaV_gettable(L, rb, kv, ra));
return pc;
}
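// Illustrative sketch (not part of the original commit): the component-index trick above,
// extracted. (c | ' ') lowercases an ASCII letter, so 'X' and 'x' both map to 0; 'w' sits
// just below 'x' and yields -1, which is remapped to 3 when vectors are 4-wide.
#if 0
static int vectorComponentIndexSketch(char c)
{
    int ic = (c | ' ') - 'x'; // 'x'/'X' -> 0, 'y'/'Y' -> 1, 'z'/'Z' -> 2, 'w'/'W' -> -1
#if LUA_VECTOR_SIZE == 4
    if (ic == -1)
        ic = 3;
#endif
    return ic; // the caller still validates unsigned(ic) < LUA_VECTOR_SIZE and name[1] == '\0'
}
#endif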
const Instruction* executeSETTABLEKS(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
StkId rb = VM_REG(LUAU_INSN_B(insn));
uint32_t aux = *pc++;
TValue* kv = VM_KV(aux);
LUAU_ASSERT(ttisstring(kv));
// fast-path: built-in table
if (ttistable(rb))
{
Table* h = hvalue(rb);
int slot = LUAU_INSN_C(insn) & h->nodemask8;
LuaNode* n = &h->node[slot];
// fast-path: value is in expected slot
if (LUAU_LIKELY(ttisstring(gkey(n)) && tsvalue(gkey(n)) == tsvalue(kv) && !ttisnil(gval(n)) && !h->readonly))
{
setobj2t(L, gval(n), ra);
luaC_barriert(L, h, ra);
return pc;
}
else if (fastnotm(h->metatable, TM_NEWINDEX) && !h->readonly)
{
VM_PROTECT_PC(); // set may fail
TValue* res = luaH_setstr(L, h, tsvalue(kv));
int cachedslot = gval2slot(h, res);
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, cachedslot);
setobj2t(L, res, ra);
luaC_barriert(L, h, ra);
return pc;
}
else
{
// slow-path, may invoke Lua calls via __newindex metamethod
L->cachedslot = slot;
VM_PROTECT(luaV_settable(L, rb, kv, ra));
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, L->cachedslot);
return pc;
}
}
else
{
// fast-path: user data with C __newindex TM
const TValue* fn = 0;
if (ttisuserdata(rb) && (fn = fasttm(L, uvalue(rb)->metatable, TM_NEWINDEX)) && ttisfunction(fn) && clvalue(fn)->isC)
{
// note: it's safe to push arguments past top for complicated reasons (see top of the file)
LUAU_ASSERT(L->top + 4 < L->stack + L->stacksize);
StkId top = L->top;
setobj2s(L, top + 0, fn);
setobj2s(L, top + 1, rb);
setobj2s(L, top + 2, kv);
setobj2s(L, top + 3, ra);
L->top = top + 4;
L->cachedslot = LUAU_INSN_C(insn);
VM_PROTECT(luaV_callTM(L, 3, -1));
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, L->cachedslot);
return pc;
}
else
{
// slow-path, may invoke Lua calls via __newindex metamethod
VM_PROTECT(luaV_settable(L, rb, kv, ra));
return pc;
}
}
}
const Instruction* executeNEWCLOSURE(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
Proto* pv = cl->l.p->p[LUAU_INSN_D(insn)];
LUAU_ASSERT(unsigned(LUAU_INSN_D(insn)) < unsigned(cl->l.p->sizep));
VM_PROTECT_PC(); // luaF_newLclosure may fail due to OOM
// note: we save closure to stack early in case the code below wants to capture it by value
Closure* ncl = luaF_newLclosure(L, pv->nups, cl->env, pv);
setclvalue(L, ra, ncl);
for (int ui = 0; ui < pv->nups; ++ui)
{
Instruction uinsn = *pc++;
LUAU_ASSERT(LUAU_INSN_OP(uinsn) == LOP_CAPTURE);
switch (LUAU_INSN_A(uinsn))
{
case LCT_VAL:
setobj(L, &ncl->l.uprefs[ui], VM_REG(LUAU_INSN_B(uinsn)));
break;
case LCT_REF:
setupvalue(L, &ncl->l.uprefs[ui], luaF_findupval(L, VM_REG(LUAU_INSN_B(uinsn))));
break;
case LCT_UPVAL:
setobj(L, &ncl->l.uprefs[ui], VM_UV(LUAU_INSN_B(uinsn)));
break;
default:
LUAU_ASSERT(!"Unknown upvalue capture type");
LUAU_UNREACHABLE(); // improves switch() codegen by eliding opcode bounds checks
}
}
VM_PROTECT(luaC_checkGC(L));
return pc;
}
const Instruction* executeNAMECALL(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
StkId rb = VM_REG(LUAU_INSN_B(insn));
uint32_t aux = *pc++;
TValue* kv = VM_KV(aux);
LUAU_ASSERT(ttisstring(kv));
if (ttistable(rb))
{
Table* h = hvalue(rb);
// note: we can't use nodemask8 here because we need to query the main position of the table, and 8-bit nodemask8 only works
// for predictive lookups
LuaNode* n = &h->node[tsvalue(kv)->hash & (sizenode(h) - 1)];
const TValue* mt = 0;
const LuaNode* mtn = 0;
// fast-path: key is in the table in expected slot
if (ttisstring(gkey(n)) && tsvalue(gkey(n)) == tsvalue(kv) && !ttisnil(gval(n)))
{
// note: order of copies allows rb to alias ra+1 or ra
setobj2s(L, ra + 1, rb);
setobj2s(L, ra, gval(n));
}
// fast-path: key is absent from the base, table has an __index table, and it has the result in the expected slot
else if (gnext(n) == 0 && (mt = fasttm(L, hvalue(rb)->metatable, TM_INDEX)) && ttistable(mt) &&
(mtn = &hvalue(mt)->node[LUAU_INSN_C(insn) & hvalue(mt)->nodemask8]) && ttisstring(gkey(mtn)) && tsvalue(gkey(mtn)) == tsvalue(kv) &&
!ttisnil(gval(mtn)))
{
// note: order of copies allows rb to alias ra+1 or ra
setobj2s(L, ra + 1, rb);
setobj2s(L, ra, gval(mtn));
}
else
{
// slow-path: handles full table lookup
setobj2s(L, ra + 1, rb);
L->cachedslot = LUAU_INSN_C(insn);
VM_PROTECT(luaV_gettable(L, rb, kv, ra));
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, L->cachedslot);
// recompute ra since stack might have been reallocated
ra = VM_REG(LUAU_INSN_A(insn));
if (ttisnil(ra))
luaG_methoderror(L, ra + 1, tsvalue(kv));
}
}
else
{
Table* mt = ttisuserdata(rb) ? uvalue(rb)->metatable : L->global->mt[ttype(rb)];
const TValue* tmi = 0;
// fast-path: metatable with __namecall
if (const TValue* fn = fasttm(L, mt, TM_NAMECALL))
{
// note: order of copies allows rb to alias ra+1 or ra
setobj2s(L, ra + 1, rb);
setobj2s(L, ra, fn);
L->namecall = tsvalue(kv);
}
else if ((tmi = fasttm(L, mt, TM_INDEX)) && ttistable(tmi))
{
Table* h = hvalue(tmi);
int slot = LUAU_INSN_C(insn) & h->nodemask8;
LuaNode* n = &h->node[slot];
// fast-path: metatable with __index that has method in expected slot
if (LUAU_LIKELY(ttisstring(gkey(n)) && tsvalue(gkey(n)) == tsvalue(kv) && !ttisnil(gval(n))))
{
// note: order of copies allows rb to alias ra+1 or ra
setobj2s(L, ra + 1, rb);
setobj2s(L, ra, gval(n));
}
else
{
// slow-path: handles slot mismatch
setobj2s(L, ra + 1, rb);
L->cachedslot = slot;
VM_PROTECT(luaV_gettable(L, rb, kv, ra));
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
VM_PATCH_C(pc - 2, L->cachedslot);
// recompute ra since stack might have been reallocated
ra = VM_REG(LUAU_INSN_A(insn));
if (ttisnil(ra))
luaG_methoderror(L, ra + 1, tsvalue(kv));
}
}
else
{
// slow-path: handles non-table __index
setobj2s(L, ra + 1, rb);
VM_PROTECT(luaV_gettable(L, rb, kv, ra));
// recompute ra since stack might have been reallocated
ra = VM_REG(LUAU_INSN_A(insn));
if (ttisnil(ra))
luaG_methoderror(L, ra + 1, tsvalue(kv));
}
}
// intentional fallthrough to CALL
LUAU_ASSERT(LUAU_INSN_OP(*pc) == LOP_CALL);
return pc;
}
const Instruction* executeSETLIST(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
StkId rb = &base[LUAU_INSN_B(insn)]; // note: this can point to L->top if c == LUA_MULTRET making VM_REG unsafe to use
int c = LUAU_INSN_C(insn) - 1;
uint32_t index = *pc++;
if (c == LUA_MULTRET)
{
c = int(L->top - rb);
L->top = L->ci->top;
}
Table* h = hvalue(ra);
// TODO: we really don't need this anymore
if (!ttistable(ra))
return NULL; // temporary workaround to weaken a rather powerful exploitation primitive in case of a MITM attack on bytecode
int last = index + c - 1;
if (last > h->sizearray)
{
VM_PROTECT_PC(); // luaH_resizearray may fail due to OOM
luaH_resizearray(L, h, last);
}
TValue* array = h->array;
for (int i = 0; i < c; ++i)
setobj2t(L, &array[index + i - 1], rb + i);
luaC_barrierfast(L, h);
return pc;
}
const Instruction* executeFORGPREP(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
if (ttisfunction(ra))
{
// will be called during FORGLOOP
}
else
{
Table* mt = ttistable(ra) ? hvalue(ra)->metatable : ttisuserdata(ra) ? uvalue(ra)->metatable : cast_to(Table*, NULL);
if (const TValue* fn = fasttm(L, mt, TM_ITER))
{
setobj2s(L, ra + 1, ra);
setobj2s(L, ra, fn);
L->top = ra + 2; // func + self arg
LUAU_ASSERT(L->top <= L->stack_last);
VM_PROTECT(luaD_call(L, ra, 3));
L->top = L->ci->top;
// recompute ra since stack might have been reallocated
ra = VM_REG(LUAU_INSN_A(insn));
// protect against __iter returning nil, since nil is used as a marker for builtin iteration in FORGLOOP
if (ttisnil(ra))
{
VM_PROTECT_PC(); // next call always errors
luaG_typeerror(L, ra, "call");
}
}
else if (fasttm(L, mt, TM_CALL))
{
// table or userdata with __call, will be called during FORGLOOP
// TODO: we might be able to stop supporting this depending on whether it's used in practice
}
else if (ttistable(ra))
{
// set up registers for builtin iteration
setobj2s(L, ra + 1, ra);
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(0)));
setnilvalue(ra);
}
else
{
VM_PROTECT_PC(); // next call always errors
luaG_typeerror(L, ra, "iterate over");
}
}
pc += LUAU_INSN_D(insn);
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
return pc;
}
const Instruction* executeGETVARARGS(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
int b = LUAU_INSN_B(insn) - 1;
int n = cast_int(base - L->ci->func) - cl->l.p->numparams - 1;
if (b == LUA_MULTRET)
{
VM_PROTECT(luaD_checkstack(L, n));
StkId ra = VM_REG(LUAU_INSN_A(insn)); // previous call may change the stack
for (int j = 0; j < n; j++)
setobj2s(L, ra + j, base - n + j);
L->top = ra + n;
return pc;
}
else
{
StkId ra = VM_REG(LUAU_INSN_A(insn));
for (int j = 0; j < b && j < n; j++)
setobj2s(L, ra + j, base - n + j);
for (int j = n; j < b; j++)
setnilvalue(ra + j);
return pc;
}
}
const Instruction* executeDUPCLOSURE(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
TValue* kv = VM_KV(LUAU_INSN_D(insn));
Closure* kcl = clvalue(kv);
VM_PROTECT_PC(); // luaF_newLclosure may fail due to OOM
// clone closure if the environment is not shared
// note: we save closure to stack early in case the code below wants to capture it by value
Closure* ncl = (kcl->env == cl->env) ? kcl : luaF_newLclosure(L, kcl->nupvalues, cl->env, kcl->l.p);
setclvalue(L, ra, ncl);
// this loop does three things:
// - if the closure was created anew, it just fills it with upvalues
// - if the closure from the constant table is used, it fills it with upvalues so that it can be shared in the future
// - if the closure is reused, it checks if the reuse is safe via rawequal, and falls back to duplicating the closure
// normally this would use two separate loops, for reuse check and upvalue setup, but MSVC codegen goes crazy if you do that
for (int ui = 0; ui < kcl->nupvalues; ++ui)
{
Instruction uinsn = pc[ui];
LUAU_ASSERT(LUAU_INSN_OP(uinsn) == LOP_CAPTURE);
LUAU_ASSERT(LUAU_INSN_A(uinsn) == LCT_VAL || LUAU_INSN_A(uinsn) == LCT_UPVAL);
TValue* uv = (LUAU_INSN_A(uinsn) == LCT_VAL) ? VM_REG(LUAU_INSN_B(uinsn)) : VM_UV(LUAU_INSN_B(uinsn));
// check if the existing closure is safe to reuse
if (ncl == kcl && luaO_rawequalObj(&ncl->l.uprefs[ui], uv))
continue;
// lazily clone the closure and update the upvalues
if (ncl == kcl && kcl->preload == 0)
{
ncl = luaF_newLclosure(L, kcl->nupvalues, cl->env, kcl->l.p);
setclvalue(L, ra, ncl);
ui = -1; // restart the loop to fill all upvalues
continue;
}
// this updates a newly created closure, or an existing closure created during preload, in which case we need a barrier
setobj(L, &ncl->l.uprefs[ui], uv);
luaC_barrier(L, ncl, uv);
}
// this is a noop if ncl is newly created or shared successfully, but it has to run after the closure is preloaded for the first time
ncl->preload = 0;
if (kcl != ncl)
VM_PROTECT(luaC_checkGC(L));
pc += kcl->nupvalues;
return pc;
}
const Instruction* executePREPVARARGS(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
Instruction insn = *pc++;
int numparams = LUAU_INSN_A(insn);
// all fixed parameters are copied after the top so we need more stack space
VM_PROTECT(luaD_checkstack(L, cl->stacksize + numparams));
// the caller must have filled extra fixed arguments with nil
LUAU_ASSERT(cast_int(L->top - base) >= numparams);
// move fixed parameters to final position
StkId fixed = base; // first fixed argument
base = L->top; // final position of first argument
for (int i = 0; i < numparams; ++i)
{
setobj2s(L, base + i, fixed + i);
setnilvalue(fixed + i);
}
// rewire our stack frame to point to the new base
L->ci->base = base;
L->ci->top = base + cl->stacksize;
L->base = base;
L->top = L->ci->top;
return pc;
}
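// Illustrative example (not part of the original commit): for p->numparams == 2 and a call
// f(1, 2, 3), the frame goes from
//   base -> [1][2][3]                  (top = base + 3)
// to
//   [nil][nil][3]  newbase -> [1][2]   (fixed args copied above the old top, old slots nil'd)
// leaving the vararg '3' directly below the new base, where GETVARARGS expects to find it.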
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,36 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "lobject.h"
namespace Luau
{
namespace CodeGen
{
bool forgLoopTableIter(lua_State* L, Table* h, int index, TValue* ra);
bool forgLoopNodeIter(lua_State* L, Table* h, int index, TValue* ra);
bool forgLoopNonTableFallback(lua_State* L, int insnA, int aux);
void forgPrepXnextFallback(lua_State* L, TValue* ra, int pc);
Closure* callProlog(lua_State* L, TValue* ra, StkId argtop, int nresults);
void callEpilogC(lua_State* L, int nresults, int n);
Closure* callFallback(lua_State* L, StkId ra, StkId argtop, int nresults);
Closure* returnFallback(lua_State* L, StkId ra, StkId valend);
const Instruction* executeGETGLOBAL(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executeSETGLOBAL(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executeGETTABLEKS(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executeSETTABLEKS(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executeNEWCLOSURE(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executeNAMECALL(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executeSETLIST(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executeFORGPREP(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executeGETVARARGS(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executeDUPCLOSURE(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executePREPVARARGS(lua_State* L, const Instruction* pc, StkId base, TValue* k);
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,197 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "CodeGenX64.h"
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/UnwindBuilder.h"
#include "CustomExecUtils.h"
#include "NativeState.h"
#include "EmitCommonX64.h"
#include "lstate.h"
/* An overview of the native environment stack setup that the entry function performs:
* Each line is 8 bytes, stack grows downwards.
*
* | ... previous frames ...
* | rdx home space | (unused)
* | rcx home space | (unused)
* | return address |
* | ... saved non-volatile registers ... <-- rsp + kStackSize + kLocalsSize
* | unused | for 16 byte alignment of the stack
* | sCode |
* | sClosure | <-- rsp + kStackSize
* | argument 6 | <-- rsp + 40
* | argument 5 | <-- rsp + 32
* | r9 home space |
* | r8 home space |
* | rdx home space |
* | rcx home space | <-- rsp points here
*
* Arguments to our entry function are saved to home space only on Windows.
 * Space for arguments to the function we call is always reserved, but it is used only on Windows.
*
 * Right now we use a frame pointer, but because the layout is fixed we can omit it in the future
*/
namespace Luau
{
namespace CodeGen
{
namespace X64
{
struct EntryLocations
{
Label start;
Label prologueEnd;
Label epilogueStart;
};
static EntryLocations buildEntryFunction(AssemblyBuilderX64& build, UnwindBuilder& unwind)
{
EntryLocations locations;
build.align(kFunctionAlignment, X64::AlignmentDataX64::Ud2);
locations.start = build.setLabel();
unwind.startFunction();
// Save common non-volatile registers
if (build.abi == ABIX64::SystemV)
{
// We need to use a standard rbp-based frame setup for debuggers to work with JIT code
build.push(rbp);
build.mov(rbp, rsp);
}
build.push(rbx);
build.push(r12);
build.push(r13);
build.push(r14);
build.push(r15);
if (build.abi == ABIX64::Windows)
{
// Save non-volatile registers that are specific to Windows x64 ABI
build.push(rdi);
build.push(rsi);
// On Windows, rbp is available as a general-purpose non-volatile register; we currently don't use it, but we need to push an even number
// of registers for stack alignment...
build.push(rbp);
// TODO: once we start using non-volatile SIMD registers on Windows, we will save those here
}
// Allocate stack space (reg home area + local data)
build.sub(rsp, kStackSize + kLocalsSize);
locations.prologueEnd = build.setLabel();
uint32_t prologueSize = build.getLabelOffset(locations.prologueEnd) - build.getLabelOffset(locations.start);
if (build.abi == ABIX64::SystemV)
unwind.prologueX64(prologueSize, kStackSize + kLocalsSize, /* setupFrame= */ true, {rbx, r12, r13, r14, r15});
else if (build.abi == ABIX64::Windows)
unwind.prologueX64(prologueSize, kStackSize + kLocalsSize, /* setupFrame= */ false, {rbx, r12, r13, r14, r15, rdi, rsi, rbp});
// Setup native execution environment
build.mov(rState, rArg1);
build.mov(rNativeContext, rArg4);
build.mov(rBase, qword[rState + offsetof(lua_State, base)]); // L->base
build.mov(rax, qword[rState + offsetof(lua_State, ci)]); // L->ci
build.mov(rax, qword[rax + offsetof(CallInfo, func)]); // L->ci->func
build.mov(rax, qword[rax + offsetof(TValue, value.gc)]); // L->ci->func->value.gc aka cl
build.mov(sClosure, rax);
build.mov(rConstants, qword[rArg2 + offsetof(Proto, k)]); // proto->k
build.mov(rax, qword[rArg2 + offsetof(Proto, code)]); // proto->code
build.mov(sCode, rax);
// Jump to the specified instruction; further control flow will be handled with custom ABI with register setup from EmitCommonX64.h
build.jmp(rArg3);
// Even though we jumped away, we will return here in the end
locations.epilogueStart = build.setLabel();
// Cleanup and exit
build.add(rsp, kStackSize + kLocalsSize);
if (build.abi == ABIX64::Windows)
{
build.pop(rbp);
build.pop(rsi);
build.pop(rdi);
}
build.pop(r15);
build.pop(r14);
build.pop(r13);
build.pop(r12);
build.pop(rbx);
if (build.abi == ABIX64::SystemV)
build.pop(rbp);
build.ret();
// Our entry function is special, it spans the whole remaining code area
unwind.finishFunction(build.getLabelOffset(locations.start), kFullBlockFuncton);
return locations;
}
bool initHeaderFunctions(NativeState& data)
{
AssemblyBuilderX64 build(/* logText= */ false);
UnwindBuilder& unwind = *data.unwindBuilder.get();
unwind.startInfo(UnwindBuilder::X64);
EntryLocations entryLocations = buildEntryFunction(build, unwind);
build.finalize();
unwind.finishInfo();
LUAU_ASSERT(build.data.empty());
uint8_t* codeStart = nullptr;
if (!data.codeAllocator.allocate(
build.data.data(), int(build.data.size()), build.code.data(), int(build.code.size()), data.gateData, data.gateDataSize, codeStart))
{
LUAU_ASSERT(!"failed to create entry function");
return false;
}
// Set the offset at the beginning so that functions in new blocks will not overlay the locations
// specified by the unwind information of the entry function
unwind.setBeginOffset(build.getLabelOffset(entryLocations.prologueEnd));
data.context.gateEntry = codeStart + build.getLabelOffset(entryLocations.start);
data.context.gateExit = codeStart + build.getLabelOffset(entryLocations.epilogueStart);
return true;
}
void assembleHelpers(X64::AssemblyBuilderX64& build, ModuleHelpers& helpers)
{
if (build.logText)
build.logAppend("; exitContinueVm\n");
helpers.exitContinueVm = build.setLabel();
emitExit(build, /* continueInVm */ true);
if (build.logText)
build.logAppend("; exitNoContinueVm\n");
helpers.exitNoContinueVm = build.setLabel();
emitExit(build, /* continueInVm */ false);
if (build.logText)
build.logAppend("; continueCallInVm\n");
helpers.continueCallInVm = build.setLabel();
emitContinueCallInVm(build);
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,22 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
namespace Luau
{
namespace CodeGen
{
struct NativeState;
struct ModuleHelpers;
namespace X64
{
class AssemblyBuilderX64;
bool initHeaderFunctions(NativeState& data);
void assembleHelpers(AssemblyBuilderX64& build, ModuleHelpers& helpers);
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,106 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "NativeState.h"
#include "lobject.h"
#include "lstate.h"
namespace Luau
{
namespace CodeGen
{
// Here we define helper functions to wrap interaction with the Luau custom execution API so that it works with or without LUA_CUSTOM_EXECUTION
#if LUA_CUSTOM_EXECUTION
inline lua_ExecutionCallbacks* getExecutionCallbacks(lua_State* L)
{
return &L->global->ecb;
}
inline NativeState* getNativeState(lua_State* L)
{
lua_ExecutionCallbacks* ecb = getExecutionCallbacks(L);
return (NativeState*)ecb->context;
}
inline void setNativeState(lua_State* L, NativeState* nativeState)
{
lua_ExecutionCallbacks* ecb = getExecutionCallbacks(L);
ecb->context = nativeState;
}
inline NativeState* createNativeState(lua_State* L)
{
NativeState* state = new NativeState();
setNativeState(L, state);
return state;
}
inline void destroyNativeState(lua_State* L)
{
NativeState* state = getNativeState(L);
setNativeState(L, nullptr);
delete state;
}
#else
inline lua_ExecutionCallbacks* getExecutionCallbacks(lua_State* L)
{
return nullptr;
}
inline NativeState* getNativeState(lua_State* L)
{
return nullptr;
}
inline void setNativeState(lua_State* L, NativeState* nativeState) {}
inline NativeState* createNativeState(lua_State* L)
{
return nullptr;
}
inline void destroyNativeState(lua_State* L) {}
#endif
inline int getOpLength(LuauOpcode op)
{
switch (op)
{
case LOP_GETGLOBAL:
case LOP_SETGLOBAL:
case LOP_GETIMPORT:
case LOP_GETTABLEKS:
case LOP_SETTABLEKS:
case LOP_NAMECALL:
case LOP_JUMPIFEQ:
case LOP_JUMPIFLE:
case LOP_JUMPIFLT:
case LOP_JUMPIFNOTEQ:
case LOP_JUMPIFNOTLE:
case LOP_JUMPIFNOTLT:
case LOP_NEWTABLE:
case LOP_SETLIST:
case LOP_FORGLOOP:
case LOP_LOADKX:
case LOP_FASTCALL2:
case LOP_FASTCALL2K:
case LOP_JUMPXEQKNIL:
case LOP_JUMPXEQKB:
case LOP_JUMPXEQKN:
case LOP_JUMPXEQKS:
return 2;
default:
return 1;
}
}
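// Illustrative sketch (not part of the original commit): getOpLength gives the word count of
// an encoded instruction, so a bytecode stream can be walked like this:
#if 0
static void forEachInstructionSketch(const Instruction* code, int sizecode)
{
    for (int i = 0; i < sizecode;)
    {
        LuauOpcode op = LuauOpcode(LUAU_INSN_OP(code[i]));
        // ... inspect code[i] here (and the aux word code[i + 1] when getOpLength(op) == 2) ...
        i += getOpLength(op);
    }
}
#endif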
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,128 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "EmitBuiltinsX64.h"
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/Bytecode.h"
#include "Luau/IrCallWrapperX64.h"
#include "Luau/IrRegAllocX64.h"
#include "EmitCommonX64.h"
#include "NativeState.h"
#include "lstate.h"
namespace Luau
{
namespace CodeGen
{
namespace X64
{
static void emitBuiltinMathFrexp(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int arg, int nresults)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::xmmword, luauRegValue(arg));
callWrap.addArgument(SizeX64::qword, sTemporarySlot);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, libm_frexp)]);
build.vmovsd(luauRegValue(ra), xmm0);
if (nresults > 1)
{
build.vcvtsi2sd(xmm0, xmm0, dword[sTemporarySlot + 0]);
build.vmovsd(luauRegValue(ra + 1), xmm0);
}
}
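// For reference (not part of the original commit), the sequence above computes the C
// equivalent below; the exponent lands in sTemporarySlot as an int and is converted to
// double only when a second result is requested:
#if 0
static void mathFrexpSketch(double x, double* r0, double* r1, int nresults)
{
    int e;
    *r0 = frexp(x, &e); // libm_frexp in NativeContext
    if (nresults > 1)
        *r1 = double(e);
}
#endif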
static void emitBuiltinMathModf(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int arg, int nresults)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::xmmword, luauRegValue(arg));
callWrap.addArgument(SizeX64::qword, sTemporarySlot);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, libm_modf)]);
build.vmovsd(xmm1, qword[sTemporarySlot + 0]);
build.vmovsd(luauRegValue(ra), xmm1);
if (nresults > 1)
build.vmovsd(luauRegValue(ra + 1), xmm0);
}
static void emitBuiltinMathSign(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int arg)
{
ScopedRegX64 tmp0{regs, SizeX64::xmmword};
ScopedRegX64 tmp1{regs, SizeX64::xmmword};
ScopedRegX64 tmp2{regs, SizeX64::xmmword};
ScopedRegX64 tmp3{regs, SizeX64::xmmword};
build.vmovsd(tmp0.reg, luauRegValue(arg));
build.vxorpd(tmp1.reg, tmp1.reg, tmp1.reg);
// Set tmp2 to -1 if arg < 0, else 0
build.vcmpltsd(tmp2.reg, tmp0.reg, tmp1.reg);
build.vmovsd(tmp3.reg, build.f64(-1));
build.vandpd(tmp2.reg, tmp2.reg, tmp3.reg);
// Set mask bit to 1 if 0 < arg, else 0
build.vcmpltsd(tmp0.reg, tmp1.reg, tmp0.reg);
// Result = (mask-bit == 1) ? 1.0 : tmp2
// If arg < 0 then tmp2 is -1 and mask-bit is 0, result is -1
// If arg == 0 then tmp2 is 0 and mask-bit is 0, result is 0
// If arg > 0 then tmp2 is 0 and mask-bit is 1, result is 1
build.vblendvpd(tmp0.reg, tmp2.reg, build.f64x2(1, 1), tmp0.reg);
build.vmovsd(luauRegValue(ra), tmp0.reg);
}
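// For reference (not part of the original commit): the branchless SSE sequence above is
// equivalent to the scalar code below; NaN compares false on both sides and yields 0:
#if 0
static double mathSignSketch(double a)
{
    double m = (a < 0.0) ? -1.0 : 0.0; // vcmpltsd + vandpd
    return (0.0 < a) ? 1.0 : m;        // vcmpltsd + vblendvpd
}
#endif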
static void emitBuiltinType(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int arg)
{
ScopedRegX64 tmp0{regs, SizeX64::qword};
ScopedRegX64 tag{regs, SizeX64::dword};
build.mov(tag.reg, luauRegTag(arg));
build.mov(tmp0.reg, qword[rState + offsetof(lua_State, global)]);
build.mov(tmp0.reg, qword[tmp0.reg + qwordReg(tag.reg) * sizeof(TString*) + offsetof(global_State, ttname)]);
build.mov(luauRegValue(ra), tmp0.reg);
}
static void emitBuiltinTypeof(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int arg)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(arg));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaT_objtypenamestr)]);
build.mov(luauRegValue(ra), rax);
}
void emitBuiltin(IrRegAllocX64& regs, AssemblyBuilderX64& build, int bfid, int ra, int arg, OperandX64 arg2, int nparams, int nresults)
{
switch (bfid)
{
case LBF_MATH_FREXP:
LUAU_ASSERT(nparams == 1 && (nresults == 1 || nresults == 2));
return emitBuiltinMathFrexp(regs, build, ra, arg, nresults);
case LBF_MATH_MODF:
LUAU_ASSERT(nparams == 1 && (nresults == 1 || nresults == 2));
return emitBuiltinMathModf(regs, build, ra, arg, nresults);
case LBF_MATH_SIGN:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinMathSign(regs, build, ra, arg);
case LBF_TYPE:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinType(regs, build, ra, arg);
case LBF_TYPEOF:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinTypeof(regs, build, ra, arg);
default:
LUAU_ASSERT(!"Missing x64 lowering");
break;
}
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,23 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
namespace Luau
{
namespace CodeGen
{
struct Label;
struct IrOp;
namespace X64
{
class AssemblyBuilderX64;
struct OperandX64;
struct IrRegAllocX64;
void emitBuiltin(IrRegAllocX64& regs, AssemblyBuilderX64& build, int bfid, int ra, int arg, OperandX64 arg2, int nparams, int nresults);
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,37 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Label.h"
namespace Luau
{
namespace CodeGen
{
constexpr unsigned kTValueSizeLog2 = 4;
constexpr unsigned kLuaNodeSizeLog2 = 5;
// TKey.tt and TKey.next are packed together in a bitfield
constexpr unsigned kOffsetOfTKeyTagNext = 12; // offsetof cannot be used on a bit field
constexpr unsigned kTKeyTagBits = 4;
constexpr unsigned kTKeyTagMask = (1 << kTKeyTagBits) - 1;
constexpr unsigned kOffsetOfInstructionC = 3;
// Leaf functions that are placed in every module to perform common instruction sequences
struct ModuleHelpers
{
// A64/X64
Label exitContinueVm;
Label exitNoContinueVm;
// X64
Label continueCallInVm;
// A64
Label reentry; // x0: closure
Label interrupt; // x0: pc offset, x1: return address, x2: interrupt
};
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,59 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/AssemblyBuilderA64.h"
#include "EmitCommon.h"
#include "lobject.h"
#include "ltm.h"
#include "lstate.h"
// AArch64 ABI reminder:
// Arguments: x0-x7, v0-v7
// Return: x0, v0 (or indirectly via x8, which holds the address of the result structure)
// Volatile: x9-x15, v16-v31 ("caller-saved", any call may change them)
// Intra-procedure-call temporary: x16-x17 (any call or relocated jump may change them, as the linker may point branches to veneers to perform long jumps)
// Non-volatile: x19-x28, v8-v15 ("callee-saved", preserved after calls, only bottom half of SIMD registers is preserved!)
// Reserved: x18: reserved for platform use; x29: frame pointer (unless omitted); x30: link register; x31: stack pointer
namespace Luau
{
namespace CodeGen
{
struct NativeState;
namespace A64
{
// Data that is very common to access is placed in non-volatile registers:
// 1. Constant registers (only loaded during codegen entry)
constexpr RegisterA64 rState = x19; // lua_State* L
constexpr RegisterA64 rNativeContext = x20; // NativeContext* context
// 2. Frame registers (reloaded when call frame changes; rBase is also reloaded after all calls that may reallocate stack)
constexpr RegisterA64 rConstants = x21; // TValue* k
constexpr RegisterA64 rClosure = x22; // Closure* cl
constexpr RegisterA64 rCode = x23; // Instruction* code
constexpr RegisterA64 rBase = x24; // StkId base
// Native code is as stackless as the interpreter, so we can place some data on the stack once and have it accessible at any point
// See CodeGenA64.cpp for layout
constexpr unsigned kStashSlots = 8; // stashed non-volatile registers
constexpr unsigned kSpillSlots = 22; // slots for spilling temporary registers
constexpr unsigned kTempSlots = 2; // 16 bytes of temporary space, such luxury!
constexpr unsigned kStackSize = (kStashSlots + kSpillSlots + kTempSlots) * 8;
constexpr AddressA64 sSpillArea = mem(sp, kStashSlots * 8);
constexpr AddressA64 sTemporary = mem(sp, (kStashSlots + kSpillSlots) * 8);
inline void emitUpdateBase(AssemblyBuilderA64& build)
{
build.ldr(rBase, mem(rState, offsetof(lua_State, base)));
}
} // namespace A64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,358 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "EmitCommonX64.h"
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/IrCallWrapperX64.h"
#include "Luau/IrData.h"
#include "Luau/IrRegAllocX64.h"
#include "CustomExecUtils.h"
#include "NativeState.h"
#include "lgc.h"
#include "lstate.h"
namespace Luau
{
namespace CodeGen
{
namespace X64
{
void jumpOnNumberCmp(AssemblyBuilderX64& build, RegisterX64 tmp, OperandX64 lhs, OperandX64 rhs, IrCondition cond, Label& label)
{
// Refresher on comi/ucomi EFLAGS:
// CF only: less
// ZF only: equal
// PF+CF+ZF: unordered (NaN)
if (rhs.cat == CategoryX64::reg)
{
build.vucomisd(rhs, lhs);
}
else
{
build.vmovsd(tmp, rhs);
build.vucomisd(tmp, lhs);
}
// Keep in mind that 'Not' conditions want 'true' for comparisons with NaN
// And because of NaN, integer check interchangeability like 'not less or equal' <-> 'greater' does not hold
switch (cond)
{
case IrCondition::NotLessEqual:
// (b < a) is the same as !(a <= b). jnae checks CF=1 which means < or NaN
build.jcc(ConditionX64::NotAboveEqual, label);
break;
case IrCondition::LessEqual:
// (b >= a) is the same as (a <= b). jae checks CF=0 which means >= and not NaN
build.jcc(ConditionX64::AboveEqual, label);
break;
case IrCondition::NotLess:
// (b <= a) is the same as !(a < b). jna checks CF=1 or ZF=1 which means <= or NaN
build.jcc(ConditionX64::NotAbove, label);
break;
case IrCondition::Less:
// (b > a) is the same as (a < b). ja checks CF=0 and ZF=0 which means > and not NaN
build.jcc(ConditionX64::Above, label);
break;
case IrCondition::NotEqual:
// ZF=0 or PF=1 means != or NaN
build.jcc(ConditionX64::NotZero, label);
build.jcc(ConditionX64::Parity, label);
break;
default:
LUAU_ASSERT(!"Unsupported condition");
}
}
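// For reference (not part of the original commit), with vucomisd(b, a) the jumps above map to:
//   condition      jcc                  flags checked   unordered (NaN)
//   a <  b         ja   (Above)         CF=0 and ZF=0   no jump
//   a <= b         jae  (AboveEqual)    CF=0            no jump
//   !(a <  b)      jna  (NotAbove)      CF=1 or ZF=1    jump
//   !(a <= b)      jnae (NotAboveEqual) CF=1            jump
//   a != b         jnz + jp             ZF=0 or PF=1    jump
// which is why the 'Not' conditions come out true for NaN operands.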
void jumpOnAnyCmpFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, IrCondition cond, Label& label)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
if (cond == IrCondition::NotLessEqual || cond == IrCondition::LessEqual)
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_lessequal)]);
else if (cond == IrCondition::NotLess || cond == IrCondition::Less)
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_lessthan)]);
else if (cond == IrCondition::NotEqual || cond == IrCondition::Equal)
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_equalval)]);
else
LUAU_ASSERT(!"Unsupported condition");
emitUpdateBase(build);
build.test(eax, eax);
build.jcc(cond == IrCondition::NotLessEqual || cond == IrCondition::NotLess || cond == IrCondition::NotEqual ? ConditionX64::Zero
: ConditionX64::NotZero,
label);
}
void getTableNodeAtCachedSlot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, RegisterX64 table, int pcpos)
{
LUAU_ASSERT(tmp != node);
LUAU_ASSERT(table != node);
build.mov(node, qword[table + offsetof(Table, node)]);
// compute cached slot
build.mov(tmp, sCode);
build.movzx(dwordReg(tmp), byte[tmp + pcpos * sizeof(Instruction) + kOffsetOfInstructionC]);
build.and_(byteReg(tmp), byte[table + offsetof(Table, nodemask8)]);
// LuaNode* n = &h->node[slot];
build.shl(dwordReg(tmp), kLuaNodeSizeLog2);
build.add(node, tmp);
}
void convertNumberToIndexOrJump(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 numd, RegisterX64 numi, Label& label)
{
LUAU_ASSERT(numi.size == SizeX64::dword);
// Convert to integer, NaN is converted into 0x80000000
build.vcvttsd2si(numi, numd);
// Convert that integer back to double
build.vcvtsi2sd(tmp, numd, numi);
build.vucomisd(tmp, numd); // Sets ZF=1 if equal or NaN
// We don't need non-integer values
// But to skip the PF=1 check, we proceed with NaN because 0x80000000 index is out of bounds
build.jcc(ConditionX64::NotZero, label);
}
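// For reference (not part of the original commit), this is approximately:
#if 0
static bool numberToIndexSketch(double d, int* out)
{
    int i = int(d); // vcvttsd2si: NaN and out-of-range inputs become 0x80000000
    if (double(i) != d)
        return false; // not an exact 32-bit integer
    *out = i;
    return true;
}
#endif
// One deliberate difference: ucomisd sets ZF for unordered operands, so the emitted code lets
// NaN pass this check and relies on the out-of-bounds 0x80000000 index to reject it later.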
void callArithHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, OperandX64 c, TMS tm)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
callWrap.addArgument(SizeX64::qword, c);
callWrap.addArgument(SizeX64::dword, tm);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_doarith)]);
emitUpdateBase(build);
}
void callLengthHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_dolen)]);
emitUpdateBase(build);
}
void callPrepareForN(IrRegAllocX64& regs, AssemblyBuilderX64& build, int limit, int step, int init)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(limit));
callWrap.addArgument(SizeX64::qword, luauRegAddress(step));
callWrap.addArgument(SizeX64::qword, luauRegAddress(init));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_prepareFORN)]);
}
void callGetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
callWrap.addArgument(SizeX64::qword, c);
callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_gettable)]);
emitUpdateBase(build);
}
void callSetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
callWrap.addArgument(SizeX64::qword, c);
callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_settable)]);
emitUpdateBase(build);
}
void checkObjectBarrierConditions(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 object, int ra, Label& skip)
{
// iscollectable(ra)
build.cmp(luauRegTag(ra), LUA_TSTRING);
build.jcc(ConditionX64::Less, skip);
// isblack(obj2gco(o))
build.test(byte[object + offsetof(GCheader, marked)], bitmask(BLACKBIT));
build.jcc(ConditionX64::Zero, skip);
// iswhite(gcvalue(ra))
build.mov(tmp, luauRegValue(ra));
build.test(byte[tmp + offsetof(GCheader, marked)], bit2mask(WHITE0BIT, WHITE1BIT));
build.jcc(ConditionX64::Zero, skip);
}
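// For reference (not part of the original commit), these three checks mirror the condition
// guarding the write barrier in the VM; with TValue* v = &base[ra] and GCObject* o being the
// object stored into:
#if 0
if (iscollectable(v) && isblack(o) && iswhite(gcvalue(v)))
    luaC_barrierf(L, o, gcvalue(v));
#endif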
void callBarrierObject(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 object, IrOp objectOp, int ra)
{
Label skip;
ScopedRegX64 tmp{regs, SizeX64::qword};
checkObjectBarrierConditions(build, tmp.reg, object, ra, skip);
{
ScopedSpills spillGuard(regs);
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, object, objectOp);
callWrap.addArgument(SizeX64::qword, tmp);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_barrierf)]);
}
build.setLabel(skip);
}
void callBarrierTableFast(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 table, IrOp tableOp)
{
Label skip;
// isblack(obj2gco(t))
build.test(byte[table + offsetof(GCheader, marked)], bitmask(BLACKBIT));
build.jcc(ConditionX64::Zero, skip);
{
ScopedSpills spillGuard(regs);
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, table, tableOp);
callWrap.addArgument(SizeX64::qword, addr[table + offsetof(Table, gclist)]);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_barrierback)]);
}
build.setLabel(skip);
}
void callStepGc(IrRegAllocX64& regs, AssemblyBuilderX64& build)
{
Label skip;
{
ScopedRegX64 tmp1{regs, SizeX64::qword};
ScopedRegX64 tmp2{regs, SizeX64::qword};
build.mov(tmp1.reg, qword[rState + offsetof(lua_State, global)]);
build.mov(tmp2.reg, qword[tmp1.reg + offsetof(global_State, totalbytes)]);
build.cmp(tmp2.reg, qword[tmp1.reg + offsetof(global_State, GCthreshold)]);
build.jcc(ConditionX64::Below, skip);
}
{
ScopedSpills spillGuard(regs);
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::dword, 1);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_step)]);
emitUpdateBase(build);
}
build.setLabel(skip);
}
void emitExit(AssemblyBuilderX64& build, bool continueInVm)
{
if (continueInVm)
build.mov(eax, 1);
else
build.xor_(eax, eax);
build.jmp(qword[rNativeContext + offsetof(NativeContext, gateExit)]);
}
void emitUpdateBase(AssemblyBuilderX64& build)
{
build.mov(rBase, qword[rState + offsetof(lua_State, base)]);
}
static void emitSetSavedPc(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos)
{
ScopedRegX64 tmp1{regs, SizeX64::qword};
ScopedRegX64 tmp2{regs, SizeX64::qword};
build.mov(tmp1.reg, sCode);
build.add(tmp1.reg, pcpos * sizeof(Instruction));
build.mov(tmp2.reg, qword[rState + offsetof(lua_State, ci)]);
build.mov(qword[tmp2.reg + offsetof(CallInfo, savedpc)], tmp1.reg);
}
void emitInterrupt(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos)
{
Label skip;
ScopedRegX64 tmp{regs, SizeX64::qword};
// Skip if there is no interrupt set
build.mov(tmp.reg, qword[rState + offsetof(lua_State, global)]);
build.mov(tmp.reg, qword[tmp.reg + offsetof(global_State, cb.interrupt)]);
build.test(tmp.reg, tmp.reg);
build.jcc(ConditionX64::Zero, skip);
emitSetSavedPc(regs, build, pcpos + 1);
// Call interrupt
// TODO: This code should move to the end of the function, or even be outlined so that it can be shared by multiple interruptible instructions
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::dword, -1);
callWrap.call(tmp.release());
emitUpdateBase(build); // interrupt may have reallocated stack
// Check if we need to exit
build.mov(al, byte[rState + offsetof(lua_State, status)]);
build.test(al, al);
build.jcc(ConditionX64::Zero, skip);
build.mov(rax, qword[rState + offsetof(lua_State, ci)]);
build.sub(qword[rax + offsetof(CallInfo, savedpc)], sizeof(Instruction));
emitExit(build, /* continueInVm */ false);
build.setLabel(skip);
}
void emitFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int offset, int pcpos)
{
// fallback(L, instruction, base, k)
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
RegisterX64 reg = callWrap.suggestNextArgumentRegister(SizeX64::qword);
build.mov(reg, sCode);
callWrap.addArgument(SizeX64::qword, addr[reg + pcpos * sizeof(Instruction)]);
callWrap.addArgument(SizeX64::qword, rBase);
callWrap.addArgument(SizeX64::qword, rConstants);
callWrap.call(qword[rNativeContext + offset]);
emitUpdateBase(build);
}
void emitContinueCallInVm(AssemblyBuilderX64& build)
{
RegisterX64 proto = rcx; // Sync with emitInstCall
build.mov(rdx, qword[proto + offsetof(Proto, code)]);
build.mov(rax, qword[rState + offsetof(lua_State, ci)]);
build.mov(qword[rax + offsetof(CallInfo, savedpc)], rdx);
emitExit(build, /* continueInVm */ true);
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,239 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/AssemblyBuilderX64.h"
#include "EmitCommon.h"
#include "lobject.h"
#include "ltm.h"
// MS x64 ABI reminder:
// Arguments: rcx, rdx, r8, r9 ('overlapped' with xmm0-xmm3)
// Return: rax, xmm0
// Nonvolatile: r12-r15, rdi, rsi, rbx, rbp
// SIMD: only xmm6-xmm15 are non-volatile, all ymm upper parts are volatile
// SystemV AMD64 ABI reminder:
// Arguments: rdi, rsi, rdx, rcx, r8, r9 (xmm0-xmm7)
// Return: rax, rdx, xmm0, xmm1
// Nonvolatile: r12-r15, rbx, rbp
// SIMD: all volatile
namespace Luau
{
namespace CodeGen
{
enum class IrCondition : uint8_t;
struct NativeState;
struct IrOp;
namespace X64
{
struct IrRegAllocX64;
constexpr uint32_t kFunctionAlignment = 32;
// Data that is very common to access is placed in non-volatile registers
constexpr RegisterX64 rState = r15; // lua_State* L
constexpr RegisterX64 rBase = r14; // StkId base
constexpr RegisterX64 rNativeContext = r13; // NativeContext* context
constexpr RegisterX64 rConstants = r12; // TValue* k
// Native code is as stackless as the interpreter, so we can place some data on the stack once and have it accessible at any point
// See CodeGenX64.cpp for layout
constexpr unsigned kStackSize = 32 + 16; // 4 home locations for registers, 16 bytes for additional function call arguments
constexpr unsigned kSpillSlots = 4; // locations for register allocator to spill data into
constexpr unsigned kLocalsSize = 24 + 8 * kSpillSlots; // 3 extra slots for our custom locals (also aligns the stack to 16 byte boundary)
constexpr OperandX64 sClosure = qword[rsp + kStackSize + 0]; // Closure* cl
constexpr OperandX64 sCode = qword[rsp + kStackSize + 8]; // Instruction* code
constexpr OperandX64 sTemporarySlot = addr[rsp + kStackSize + 16];
constexpr OperandX64 sSpillArea = addr[rsp + kStackSize + 24];
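// For illustration (not part of the original commit): with kStackSize == 48 and
// kSpillSlots == 4 this lays out, relative to rsp after the prologue:
//   [rsp +  0 .. + 47]  register home space plus sArg5/sArg6 for outgoing calls
//   [rsp + 48]          sClosure
//   [rsp + 56]          sCode
//   [rsp + 64]          sTemporarySlot
//   [rsp + 72 .. +103]  sSpillArea (kSpillSlots * 8 bytes)
// for a total of kStackSize + kLocalsSize == 104 bytes, which together with the pushed
// registers and return address keeps rsp 16-byte aligned.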
// TODO: These should be replaced with a portable call function that checks the ABI at runtime and reorders moves accordingly to avoid conflicts
#if defined(_WIN32)
constexpr RegisterX64 rArg1 = rcx;
constexpr RegisterX64 rArg2 = rdx;
constexpr RegisterX64 rArg3 = r8;
constexpr RegisterX64 rArg4 = r9;
constexpr RegisterX64 rArg5 = noreg;
constexpr RegisterX64 rArg6 = noreg;
constexpr OperandX64 sArg5 = qword[rsp + 32];
constexpr OperandX64 sArg6 = qword[rsp + 40];
#else
constexpr RegisterX64 rArg1 = rdi;
constexpr RegisterX64 rArg2 = rsi;
constexpr RegisterX64 rArg3 = rdx;
constexpr RegisterX64 rArg4 = rcx;
constexpr RegisterX64 rArg5 = r8;
constexpr RegisterX64 rArg6 = r9;
constexpr OperandX64 sArg5 = noreg;
constexpr OperandX64 sArg6 = noreg;
#endif
inline OperandX64 luauReg(int ri)
{
return xmmword[rBase + ri * sizeof(TValue)];
}
inline OperandX64 luauRegAddress(int ri)
{
return addr[rBase + ri * sizeof(TValue)];
}
inline OperandX64 luauRegValue(int ri)
{
return qword[rBase + ri * sizeof(TValue) + offsetof(TValue, value)];
}
inline OperandX64 luauRegTag(int ri)
{
return dword[rBase + ri * sizeof(TValue) + offsetof(TValue, tt)];
}
inline OperandX64 luauRegValueInt(int ri)
{
return dword[rBase + ri * sizeof(TValue) + offsetof(TValue, value)];
}
inline OperandX64 luauRegValueVector(int ri, int index)
{
return dword[rBase + ri * sizeof(TValue) + offsetof(TValue, value) + (sizeof(float) * index)];
}
inline OperandX64 luauConstant(int ki)
{
return xmmword[rConstants + ki * sizeof(TValue)];
}
inline OperandX64 luauConstantAddress(int ki)
{
return addr[rConstants + ki * sizeof(TValue)];
}
inline OperandX64 luauConstantTag(int ki)
{
return dword[rConstants + ki * sizeof(TValue) + offsetof(TValue, tt)];
}
inline OperandX64 luauConstantValue(int ki)
{
return qword[rConstants + ki * sizeof(TValue) + offsetof(TValue, value)];
}
inline OperandX64 luauNodeKeyValue(RegisterX64 node)
{
return qword[node + offsetof(LuaNode, key) + offsetof(TKey, value)];
}
// Note: tag has dirty upper bits
inline OperandX64 luauNodeKeyTag(RegisterX64 node)
{
return dword[node + offsetof(LuaNode, key) + kOffsetOfTKeyTagNext];
}
inline OperandX64 luauNodeValue(RegisterX64 node)
{
return xmmword[node + offsetof(LuaNode, val)];
}
inline void setLuauReg(AssemblyBuilderX64& build, RegisterX64 tmp, int ri, OperandX64 op)
{
LUAU_ASSERT(op.cat == CategoryX64::mem);
build.vmovups(tmp, op);
build.vmovups(luauReg(ri), tmp);
}
inline void jumpIfTagIs(AssemblyBuilderX64& build, int ri, lua_Type tag, Label& label)
{
build.cmp(luauRegTag(ri), tag);
build.jcc(ConditionX64::Equal, label);
}
inline void jumpIfTagIsNot(AssemblyBuilderX64& build, int ri, lua_Type tag, Label& label)
{
build.cmp(luauRegTag(ri), tag);
build.jcc(ConditionX64::NotEqual, label);
}
// Note: fallthrough label should be placed after this condition
inline void jumpIfFalsy(AssemblyBuilderX64& build, int ri, Label& target, Label& fallthrough)
{
jumpIfTagIs(build, ri, LUA_TNIL, target); // false if nil
jumpIfTagIsNot(build, ri, LUA_TBOOLEAN, fallthrough); // true if not nil or boolean
build.cmp(luauRegValueInt(ri), 0);
build.jcc(ConditionX64::Equal, target); // falsy if boolean value is 'false'
}
// Note: fallthrough label should be placed after this condition
inline void jumpIfTruthy(AssemblyBuilderX64& build, int ri, Label& target, Label& fallthrough)
{
jumpIfTagIs(build, ri, LUA_TNIL, fallthrough); // false if nil
jumpIfTagIsNot(build, ri, LUA_TBOOLEAN, target); // true if not nil or boolean
build.cmp(luauRegValueInt(ri), 0);
build.jcc(ConditionX64::NotEqual, target); // true if boolean value is 'true'
}
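// For reference (not part of the original commit), both helpers implement Luau truthiness:
#if 0
static bool isFalsySketch(const TValue* v)
{
    return ttisnil(v) || (ttisboolean(v) && bvalue(v) == 0); // everything else is truthy
}
#endif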
inline void jumpIfNodeKeyTagIsNot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, lua_Type tag, Label& label)
{
tmp.size = SizeX64::dword;
build.mov(tmp, luauNodeKeyTag(node));
build.and_(tmp, kTKeyTagMask);
build.cmp(tmp, tag);
build.jcc(ConditionX64::NotEqual, label);
}
inline void jumpIfNodeValueTagIs(AssemblyBuilderX64& build, RegisterX64 node, lua_Type tag, Label& label)
{
build.cmp(dword[node + offsetof(LuaNode, val) + offsetof(TValue, tt)], tag);
build.jcc(ConditionX64::Equal, label);
}
inline void jumpIfNodeKeyNotInExpectedSlot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, OperandX64 expectedKey, Label& label)
{
jumpIfNodeKeyTagIsNot(build, tmp, node, LUA_TSTRING, label);
build.mov(tmp, expectedKey);
build.cmp(tmp, luauNodeKeyValue(node));
build.jcc(ConditionX64::NotEqual, label);
jumpIfNodeValueTagIs(build, node, LUA_TNIL, label);
}
void jumpOnNumberCmp(AssemblyBuilderX64& build, RegisterX64 tmp, OperandX64 lhs, OperandX64 rhs, IrCondition cond, Label& label);
void jumpOnAnyCmpFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, IrCondition cond, Label& label);
void getTableNodeAtCachedSlot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, RegisterX64 table, int pcpos);
void convertNumberToIndexOrJump(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 numd, RegisterX64 numi, Label& label);
void callArithHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, OperandX64 c, TMS tm);
void callLengthHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb);
void callPrepareForN(IrRegAllocX64& regs, AssemblyBuilderX64& build, int limit, int step, int init);
void callGetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra);
void callSetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra);
void checkObjectBarrierConditions(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 object, int ra, Label& skip);
void callBarrierObject(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 object, IrOp objectOp, int ra);
void callBarrierTableFast(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 table, IrOp tableOp);
void callStepGc(IrRegAllocX64& regs, AssemblyBuilderX64& build);
void emitExit(AssemblyBuilderX64& build, bool continueInVm);
void emitUpdateBase(AssemblyBuilderX64& build);
void emitInterrupt(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos);
void emitFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int offset, int pcpos);
void emitContinueCallInVm(AssemblyBuilderX64& build);
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,493 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "EmitInstructionX64.h"
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/IrRegAllocX64.h"
#include "CustomExecUtils.h"
#include "EmitCommonX64.h"
namespace Luau
{
namespace CodeGen
{
namespace X64
{
void emitInstCall(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults)
{
build.mov(rArg1, rState);
build.lea(rArg2, luauRegAddress(ra));
if (nparams == LUA_MULTRET)
build.mov(rArg3, qword[rState + offsetof(lua_State, top)]);
else
build.lea(rArg3, luauRegAddress(ra + 1 + nparams));
build.mov(dwordReg(rArg4), nresults);
build.call(qword[rNativeContext + offsetof(NativeContext, callProlog)]);
RegisterX64 ccl = rax; // Returned from callProlog
emitUpdateBase(build);
Label cFuncCall;
build.test(byte[ccl + offsetof(Closure, isC)], 1);
build.jcc(ConditionX64::NotZero, cFuncCall);
{
RegisterX64 proto = rcx; // Sync with emitContinueCallInVm
RegisterX64 ci = rdx;
RegisterX64 argi = rsi;
RegisterX64 argend = rdi;
build.mov(proto, qword[ccl + offsetof(Closure, l.p)]);
// Switch current Closure
build.mov(sClosure, ccl); // Last use of 'ccl'
build.mov(ci, qword[rState + offsetof(lua_State, ci)]);
Label fillnil, exitfillnil;
// argi = L->top
build.mov(argi, qword[rState + offsetof(lua_State, top)]);
// argend = L->base + p->numparams
build.movzx(eax, byte[proto + offsetof(Proto, numparams)]);
build.shl(eax, kTValueSizeLog2);
build.lea(argend, addr[rBase + rax]);
// while (argi < argend) setnilvalue(argi++);
build.setLabel(fillnil);
build.cmp(argi, argend);
build.jcc(ConditionX64::NotBelow, exitfillnil);
build.mov(dword[argi + offsetof(TValue, tt)], LUA_TNIL);
build.add(argi, sizeof(TValue));
build.jmp(fillnil); // This loop rarely runs so it's not worth repeating cmp/jcc
build.setLabel(exitfillnil);
// Set L->top to ci->top as most functions expect (no vararg)
build.mov(rax, qword[ci + offsetof(CallInfo, top)]);
build.mov(qword[rState + offsetof(lua_State, top)], rax);
// But if it is vararg, update it to 'argi'
Label skipVararg;
build.test(byte[proto + offsetof(Proto, is_vararg)], 1);
build.jcc(ConditionX64::Zero, skipVararg);
build.mov(qword[rState + offsetof(lua_State, top)], argi);
build.setLabel(skipVararg);
// Get native function entry
build.mov(rax, qword[proto + offsetof(Proto, exectarget)]);
build.test(rax, rax);
build.jcc(ConditionX64::Zero, helpers.continueCallInVm);
// Mark call frame as custom
build.mov(dword[ci + offsetof(CallInfo, flags)], LUA_CALLINFO_CUSTOM);
// Switch current constants
build.mov(rConstants, qword[proto + offsetof(Proto, k)]);
// Switch current code
build.mov(rdx, qword[proto + offsetof(Proto, code)]);
build.mov(sCode, rdx);
build.jmp(rax);
}
build.setLabel(cFuncCall);
{
// results = ccl->c.f(L);
build.mov(rArg1, rState);
build.call(qword[ccl + offsetof(Closure, c.f)]); // Last use of 'ccl'
RegisterX64 results = eax;
build.test(results, results); // test here will set SF=1 for a negative number and it always sets OF to 0
build.jcc(ConditionX64::Less, helpers.exitNoContinueVm); // jl jumps if SF != OF
// We have special handling for a small number of expected results below
if (nresults != 0 && nresults != 1)
{
build.mov(rArg1, rState);
build.mov(dwordReg(rArg2), nresults);
build.mov(dwordReg(rArg3), results);
build.call(qword[rNativeContext + offsetof(NativeContext, callEpilogC)]);
emitUpdateBase(build);
return;
}
RegisterX64 ci = rdx;
RegisterX64 cip = rcx;
RegisterX64 vali = rsi;
build.mov(ci, qword[rState + offsetof(lua_State, ci)]);
build.lea(cip, addr[ci - sizeof(CallInfo)]);
// L->base = cip->base
build.mov(rBase, qword[cip + offsetof(CallInfo, base)]);
build.mov(qword[rState + offsetof(lua_State, base)], rBase);
if (nresults == 1)
{
// Opportunistically copy the result we expected from (L->top - results)
build.mov(vali, qword[rState + offsetof(lua_State, top)]);
build.shl(results, kTValueSizeLog2);
build.sub(vali, qwordReg(results));
build.vmovups(xmm0, xmmword[vali]);
build.vmovups(luauReg(ra), xmm0);
Label skipnil;
// If there was no result, override the value with 'nil'
build.test(results, results);
build.jcc(ConditionX64::NotZero, skipnil);
build.mov(luauRegTag(ra), LUA_TNIL);
build.setLabel(skipnil);
}
// L->ci = cip
build.mov(qword[rState + offsetof(lua_State, ci)], cip);
// L->top = cip->top
build.mov(rax, qword[cip + offsetof(CallInfo, top)]);
build.mov(qword[rState + offsetof(lua_State, top)], rax);
}
}
void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults)
{
RegisterX64 ci = r8;
RegisterX64 cip = r9;
RegisterX64 res = rdi;
RegisterX64 nresults = esi;
build.mov(ci, qword[rState + offsetof(lua_State, ci)]);
build.lea(cip, addr[ci - sizeof(CallInfo)]);
// res = ci->func; note: we assume CALL always puts func+args and expects results to start at func
build.mov(res, qword[ci + offsetof(CallInfo, func)]);
// nresults = ci->nresults
build.mov(nresults, dword[ci + offsetof(CallInfo, nresults)]);
{
Label skipResultCopy;
RegisterX64 counter = ecx;
if (actualResults == 0)
{
// Our instruction doesn't have any results, so just fill the results expected by the parent with 'nil'
build.test(nresults, nresults); // test here will set SF=1 for a negative number, ZF=1 for zero and OF=0
build.jcc(ConditionX64::LessEqual, skipResultCopy); // jle jumps if SF != OF or ZF == 1
build.mov(counter, nresults);
Label repeatNilLoop = build.setLabel();
build.mov(dword[res + offsetof(TValue, tt)], LUA_TNIL);
build.add(res, sizeof(TValue));
build.dec(counter);
build.jcc(ConditionX64::NotZero, repeatNilLoop);
}
else if (actualResults == 1)
{
// Try setting our 1 result
build.test(nresults, nresults);
build.jcc(ConditionX64::Zero, skipResultCopy);
build.lea(counter, addr[nresults - 1]);
build.vmovups(xmm0, luauReg(ra));
build.vmovups(xmmword[res], xmm0);
build.add(res, sizeof(TValue));
// Fill the rest of the expected results with 'nil'
build.test(counter, counter); // test here will set SF=1 for a negative number, ZF=1 for zero and OF=0
build.jcc(ConditionX64::LessEqual, skipResultCopy); // jle jumps if SF != OF or ZF == 1
Label repeatNilLoop = build.setLabel();
build.mov(dword[res + offsetof(TValue, tt)], LUA_TNIL);
build.add(res, sizeof(TValue));
build.dec(counter);
build.jcc(ConditionX64::NotZero, repeatNilLoop);
}
else
{
RegisterX64 vali = rax;
RegisterX64 valend = rdx;
// Copy return values into parent stack (but only up to nresults!)
build.test(nresults, nresults);
build.jcc(ConditionX64::Zero, skipResultCopy);
// vali = ra
build.lea(vali, luauRegAddress(ra));
// Copy as much as possible for MULTRET calls, and only as much as needed otherwise
if (actualResults == LUA_MULTRET)
build.mov(valend, qword[rState + offsetof(lua_State, top)]); // valend = L->top
else
build.lea(valend, luauRegAddress(ra + actualResults)); // valend = ra + actualResults
build.mov(counter, nresults);
Label repeatValueLoop, exitValueLoop;
build.setLabel(repeatValueLoop);
build.cmp(vali, valend);
build.jcc(ConditionX64::NotBelow, exitValueLoop);
build.vmovups(xmm0, xmmword[vali]);
build.vmovups(xmmword[res], xmm0);
build.add(vali, sizeof(TValue));
build.add(res, sizeof(TValue));
build.dec(counter);
build.jcc(ConditionX64::NotZero, repeatValueLoop);
build.setLabel(exitValueLoop);
// Fill the rest of the expected results with 'nil'
build.test(counter, counter); // test here will set SF=1 for a negative number, ZF=1 for zero and OF=0
build.jcc(ConditionX64::LessEqual, skipResultCopy); // jle jumps if SF != OF or ZF == 1
Label repeatNilLoop = build.setLabel();
build.mov(dword[res + offsetof(TValue, tt)], LUA_TNIL);
build.add(res, sizeof(TValue));
build.dec(counter);
build.jcc(ConditionX64::NotZero, repeatNilLoop);
}
build.setLabel(skipResultCopy);
}
build.mov(qword[rState + offsetof(lua_State, ci)], cip); // L->ci = cip
build.mov(rBase, qword[cip + offsetof(CallInfo, base)]); // sync base = L->base while we have a chance
build.mov(qword[rState + offsetof(lua_State, base)], rBase); // L->base = cip->base
// Start with result for LUA_MULTRET/exit value
build.mov(qword[rState + offsetof(lua_State, top)], res); // L->top = res
// Unlikely, but this might be the last return from VM
build.test(byte[ci + offsetof(CallInfo, flags)], LUA_CALLINFO_RETURN);
build.jcc(ConditionX64::NotZero, helpers.exitNoContinueVm);
Label skipFixedRetTop;
build.test(nresults, nresults); // test here will set SF=1 for a negative number and it always sets OF to 0
build.jcc(ConditionX64::Less, skipFixedRetTop); // jl jumps if SF != OF
build.mov(rax, qword[cip + offsetof(CallInfo, top)]);
build.mov(qword[rState + offsetof(lua_State, top)], rax); // L->top = cip->top
build.setLabel(skipFixedRetTop);
// Returning back to the previous function is a bit tricky
// Registers alive: r9 (cip)
RegisterX64 proto = rcx;
RegisterX64 execdata = rbx;
// Change closure
build.mov(rax, qword[cip + offsetof(CallInfo, func)]);
build.mov(rax, qword[rax + offsetof(TValue, value.gc)]);
build.mov(sClosure, rax);
build.mov(proto, qword[rax + offsetof(Closure, l.p)]);
build.mov(execdata, qword[proto + offsetof(Proto, execdata)]);
build.test(byte[cip + offsetof(CallInfo, flags)], LUA_CALLINFO_CUSTOM);
build.jcc(ConditionX64::Zero, helpers.exitContinueVm); // Continue in interpreter if function has no native data
// Change constants
build.mov(rConstants, qword[proto + offsetof(Proto, k)]);
// Change code
build.mov(rdx, qword[proto + offsetof(Proto, code)]);
build.mov(sCode, rdx);
build.mov(rax, qword[cip + offsetof(CallInfo, savedpc)]);
// To get instruction index from instruction pointer, we need to divide byte offset by 4
// But we will actually need to scale instruction index by 4 back to byte offset later so it cancels out
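// Concretely: rax = savedpc - code is the byte offset (instruction index * 4), and since execdata entries are 4 bytes wide, that byte offset indexes execdata directly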
build.sub(rax, rdx);
// Get new instruction location and jump to it
build.mov(edx, dword[execdata + rax]);
build.add(rdx, qword[proto + offsetof(Proto, exectarget)]);
build.jmp(rdx);
}
void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, int count, uint32_t index)
{
OperandX64 last = index + count - 1;
// Using non-volatile 'rbx' for dynamic 'count' value (for LUA_MULTRET) to skip later recomputation
// We also keep 'count' scaled by sizeof(TValue) here as it helps in the loop below
RegisterX64 cscaled = rbx;
if (count == LUA_MULTRET)
{
RegisterX64 tmp = rax;
// count = L->top - rb
build.mov(cscaled, qword[rState + offsetof(lua_State, top)]);
build.lea(tmp, luauRegAddress(rb));
build.sub(cscaled, tmp); // Using byte difference
// L->top = L->ci->top
build.mov(tmp, qword[rState + offsetof(lua_State, ci)]);
build.mov(tmp, qword[tmp + offsetof(CallInfo, top)]);
build.mov(qword[rState + offsetof(lua_State, top)], tmp);
// last = index + count - 1;
last = edx;
build.mov(last, dwordReg(cscaled));
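// cscaled holds the byte difference, so shifting right by kTValueSizeLog2 converts it back to an element count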
build.shr(last, kTValueSizeLog2);
build.add(last, index - 1);
}
Label skipResize;
RegisterX64 table = regs.takeReg(rax, kInvalidInstIdx);
build.mov(table, luauRegValue(ra));
// Resize if h->sizearray < last
build.cmp(dword[table + offsetof(Table, sizearray)], last);
build.jcc(ConditionX64::NotBelow, skipResize);
// Argument setup reordered to avoid conflicts
LUAU_ASSERT(rArg3 != table);
build.mov(dwordReg(rArg3), last);
build.mov(rArg2, table);
build.mov(rArg1, rState);
build.call(qword[rNativeContext + offsetof(NativeContext, luaH_resizearray)]);
build.mov(table, luauRegValue(ra)); // Reload clobbered register value
build.setLabel(skipResize);
RegisterX64 arrayDst = rdx;
RegisterX64 offset = rcx;
build.mov(arrayDst, qword[table + offsetof(Table, array)]);
const int kUnrollSetListLimit = 4;
if (count != LUA_MULTRET && count <= kUnrollSetListLimit)
{
for (int i = 0; i < count; ++i)
{
// setobj2t(L, &array[index + i - 1], rb + i);
build.vmovups(xmm0, luauRegValue(rb + i));
build.vmovups(xmmword[arrayDst + (index + i - 1) * sizeof(TValue)], xmm0);
}
}
else
{
LUAU_ASSERT(count != 0);
build.xor_(offset, offset);
if (index != 1)
build.add(arrayDst, (index - 1) * sizeof(TValue));
Label repeatLoop, endLoop;
OperandX64 limit = count == LUA_MULTRET ? cscaled : OperandX64(count * sizeof(TValue));
// If count is static, we will always do at least one iteration
if (count == LUA_MULTRET)
{
build.cmp(offset, limit);
build.jcc(ConditionX64::NotBelow, endLoop);
}
build.setLabel(repeatLoop);
// setobj2t(L, &array[index + i - 1], rb + i);
build.vmovups(xmm0, xmmword[offset + rBase + rb * sizeof(TValue)]); // luauReg(rb) unwrapped to add offset
build.vmovups(xmmword[offset + arrayDst], xmm0);
build.add(offset, sizeof(TValue));
build.cmp(offset, limit);
build.jcc(ConditionX64::Below, repeatLoop);
build.setLabel(endLoop);
}
callBarrierTableFast(regs, build, table, {});
}
void emitInstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat)
{
// ipairs-style traversal is handled in IR
LUAU_ASSERT(aux >= 0);
// This is a fast-path for builtin table iteration, tag check for 'ra' has to be performed before emitting this instruction
// Registers are chosen in this way to simplify fallback code for the node part
RegisterX64 table = rArg2;
RegisterX64 index = rArg3;
RegisterX64 elemPtr = rax;
build.mov(table, luauRegValue(ra + 1));
build.mov(index, luauRegValue(ra + 2));
// &array[index]
build.mov(dwordReg(elemPtr), dwordReg(index));
build.shl(dwordReg(elemPtr), kTValueSizeLog2);
build.add(elemPtr, qword[table + offsetof(Table, array)]);
// Clear extra variables since we might have more than two
for (int i = 2; i < aux; ++i)
build.mov(luauRegTag(ra + 3 + i), LUA_TNIL);
Label skipArray, skipArrayNil;
// First we advance index through the array portion
// while (unsigned(index) < unsigned(sizearray))
Label arrayLoop = build.setLabel();
build.cmp(dwordReg(index), dword[table + offsetof(Table, sizearray)]);
build.jcc(ConditionX64::NotBelow, skipArray);
// The index is incremented up front: if the element is nil we advance to the next array slot, and if it isn't, the code below still needs 'index + 1'
build.inc(index);
build.cmp(dword[elemPtr + offsetof(TValue, tt)], LUA_TNIL);
build.jcc(ConditionX64::Equal, skipArrayNil);
// setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
build.mov(luauRegValue(ra + 2), index);
// Tag should already be set to lightuserdata
// setnvalue(ra + 3, double(index + 1));
build.vcvtsi2sd(xmm0, xmm0, dwordReg(index));
build.vmovsd(luauRegValue(ra + 3), xmm0);
build.mov(luauRegTag(ra + 3), LUA_TNUMBER);
// setobj2s(L, ra + 4, e);
setLuauReg(build, xmm2, ra + 4, xmmword[elemPtr]);
build.jmp(loopRepeat);
build.setLabel(skipArrayNil);
// Index already incremented, advance to next array element
build.add(elemPtr, sizeof(TValue));
build.jmp(arrayLoop);
build.setLabel(skipArray);
// Call helper to assign next node value or to signal loop exit
build.mov(rArg1, rState);
// rArg2 and rArg3 are already set
build.lea(rArg4, luauRegAddress(ra));
build.call(qword[rNativeContext + offsetof(NativeContext, forgLoopNodeIter)]);
build.test(al, al);
build.jcc(ConditionX64::NotZero, loopRepeat);
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,27 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
struct Label;
struct ModuleHelpers;
namespace X64
{
class AssemblyBuilderX64;
struct IrRegAllocX64;
void emitInstCall(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults);
void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults);
void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, int count, uint32_t index);
void emitInstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat);
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,691 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/IrAnalysis.h"
#include "Luau/DenseHash.h"
#include "Luau/IrData.h"
#include "Luau/IrUtils.h"
#include "lobject.h"
#include <bitset>
#include <stddef.h>
namespace Luau
{
namespace CodeGen
{
void updateUseCounts(IrFunction& function)
{
std::vector<IrBlock>& blocks = function.blocks;
std::vector<IrInst>& instructions = function.instructions;
for (IrBlock& block : blocks)
block.useCount = 0;
for (IrInst& inst : instructions)
inst.useCount = 0;
auto checkOp = [&](IrOp op) {
if (op.kind == IrOpKind::Inst)
{
IrInst& target = instructions[op.index];
LUAU_ASSERT(target.useCount < 0xffff);
target.useCount++;
}
else if (op.kind == IrOpKind::Block)
{
IrBlock& target = blocks[op.index];
LUAU_ASSERT(target.useCount < 0xffff);
target.useCount++;
}
};
for (IrInst& inst : instructions)
{
checkOp(inst.a);
checkOp(inst.b);
checkOp(inst.c);
checkOp(inst.d);
checkOp(inst.e);
checkOp(inst.f);
}
}
void updateLastUseLocations(IrFunction& function)
{
std::vector<IrInst>& instructions = function.instructions;
for (IrInst& inst : instructions)
inst.lastUse = 0;
for (size_t instIdx = 0; instIdx < instructions.size(); ++instIdx)
{
IrInst& inst = instructions[instIdx];
auto checkOp = [&](IrOp op) {
if (op.kind == IrOpKind::Inst)
instructions[op.index].lastUse = uint32_t(instIdx);
};
if (isPseudo(inst.cmd))
continue;
checkOp(inst.a);
checkOp(inst.b);
checkOp(inst.c);
checkOp(inst.d);
checkOp(inst.e);
checkOp(inst.f);
}
}
uint32_t getNextInstUse(IrFunction& function, uint32_t targetInstIdx, uint32_t startInstIdx)
{
LUAU_ASSERT(startInstIdx < function.instructions.size());
IrInst& targetInst = function.instructions[targetInstIdx];
for (uint32_t i = startInstIdx; i <= targetInst.lastUse; i++)
{
IrInst& inst = function.instructions[i];
if (isPseudo(inst.cmd))
continue;
if (inst.a.kind == IrOpKind::Inst && inst.a.index == targetInstIdx)
return i;
if (inst.b.kind == IrOpKind::Inst && inst.b.index == targetInstIdx)
return i;
if (inst.c.kind == IrOpKind::Inst && inst.c.index == targetInstIdx)
return i;
if (inst.d.kind == IrOpKind::Inst && inst.d.index == targetInstIdx)
return i;
if (inst.e.kind == IrOpKind::Inst && inst.e.index == targetInstIdx)
return i;
if (inst.f.kind == IrOpKind::Inst && inst.f.index == targetInstIdx)
return i;
}
// There must be a next use at or before the recorded last use location
LUAU_ASSERT(!"failed to find next use");
return targetInst.lastUse;
}
std::pair<uint32_t, uint32_t> getLiveInOutValueCount(IrFunction& function, IrBlock& block)
{
uint32_t liveIns = 0;
uint32_t liveOuts = 0;
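// liveOuts starts as the sum of value use counts in the block; uses made inside the block itself are subtracted in checkOp, leaving only uses from outside (live-outs)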
auto checkOp = [&](IrOp op) {
if (op.kind == IrOpKind::Inst)
{
if (op.index >= block.start && op.index <= block.finish)
liveOuts--;
else
liveIns++;
}
};
for (uint32_t instIdx = block.start; instIdx <= block.finish; instIdx++)
{
IrInst& inst = function.instructions[instIdx];
if (isPseudo(inst.cmd))
continue;
liveOuts += inst.useCount;
checkOp(inst.a);
checkOp(inst.b);
checkOp(inst.c);
checkOp(inst.d);
checkOp(inst.e);
checkOp(inst.f);
}
return std::make_pair(liveIns, liveOuts);
}
uint32_t getLiveInValueCount(IrFunction& function, IrBlock& block)
{
return getLiveInOutValueCount(function, block).first;
}
uint32_t getLiveOutValueCount(IrFunction& function, IrBlock& block)
{
return getLiveInOutValueCount(function, block).second;
}
void requireVariadicSequence(RegisterSet& sourceRs, const RegisterSet& defRs, uint8_t varargStart)
{
if (!defRs.varargSeq)
{
// Peel away registers from variadic sequence that we define
while (defRs.regs.test(varargStart))
varargStart++;
LUAU_ASSERT(!sourceRs.varargSeq || sourceRs.varargStart == varargStart);
sourceRs.varargSeq = true;
sourceRs.varargStart = varargStart;
}
else
{
// The variadic use sequence might include registers before the def sequence starts
for (int i = varargStart; i < defRs.varargStart; i++)
{
if (!defRs.regs.test(i))
sourceRs.regs.set(i);
}
}
}
static RegisterSet computeBlockLiveInRegSet(IrFunction& function, const IrBlock& block, RegisterSet& defRs, std::bitset<256>& capturedRegs)
{
RegisterSet inRs;
auto def = [&](IrOp op, int offset = 0) {
defRs.regs.set(vmRegOp(op) + offset, true);
};
auto use = [&](IrOp op, int offset = 0) {
if (!defRs.regs.test(vmRegOp(op) + offset))
inRs.regs.set(vmRegOp(op) + offset, true);
};
auto maybeDef = [&](IrOp op) {
if (op.kind == IrOpKind::VmReg)
defRs.regs.set(vmRegOp(op), true);
};
auto maybeUse = [&](IrOp op) {
if (op.kind == IrOpKind::VmReg)
{
if (!defRs.regs.test(vmRegOp(op)))
inRs.regs.set(vmRegOp(op), true);
}
};
auto defVarargs = [&](uint8_t varargStart) {
defRs.varargSeq = true;
defRs.varargStart = varargStart;
};
auto useVarargs = [&](uint8_t varargStart) {
requireVariadicSequence(inRs, defRs, varargStart);
// Variadic sequence has been consumed
defRs.varargSeq = false;
defRs.varargStart = 0;
};
auto defRange = [&](int start, int count) {
if (count == -1)
{
defVarargs(start);
}
else
{
for (int i = start; i < start + count; i++)
defRs.regs.set(i, true);
}
};
auto useRange = [&](int start, int count) {
if (count == -1)
{
useVarargs(start);
}
else
{
for (int i = start; i < start + count; i++)
{
if (!defRs.regs.test(i))
inRs.regs.set(i, true);
}
}
};
for (uint32_t instIdx = block.start; instIdx <= block.finish; instIdx++)
{
const IrInst& inst = function.instructions[instIdx];
// For correct analysis, all instruction uses must be handled before handling the definitions
switch (inst.cmd)
{
case IrCmd::LOAD_TAG:
case IrCmd::LOAD_POINTER:
case IrCmd::LOAD_DOUBLE:
case IrCmd::LOAD_INT:
case IrCmd::LOAD_TVALUE:
maybeUse(inst.a); // Argument can also be a VmConst
break;
case IrCmd::STORE_TAG:
case IrCmd::STORE_POINTER:
case IrCmd::STORE_DOUBLE:
case IrCmd::STORE_INT:
case IrCmd::STORE_VECTOR:
case IrCmd::STORE_TVALUE:
maybeDef(inst.a); // Argument can also be a pointer value
break;
case IrCmd::JUMP_IF_TRUTHY:
case IrCmd::JUMP_IF_FALSY:
use(inst.a);
break;
case IrCmd::JUMP_CMP_ANY:
use(inst.a);
use(inst.b);
break;
// A <- B, C
case IrCmd::DO_ARITH:
case IrCmd::GET_TABLE:
use(inst.b);
maybeUse(inst.c); // Argument can also be a VmConst
def(inst.a);
break;
case IrCmd::SET_TABLE:
use(inst.a);
use(inst.b);
maybeUse(inst.c); // Argument can also be a VmConst
break;
// A <- B
case IrCmd::DO_LEN:
use(inst.b);
def(inst.a);
break;
case IrCmd::GET_IMPORT:
def(inst.a);
break;
case IrCmd::CONCAT:
useRange(vmRegOp(inst.a), function.uintOp(inst.b));
defRange(vmRegOp(inst.a), function.uintOp(inst.b));
break;
case IrCmd::GET_UPVALUE:
def(inst.a);
break;
case IrCmd::SET_UPVALUE:
use(inst.b);
break;
case IrCmd::PREPARE_FORN:
use(inst.a);
use(inst.b);
use(inst.c);
def(inst.a);
def(inst.b);
def(inst.c);
break;
case IrCmd::INTERRUPT:
break;
case IrCmd::BARRIER_OBJ:
case IrCmd::BARRIER_TABLE_FORWARD:
use(inst.b);
break;
case IrCmd::CLOSE_UPVALS:
// Closing an upvalue should be counted as a register use (it copies the fresh register value)
// But we lack the required information about the specific set of registers that are affected
// Because we don't plan to optimize captured registers atm, we skip full dataflow analysis for them right now
break;
case IrCmd::CAPTURE:
maybeUse(inst.a);
if (function.boolOp(inst.b))
capturedRegs.set(vmRegOp(inst.a), true);
break;
case IrCmd::SETLIST:
use(inst.b);
useRange(vmRegOp(inst.c), function.intOp(inst.d));
break;
case IrCmd::CALL:
use(inst.a);
useRange(vmRegOp(inst.a) + 1, function.intOp(inst.b));
defRange(vmRegOp(inst.a), function.intOp(inst.c));
break;
case IrCmd::RETURN:
useRange(vmRegOp(inst.a), function.intOp(inst.b));
break;
// TODO: FASTCALL is more restrictive than INVOKE_FASTCALL; we should either determine the exact semantics, or rework it
case IrCmd::FASTCALL:
case IrCmd::INVOKE_FASTCALL:
if (int count = function.intOp(inst.e); count != -1)
{
if (count >= 3)
{
LUAU_ASSERT(inst.d.kind == IrOpKind::VmReg && vmRegOp(inst.d) == vmRegOp(inst.c) + 1);
useRange(vmRegOp(inst.c), count);
}
else
{
if (count >= 1)
use(inst.c);
if (count >= 2)
maybeUse(inst.d); // Argument can also be a VmConst
}
}
else
{
useVarargs(vmRegOp(inst.c));
}
// Multiple return sequences (count == -1) are defined by ADJUST_STACK_TO_REG
if (int count = function.intOp(inst.f); count != -1)
defRange(vmRegOp(inst.b), count);
break;
case IrCmd::FORGLOOP:
// First register is not used by the instruction; we check that it's still 'nil' with CHECK_TAG
use(inst.a, 1);
use(inst.a, 2);
def(inst.a, 2);
defRange(vmRegOp(inst.a) + 3, function.intOp(inst.b));
break;
case IrCmd::FORGLOOP_FALLBACK:
useRange(vmRegOp(inst.a), 3);
def(inst.a, 2);
defRange(vmRegOp(inst.a) + 3, uint8_t(function.intOp(inst.b))); // ignore most significant bit
break;
case IrCmd::FORGPREP_XNEXT_FALLBACK:
use(inst.b);
break;
case IrCmd::FALLBACK_GETGLOBAL:
def(inst.b);
break;
case IrCmd::FALLBACK_SETGLOBAL:
use(inst.b);
break;
case IrCmd::FALLBACK_GETTABLEKS:
use(inst.c);
def(inst.b);
break;
case IrCmd::FALLBACK_SETTABLEKS:
use(inst.b);
use(inst.c);
break;
case IrCmd::FALLBACK_NAMECALL:
use(inst.c);
defRange(vmRegOp(inst.b), 2);
break;
case IrCmd::FALLBACK_PREPVARARGS:
// No effect on explicitly referenced registers
break;
case IrCmd::FALLBACK_GETVARARGS:
defRange(vmRegOp(inst.b), function.intOp(inst.c));
break;
case IrCmd::FALLBACK_NEWCLOSURE:
def(inst.b);
break;
case IrCmd::FALLBACK_DUPCLOSURE:
def(inst.b);
break;
case IrCmd::FALLBACK_FORGPREP:
use(inst.b);
defRange(vmRegOp(inst.b), 3);
break;
case IrCmd::ADJUST_STACK_TO_REG:
defRange(vmRegOp(inst.a), -1);
break;
case IrCmd::ADJUST_STACK_TO_TOP:
// While this can be considered a vararg consumer, it is already handled in fastcall instructions
break;
default:
// All instructions which reference registers have to be handled explicitly
LUAU_ASSERT(inst.a.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.b.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.c.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.d.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.e.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.f.kind != IrOpKind::VmReg);
break;
}
}
return inRs;
}
// The algorithm used here is commonly known as backwards data-flow analysis.
// For each block, we track 'upward-exposed' (live-in) uses of registers - a use of a register that hasn't been defined in the block yet.
// We also track the set of registers that were defined in the block.
// When initial live-in sets of registers are computed, propagation of those uses upwards through predecessors is performed.
// If a predecessor doesn't define the register, it has to be added to that predecessor's live-in set as well.
// Extending the set of live-in registers of a block requires re-checking of that block.
// Propagation runs iteratively, using a worklist of blocks to visit until a fixed point is reached.
// This algorithm can be easily extended to cover phi instructions, but we don't use those yet.
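// As a small illustration (hypothetical blocks and registers): given B0 (defines R0, jumps to B1)
// and B1 (uses R0 and R1), the initial sets are in(B1) = {R0, R1} and in(B0) = {}. Propagation then
// requires B0 to provide B1's live-ins: R0 is defined locally, but R1 is not, so R1 is added to
// in(B0) and B0's predecessors are re-queued, repeating until no live-in set changes.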
static void computeCfgLiveInOutRegSets(IrFunction& function)
{
CfgInfo& info = function.cfg;
// Clear existing data
// 'in' and 'captured' data is not cleared because it will be overwritten below
info.def.clear();
info.out.clear();
// Try to compute Luau VM register use-def info
info.in.resize(function.blocks.size());
info.def.resize(function.blocks.size());
info.out.resize(function.blocks.size());
// Captured registers are tracked for the whole function
// It should be possible to have a more precise analysis for them in the future
std::bitset<256> capturedRegs;
// First we compute live-in set of each block
for (size_t blockIdx = 0; blockIdx < function.blocks.size(); blockIdx++)
{
const IrBlock& block = function.blocks[blockIdx];
if (block.kind == IrBlockKind::Dead)
continue;
info.in[blockIdx] = computeBlockLiveInRegSet(function, block, info.def[blockIdx], capturedRegs);
}
info.captured.regs = capturedRegs;
// With live-in sets ready, we can arrive at a fixed point for both in/out registers by requesting required registers from predecessors
std::vector<uint32_t> worklist;
std::vector<uint8_t> inWorklist;
inWorklist.resize(function.blocks.size(), false);
// We will have to visit each block at least once, so we add all of them to the worklist immediately
for (size_t blockIdx = 0; blockIdx < function.blocks.size(); blockIdx++)
{
const IrBlock& block = function.blocks[blockIdx];
if (block.kind == IrBlockKind::Dead)
continue;
worklist.push_back(uint32_t(blockIdx));
inWorklist[blockIdx] = true;
}
while (!worklist.empty())
{
uint32_t blockIdx = worklist.back();
worklist.pop_back();
inWorklist[blockIdx] = false;
IrBlock& curr = function.blocks[blockIdx];
RegisterSet& inRs = info.in[blockIdx];
RegisterSet& defRs = info.def[blockIdx];
RegisterSet& outRs = info.out[blockIdx];
// Current block has to provide all registers in successor blocks
BlockIteratorWrapper successorsIt = successors(info, blockIdx);
for (uint32_t succIdx : successorsIt)
{
IrBlock& succ = function.blocks[succIdx];
// This is a step away from the usual definition of live range flow through CFG
// Exit from a regular block to a fallback block is not considered a block terminator
// This is because fallback blocks define an alternative implementation of the same operations
// This can cause the current block to define more registers than were actually available at fallback entry
if (curr.kind != IrBlockKind::Fallback && succ.kind == IrBlockKind::Fallback)
{
// If this is the only successor, this skip will not be valid
LUAU_ASSERT(successorsIt.size() != 1);
continue;
}
const RegisterSet& succRs = info.in[succIdx];
outRs.regs |= succRs.regs;
if (succRs.varargSeq)
{
LUAU_ASSERT(!outRs.varargSeq || outRs.varargStart == succRs.varargStart);
outRs.varargSeq = true;
outRs.varargStart = succRs.varargStart;
}
}
RegisterSet oldInRs = inRs;
// If current block didn't define a live-out, it has to be live-in
inRs.regs |= outRs.regs & ~defRs.regs;
if (outRs.varargSeq)
requireVariadicSequence(inRs, defRs, outRs.varargStart);
// If we have new live-ins, we have to notify all predecessors
// We don't allow changes to the start of the variadic sequence, so we skip checking that member
if (inRs.regs != oldInRs.regs || inRs.varargSeq != oldInRs.varargSeq)
{
for (uint32_t predIdx : predecessors(info, blockIdx))
{
if (!inWorklist[predIdx])
{
worklist.push_back(predIdx);
inWorklist[predIdx] = true;
}
}
}
}
// If Proto data is available, validate that entry block arguments match required registers
if (function.proto)
{
RegisterSet& entryIn = info.in[0];
LUAU_ASSERT(!entryIn.varargSeq);
for (size_t i = 0; i < entryIn.regs.size(); i++)
LUAU_ASSERT(!entryIn.regs.test(i) || i < function.proto->numparams);
}
}
static void computeCfgBlockEdges(IrFunction& function)
{
CfgInfo& info = function.cfg;
// Clear existing data
info.predecessorsOffsets.clear();
info.successorsOffsets.clear();
// Compute predecessors block edges
info.predecessorsOffsets.reserve(function.blocks.size());
info.successorsOffsets.reserve(function.blocks.size());
int edgeCount = 0;
for (const IrBlock& block : function.blocks)
{
info.predecessorsOffsets.push_back(edgeCount);
edgeCount += block.useCount;
}
info.predecessors.resize(edgeCount);
info.successors.resize(edgeCount);
edgeCount = 0;
for (size_t blockIdx = 0; blockIdx < function.blocks.size(); blockIdx++)
{
const IrBlock& block = function.blocks[blockIdx];
info.successorsOffsets.push_back(edgeCount);
if (block.kind == IrBlockKind::Dead)
continue;
for (uint32_t instIdx = block.start; instIdx <= block.finish; instIdx++)
{
const IrInst& inst = function.instructions[instIdx];
auto checkOp = [&](IrOp op) {
if (op.kind == IrOpKind::Block)
{
// We use a trick here: the starting offset of the predecessor list doubles as the write position for the next predecessor
// The values will be adjusted back in a separate loop later
info.predecessors[info.predecessorsOffsets[op.index]++] = uint32_t(blockIdx);
info.successors[edgeCount++] = op.index;
}
};
checkOp(inst.a);
checkOp(inst.b);
checkOp(inst.c);
checkOp(inst.d);
checkOp(inst.e);
checkOp(inst.f);
}
}
// Offsets into the predecessor list were used as iterators in the previous loop
// To adjust them back, the block use count is subtracted (the predecessor count is equal to how many uses the block has)
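// Example (sketch): with edges B0->B1, B0->B2, B1->B2, block B2's predecessor slots start at offset 1;
// two writes advance predecessorsOffsets[2] to 3, and subtracting its use count (2) restores the original offset of 1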
for (size_t blockIdx = 0; blockIdx < function.blocks.size(); blockIdx++)
{
const IrBlock& block = function.blocks[blockIdx];
info.predecessorsOffsets[blockIdx] -= block.useCount;
}
}
void computeCfgInfo(IrFunction& function)
{
computeCfgBlockEdges(function);
computeCfgLiveInOutRegSets(function);
}
BlockIteratorWrapper predecessors(const CfgInfo& cfg, uint32_t blockIdx)
{
LUAU_ASSERT(blockIdx < cfg.predecessorsOffsets.size());
uint32_t start = cfg.predecessorsOffsets[blockIdx];
uint32_t end = blockIdx + 1 < cfg.predecessorsOffsets.size() ? cfg.predecessorsOffsets[blockIdx + 1] : uint32_t(cfg.predecessors.size());
return BlockIteratorWrapper{cfg.predecessors.data() + start, cfg.predecessors.data() + end};
}
BlockIteratorWrapper successors(const CfgInfo& cfg, uint32_t blockIdx)
{
LUAU_ASSERT(blockIdx < cfg.successorsOffsets.size());
uint32_t start = cfg.successorsOffsets[blockIdx];
uint32_t end = blockIdx + 1 < cfg.successorsOffsets.size() ? cfg.successorsOffsets[blockIdx + 1] : uint32_t(cfg.successors.size());
return BlockIteratorWrapper{cfg.successors.data() + start, cfg.successors.data() + end};
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,651 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/IrBuilder.h"
#include "Luau/IrAnalysis.h"
#include "Luau/IrUtils.h"
#include "CustomExecUtils.h"
#include "IrTranslation.h"
#include "lapi.h"
#include <string.h>
namespace Luau
{
namespace CodeGen
{
constexpr unsigned kNoAssociatedBlockIndex = ~0u;
IrBuilder::IrBuilder()
: constantMap({IrConstKind::Bool, ~0ull})
{
}
void IrBuilder::buildFunctionIr(Proto* proto)
{
function.proto = proto;
// Rebuild original control flow blocks
rebuildBytecodeBasicBlocks(proto);
function.bcMapping.resize(proto->sizecode, {~0u, ~0u});
// Translate all instructions to IR inside blocks
for (int i = 0; i < proto->sizecode;)
{
const Instruction* pc = &proto->code[i];
LuauOpcode op = LuauOpcode(LUAU_INSN_OP(*pc));
int nexti = i + getOpLength(op);
LUAU_ASSERT(nexti <= proto->sizecode);
function.bcMapping[i] = {uint32_t(function.instructions.size()), ~0u};
// Begin new block at this instruction if it was in the bytecode or requested during translation
if (instIndexToBlock[i] != kNoAssociatedBlockIndex)
beginBlock(blockAtInst(i));
// We skip dead bytecode instructions when they appear after the block was already terminated
if (!inTerminatedBlock)
translateInst(op, pc, i);
i = nexti;
LUAU_ASSERT(i <= proto->sizecode);
// If we are going into a new block at the next instruction and it's a fallthrough, a jump has to be placed to mark block termination
if (i < int(instIndexToBlock.size()) && instIndexToBlock[i] != kNoAssociatedBlockIndex)
{
if (!isBlockTerminator(function.instructions.back().cmd))
inst(IrCmd::JUMP, blockAtInst(i));
}
}
// Now that everything has been generated, compute use counts
updateUseCounts(function);
}
void IrBuilder::rebuildBytecodeBasicBlocks(Proto* proto)
{
instIndexToBlock.resize(proto->sizecode, kNoAssociatedBlockIndex);
// Mark jump targets
std::vector<uint8_t> jumpTargets(proto->sizecode, 0);
for (int i = 0; i < proto->sizecode;)
{
const Instruction* pc = &proto->code[i];
LuauOpcode op = LuauOpcode(LUAU_INSN_OP(*pc));
int target = getJumpTarget(*pc, uint32_t(i));
if (target >= 0 && !isFastCall(op))
jumpTargets[target] = true;
i += getOpLength(op);
LUAU_ASSERT(i <= proto->sizecode);
}
// Bytecode blocks are created at bytecode jump targets and the start of a function
jumpTargets[0] = true;
for (int i = 0; i < proto->sizecode; i++)
{
if (jumpTargets[i])
{
IrOp b = block(IrBlockKind::Bytecode);
instIndexToBlock[i] = b.index;
}
}
}
void IrBuilder::translateInst(LuauOpcode op, const Instruction* pc, int i)
{
switch (op)
{
case LOP_NOP:
break;
case LOP_LOADNIL:
translateInstLoadNil(*this, pc);
break;
case LOP_LOADB:
translateInstLoadB(*this, pc, i);
break;
case LOP_LOADN:
translateInstLoadN(*this, pc);
break;
case LOP_LOADK:
translateInstLoadK(*this, pc);
break;
case LOP_LOADKX:
translateInstLoadKX(*this, pc);
break;
case LOP_MOVE:
translateInstMove(*this, pc);
break;
case LOP_GETGLOBAL:
translateInstGetGlobal(*this, pc, i);
break;
case LOP_SETGLOBAL:
translateInstSetGlobal(*this, pc, i);
break;
case LOP_CALL:
inst(IrCmd::INTERRUPT, constUint(i));
inst(IrCmd::SET_SAVEDPC, constUint(i + 1));
inst(IrCmd::CALL, vmReg(LUAU_INSN_A(*pc)), constInt(LUAU_INSN_B(*pc) - 1), constInt(LUAU_INSN_C(*pc) - 1));
if (activeFastcallFallback)
{
inst(IrCmd::JUMP, fastcallFallbackReturn);
beginBlock(fastcallFallbackReturn);
activeFastcallFallback = false;
}
break;
case LOP_RETURN:
inst(IrCmd::INTERRUPT, constUint(i));
inst(IrCmd::RETURN, vmReg(LUAU_INSN_A(*pc)), constInt(LUAU_INSN_B(*pc) - 1));
break;
case LOP_GETTABLE:
translateInstGetTable(*this, pc, i);
break;
case LOP_SETTABLE:
translateInstSetTable(*this, pc, i);
break;
case LOP_GETTABLEKS:
translateInstGetTableKS(*this, pc, i);
break;
case LOP_SETTABLEKS:
translateInstSetTableKS(*this, pc, i);
break;
case LOP_GETTABLEN:
translateInstGetTableN(*this, pc, i);
break;
case LOP_SETTABLEN:
translateInstSetTableN(*this, pc, i);
break;
case LOP_JUMP:
translateInstJump(*this, pc, i);
break;
case LOP_JUMPBACK:
translateInstJumpBack(*this, pc, i);
break;
case LOP_JUMPIF:
translateInstJumpIf(*this, pc, i, /* not_ */ false);
break;
case LOP_JUMPIFNOT:
translateInstJumpIf(*this, pc, i, /* not_ */ true);
break;
case LOP_JUMPIFEQ:
translateInstJumpIfEq(*this, pc, i, /* not_ */ false);
break;
case LOP_JUMPIFLE:
translateInstJumpIfCond(*this, pc, i, IrCondition::LessEqual);
break;
case LOP_JUMPIFLT:
translateInstJumpIfCond(*this, pc, i, IrCondition::Less);
break;
case LOP_JUMPIFNOTEQ:
translateInstJumpIfEq(*this, pc, i, /* not_ */ true);
break;
case LOP_JUMPIFNOTLE:
translateInstJumpIfCond(*this, pc, i, IrCondition::NotLessEqual);
break;
case LOP_JUMPIFNOTLT:
translateInstJumpIfCond(*this, pc, i, IrCondition::NotLess);
break;
case LOP_JUMPX:
translateInstJumpX(*this, pc, i);
break;
case LOP_JUMPXEQKNIL:
translateInstJumpxEqNil(*this, pc, i);
break;
case LOP_JUMPXEQKB:
translateInstJumpxEqB(*this, pc, i);
break;
case LOP_JUMPXEQKN:
translateInstJumpxEqN(*this, pc, i);
break;
case LOP_JUMPXEQKS:
translateInstJumpxEqS(*this, pc, i);
break;
case LOP_ADD:
translateInstBinary(*this, pc, i, TM_ADD);
break;
case LOP_SUB:
translateInstBinary(*this, pc, i, TM_SUB);
break;
case LOP_MUL:
translateInstBinary(*this, pc, i, TM_MUL);
break;
case LOP_DIV:
translateInstBinary(*this, pc, i, TM_DIV);
break;
case LOP_MOD:
translateInstBinary(*this, pc, i, TM_MOD);
break;
case LOP_POW:
translateInstBinary(*this, pc, i, TM_POW);
break;
case LOP_ADDK:
translateInstBinaryK(*this, pc, i, TM_ADD);
break;
case LOP_SUBK:
translateInstBinaryK(*this, pc, i, TM_SUB);
break;
case LOP_MULK:
translateInstBinaryK(*this, pc, i, TM_MUL);
break;
case LOP_DIVK:
translateInstBinaryK(*this, pc, i, TM_DIV);
break;
case LOP_MODK:
translateInstBinaryK(*this, pc, i, TM_MOD);
break;
case LOP_POWK:
translateInstBinaryK(*this, pc, i, TM_POW);
break;
case LOP_NOT:
translateInstNot(*this, pc);
break;
case LOP_MINUS:
translateInstMinus(*this, pc, i);
break;
case LOP_LENGTH:
translateInstLength(*this, pc, i);
break;
case LOP_NEWTABLE:
translateInstNewTable(*this, pc, i);
break;
case LOP_DUPTABLE:
translateInstDupTable(*this, pc, i);
break;
case LOP_SETLIST:
inst(IrCmd::SETLIST, constUint(i), vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), constInt(LUAU_INSN_C(*pc) - 1), constUint(pc[1]));
break;
case LOP_GETUPVAL:
translateInstGetUpval(*this, pc, i);
break;
case LOP_SETUPVAL:
translateInstSetUpval(*this, pc, i);
break;
case LOP_CLOSEUPVALS:
translateInstCloseUpvals(*this, pc);
break;
case LOP_FASTCALL:
{
int skip = LUAU_INSN_C(*pc);
IrOp next = blockAtInst(i + skip + 2);
translateFastCallN(*this, pc, i, false, 0, {}, next);
activeFastcallFallback = true;
fastcallFallbackReturn = next;
break;
}
case LOP_FASTCALL1:
{
int skip = LUAU_INSN_C(*pc);
IrOp next = blockAtInst(i + skip + 2);
translateFastCallN(*this, pc, i, true, 1, undef(), next);
activeFastcallFallback = true;
fastcallFallbackReturn = next;
break;
}
case LOP_FASTCALL2:
{
int skip = LUAU_INSN_C(*pc);
IrOp next = blockAtInst(i + skip + 2);
translateFastCallN(*this, pc, i, true, 2, vmReg(pc[1]), next);
activeFastcallFallback = true;
fastcallFallbackReturn = next;
break;
}
case LOP_FASTCALL2K:
{
int skip = LUAU_INSN_C(*pc);
IrOp next = blockAtInst(i + skip + 2);
translateFastCallN(*this, pc, i, true, 2, vmConst(pc[1]), next);
activeFastcallFallback = true;
fastcallFallbackReturn = next;
break;
}
case LOP_FORNPREP:
translateInstForNPrep(*this, pc, i);
break;
case LOP_FORNLOOP:
translateInstForNLoop(*this, pc, i);
break;
case LOP_FORGLOOP:
{
int aux = int(pc[1]);
// We have a translation for ipairs-style traversal; general loop iteration is still too complex
if (aux < 0)
{
translateInstForGLoopIpairs(*this, pc, i);
}
else
{
int ra = LUAU_INSN_A(*pc);
IrOp loopRepeat = blockAtInst(i + 1 + LUAU_INSN_D(*pc));
IrOp loopExit = blockAtInst(i + getOpLength(LOP_FORGLOOP));
IrOp fallback = block(IrBlockKind::Fallback);
inst(IrCmd::INTERRUPT, constUint(i));
loadAndCheckTag(vmReg(ra), LUA_TNIL, fallback);
inst(IrCmd::FORGLOOP, vmReg(ra), constInt(aux), loopRepeat, loopExit);
beginBlock(fallback);
inst(IrCmd::SET_SAVEDPC, constUint(i + 1));
inst(IrCmd::FORGLOOP_FALLBACK, vmReg(ra), constInt(aux), loopRepeat, loopExit);
beginBlock(loopExit);
}
break;
}
case LOP_FORGPREP_NEXT:
translateInstForGPrepNext(*this, pc, i);
break;
case LOP_FORGPREP_INEXT:
translateInstForGPrepInext(*this, pc, i);
break;
case LOP_AND:
translateInstAndX(*this, pc, i, vmReg(LUAU_INSN_C(*pc)));
break;
case LOP_ANDK:
translateInstAndX(*this, pc, i, vmConst(LUAU_INSN_C(*pc)));
break;
case LOP_OR:
translateInstOrX(*this, pc, i, vmReg(LUAU_INSN_C(*pc)));
break;
case LOP_ORK:
translateInstOrX(*this, pc, i, vmConst(LUAU_INSN_C(*pc)));
break;
case LOP_COVERAGE:
inst(IrCmd::COVERAGE, constUint(i));
break;
case LOP_GETIMPORT:
translateInstGetImport(*this, pc, i);
break;
case LOP_CONCAT:
translateInstConcat(*this, pc, i);
break;
case LOP_CAPTURE:
translateInstCapture(*this, pc, i);
break;
case LOP_NAMECALL:
translateInstNamecall(*this, pc, i);
break;
case LOP_PREPVARARGS:
inst(IrCmd::FALLBACK_PREPVARARGS, constUint(i), constInt(LUAU_INSN_A(*pc)));
break;
case LOP_GETVARARGS:
inst(IrCmd::FALLBACK_GETVARARGS, constUint(i), vmReg(LUAU_INSN_A(*pc)), constInt(LUAU_INSN_B(*pc) - 1));
break;
case LOP_NEWCLOSURE:
inst(IrCmd::FALLBACK_NEWCLOSURE, constUint(i), vmReg(LUAU_INSN_A(*pc)), constUint(LUAU_INSN_D(*pc)));
break;
case LOP_DUPCLOSURE:
inst(IrCmd::FALLBACK_DUPCLOSURE, constUint(i), vmReg(LUAU_INSN_A(*pc)), vmConst(LUAU_INSN_D(*pc)));
break;
case LOP_FORGPREP:
{
IrOp loopStart = blockAtInst(i + 1 + LUAU_INSN_D(*pc));
inst(IrCmd::FALLBACK_FORGPREP, constUint(i), vmReg(LUAU_INSN_A(*pc)), loopStart);
break;
}
default:
LUAU_ASSERT(!"unknown instruction");
break;
}
}
bool IrBuilder::isInternalBlock(IrOp block)
{
IrBlock& target = function.blocks[block.index];
return target.kind == IrBlockKind::Internal;
}
void IrBuilder::beginBlock(IrOp block)
{
IrBlock& target = function.blocks[block.index];
activeBlockIdx = block.index;
LUAU_ASSERT(target.start == ~0u || target.start == uint32_t(function.instructions.size()));
target.start = uint32_t(function.instructions.size());
inTerminatedBlock = false;
}
void IrBuilder::loadAndCheckTag(IrOp loc, uint8_t tag, IrOp fallback)
{
inst(IrCmd::CHECK_TAG, inst(IrCmd::LOAD_TAG, loc), constTag(tag), fallback);
}
void IrBuilder::clone(const IrBlock& source, bool removeCurrentTerminator)
{
DenseHashMap<uint32_t, uint32_t> instRedir{~0u};
auto redirect = [&instRedir](IrOp& op) {
if (op.kind == IrOpKind::Inst)
{
if (const uint32_t* newIndex = instRedir.find(op.index))
op.index = *newIndex;
else
LUAU_ASSERT(!"values can only be used if they are defined in the same block");
}
};
if (removeCurrentTerminator && inTerminatedBlock)
{
IrBlock& active = function.blocks[activeBlockIdx];
IrInst& term = function.instructions[active.finish];
kill(function, term);
inTerminatedBlock = false;
}
for (uint32_t index = source.start; index <= source.finish; index++)
{
LUAU_ASSERT(index < function.instructions.size());
IrInst clone = function.instructions[index];
// Skip pseudo instructions to make clone more compact, but validate that they have no users
if (isPseudo(clone.cmd))
{
LUAU_ASSERT(clone.useCount == 0);
continue;
}
redirect(clone.a);
redirect(clone.b);
redirect(clone.c);
redirect(clone.d);
redirect(clone.e);
redirect(clone.f);
addUse(function, clone.a);
addUse(function, clone.b);
addUse(function, clone.c);
addUse(function, clone.d);
addUse(function, clone.e);
addUse(function, clone.f);
// Instructions that referenced the original will have to be adjusted to use the clone
instRedir[index] = uint32_t(function.instructions.size());
// Reconstruct the fresh clone
inst(clone.cmd, clone.a, clone.b, clone.c, clone.d, clone.e, clone.f);
}
}
IrOp IrBuilder::undef()
{
return {IrOpKind::Undef, 0};
}
IrOp IrBuilder::constBool(bool value)
{
IrConst constant;
constant.kind = IrConstKind::Bool;
constant.valueBool = value;
return constAny(constant, uint64_t(value));
}
IrOp IrBuilder::constInt(int value)
{
IrConst constant;
constant.kind = IrConstKind::Int;
constant.valueInt = value;
return constAny(constant, uint64_t(value));
}
IrOp IrBuilder::constUint(unsigned value)
{
IrConst constant;
constant.kind = IrConstKind::Uint;
constant.valueUint = value;
return constAny(constant, uint64_t(value));
}
IrOp IrBuilder::constDouble(double value)
{
IrConst constant;
constant.kind = IrConstKind::Double;
constant.valueDouble = value;
uint64_t asCommonKey;
static_assert(sizeof(asCommonKey) == sizeof(value), "Expecting double to be 64-bit");
memcpy(&asCommonKey, &value, sizeof(value));
return constAny(constant, asCommonKey);
}
IrOp IrBuilder::constTag(uint8_t value)
{
IrConst constant;
constant.kind = IrConstKind::Tag;
constant.valueTag = value;
return constAny(constant, uint64_t(value));
}
IrOp IrBuilder::constAny(IrConst constant, uint64_t asCommonKey)
{
ConstantKey key{constant.kind, asCommonKey};
if (uint32_t* cache = constantMap.find(key))
return {IrOpKind::Constant, *cache};
uint32_t index = uint32_t(function.constants.size());
function.constants.push_back(constant);
constantMap[key] = index;
return {IrOpKind::Constant, index};
}
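// Note: constants are deduplicated through constantMap above, so e.g. two constInt(5) calls yield the same IrOp and the constant table stays compact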
IrOp IrBuilder::cond(IrCondition cond)
{
return {IrOpKind::Condition, uint32_t(cond)};
}
IrOp IrBuilder::inst(IrCmd cmd)
{
return inst(cmd, {}, {}, {}, {}, {}, {});
}
IrOp IrBuilder::inst(IrCmd cmd, IrOp a)
{
return inst(cmd, a, {}, {}, {}, {}, {});
}
IrOp IrBuilder::inst(IrCmd cmd, IrOp a, IrOp b)
{
return inst(cmd, a, b, {}, {}, {}, {});
}
IrOp IrBuilder::inst(IrCmd cmd, IrOp a, IrOp b, IrOp c)
{
return inst(cmd, a, b, c, {}, {}, {});
}
IrOp IrBuilder::inst(IrCmd cmd, IrOp a, IrOp b, IrOp c, IrOp d)
{
return inst(cmd, a, b, c, d, {}, {});
}
IrOp IrBuilder::inst(IrCmd cmd, IrOp a, IrOp b, IrOp c, IrOp d, IrOp e)
{
return inst(cmd, a, b, c, d, e, {});
}
IrOp IrBuilder::inst(IrCmd cmd, IrOp a, IrOp b, IrOp c, IrOp d, IrOp e, IrOp f)
{
uint32_t index = uint32_t(function.instructions.size());
function.instructions.push_back({cmd, a, b, c, d, e, f});
LUAU_ASSERT(!inTerminatedBlock);
if (isBlockTerminator(cmd))
{
function.blocks[activeBlockIdx].finish = index;
inTerminatedBlock = true;
}
return {IrOpKind::Inst, index};
}
IrOp IrBuilder::block(IrBlockKind kind)
{
if (kind == IrBlockKind::Internal && activeFastcallFallback)
kind = IrBlockKind::Fallback;
uint32_t index = uint32_t(function.blocks.size());
function.blocks.push_back(IrBlock{kind});
return IrOp{IrOpKind::Block, index};
}
IrOp IrBuilder::blockAtInst(uint32_t index)
{
uint32_t blockIndex = instIndexToBlock[index];
if (blockIndex != kNoAssociatedBlockIndex)
return IrOp{IrOpKind::Block, blockIndex};
return block(IrBlockKind::Internal);
}
IrOp IrBuilder::vmReg(uint8_t index)
{
return {IrOpKind::VmReg, index};
}
IrOp IrBuilder::vmConst(uint32_t index)
{
return {IrOpKind::VmConst, index};
}
IrOp IrBuilder::vmUpvalue(uint8_t index)
{
return {IrOpKind::VmUpvalue, index};
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,431 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/IrCallWrapperX64.h"
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/IrRegAllocX64.h"
#include "EmitCommonX64.h"
namespace Luau
{
namespace CodeGen
{
namespace X64
{
static const std::array<OperandX64, 6> kWindowsGprOrder = {rcx, rdx, r8, r9, addr[rsp + 32], addr[rsp + 40]};
static const std::array<OperandX64, 6> kSystemvGprOrder = {rdi, rsi, rdx, rcx, r8, r9};
static const std::array<OperandX64, 4> kXmmOrder = {xmm0, xmm1, xmm2, xmm3}; // Common order for first 4 fp arguments on Windows/SystemV
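// For example, a hypothetical call f(L, 42, 3.5) would pass L in rdi, 42 in esi and 3.5 in xmm0 on
// SystemV; on Windows the same arguments land in rcx, edx and xmm2, since gpr and xmm positions
// advance together there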
static bool sameUnderlyingRegister(RegisterX64 a, RegisterX64 b)
{
SizeX64 underlyingSizeA = a.size == SizeX64::xmmword ? SizeX64::xmmword : SizeX64::qword;
SizeX64 underlyingSizeB = b.size == SizeX64::xmmword ? SizeX64::xmmword : SizeX64::qword;
return underlyingSizeA == underlyingSizeB && a.index == b.index;
}
IrCallWrapperX64::IrCallWrapperX64(IrRegAllocX64& regs, AssemblyBuilderX64& build, uint32_t instIdx)
: regs(regs)
, build(build)
, instIdx(instIdx)
, funcOp(noreg)
{
gprUses.fill(0);
xmmUses.fill(0);
}
void IrCallWrapperX64::addArgument(SizeX64 targetSize, OperandX64 source, IrOp sourceOp)
{
// Instruction operands rely on current instruction index for lifetime tracking
LUAU_ASSERT(instIdx != kInvalidInstIdx || sourceOp.kind == IrOpKind::None);
LUAU_ASSERT(argCount < kMaxCallArguments);
CallArgument& arg = args[argCount++];
arg = {targetSize, source, sourceOp};
arg.target = getNextArgumentTarget(targetSize);
if (build.abi == ABIX64::Windows)
{
// On Windows, gpr/xmm register positions move in sync
gprPos++;
xmmPos++;
}
else
{
if (targetSize == SizeX64::xmmword)
xmmPos++;
else
gprPos++;
}
}
void IrCallWrapperX64::addArgument(SizeX64 targetSize, ScopedRegX64& scopedReg)
{
addArgument(targetSize, scopedReg.release(), {});
}
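// A typical call sequence then looks like this (sketch; argument values are illustrative):
//   IrCallWrapperX64 callWrap(regs, build, instIdx);
//   callWrap.addArgument(SizeX64::qword, rState);
//   callWrap.addArgument(SizeX64::dword, int32_t(42));
//   callWrap.call(qword[rNativeContext + offsetof(NativeContext, someHelper)]);
// where 'someHelper' stands in for any NativeContext entry point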
void IrCallWrapperX64::call(const OperandX64& func)
{
funcOp = func;
countRegisterUses();
for (int i = 0; i < argCount; ++i)
{
CallArgument& arg = args[i];
if (arg.sourceOp.kind != IrOpKind::None)
{
if (IrInst* inst = regs.function.asInstOp(arg.sourceOp))
{
// Source registers are recorded separately from source operands in CallArgument
// If source is the last use of IrInst, clear the register from the operand
if (regs.isLastUseReg(*inst, instIdx))
inst->regX64 = noreg;
// If it's not the last use and register is volatile, register ownership is taken, which also spills the operand
else if (inst->regX64.size == SizeX64::xmmword || regs.shouldFreeGpr(inst->regX64))
regs.takeReg(inst->regX64, kInvalidInstIdx);
}
}
// Immediate values are stored last since they don't interfere and the target register can still be used temporarily
if (arg.source.cat == CategoryX64::imm)
{
arg.candidate = false;
}
// Arguments passed through stack can be handled immediately
else if (arg.target.cat == CategoryX64::mem)
{
if (arg.source.cat == CategoryX64::mem)
{
ScopedRegX64 tmp{regs, arg.target.memSize};
freeSourceRegisters(arg);
if (arg.source.memSize == SizeX64::none)
build.lea(tmp.reg, arg.source);
else
build.mov(tmp.reg, arg.source);
build.mov(arg.target, tmp.reg);
}
else
{
freeSourceRegisters(arg);
build.mov(arg.target, arg.source);
}
arg.candidate = false;
}
// Skip arguments that are already in their place
else if (arg.source.cat == CategoryX64::reg && sameUnderlyingRegister(arg.target.base, arg.source.base))
{
freeSourceRegisters(arg);
// If target is not used as source in other arguments, prevent register allocator from giving it out
if (getRegisterUses(arg.target.base) == 0)
regs.takeReg(arg.target.base, kInvalidInstIdx);
else // Otherwise, make sure we won't free it when last source use is completed
addRegisterUse(arg.target.base);
arg.candidate = false;
}
}
// Repeat until we run out of arguments to pass
while (true)
{
// Find target argument register that is not an active source
if (CallArgument* candidate = findNonInterferingArgument())
{
// This section is only for handling register targets
LUAU_ASSERT(candidate->target.cat == CategoryX64::reg);
freeSourceRegisters(*candidate);
LUAU_ASSERT(getRegisterUses(candidate->target.base) == 0);
regs.takeReg(candidate->target.base, kInvalidInstIdx);
moveToTarget(*candidate);
candidate->candidate = false;
}
// If all registers cross-interfere (rcx <- rdx, rdx <- rcx), one has to be renamed
else if (RegisterX64 conflict = findConflictingTarget(); conflict != noreg)
{
renameConflictingRegister(conflict);
}
else
{
for (int i = 0; i < argCount; ++i)
LUAU_ASSERT(!args[i].candidate);
break;
}
}
// Handle immediate arguments last
for (int i = 0; i < argCount; ++i)
{
CallArgument& arg = args[i];
if (arg.source.cat == CategoryX64::imm)
{
// There could be a conflict with the function source register; make this argument a candidate so the conflict check can find it
arg.candidate = true;
if (RegisterX64 conflict = findConflictingTarget(); conflict != noreg)
renameConflictingRegister(conflict);
if (arg.target.cat == CategoryX64::reg)
regs.takeReg(arg.target.base, kInvalidInstIdx);
moveToTarget(arg);
arg.candidate = false;
}
}
// Free registers used in the function call
removeRegisterUse(funcOp.base);
removeRegisterUse(funcOp.index);
// Just before the call is made, argument registers are all marked as free in the register allocator
for (int i = 0; i < argCount; ++i)
{
CallArgument& arg = args[i];
if (arg.target.cat == CategoryX64::reg)
regs.freeReg(arg.target.base);
}
regs.preserveAndFreeInstValues();
regs.assertAllFree();
build.call(funcOp);
}
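// suggestNextArgumentRegister lets callers reserve the register the next argument of a given size
// would occupy; when that argument would go on the stack instead, any free register of the right
// size is handed out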
RegisterX64 IrCallWrapperX64::suggestNextArgumentRegister(SizeX64 size) const
{
OperandX64 target = getNextArgumentTarget(size);
return target.cat == CategoryX64::reg ? regs.takeReg(target.base, kInvalidInstIdx) : regs.allocReg(size, kInvalidInstIdx);
}
OperandX64 IrCallWrapperX64::getNextArgumentTarget(SizeX64 size) const
{
if (size == SizeX64::xmmword)
{
LUAU_ASSERT(size_t(xmmPos) < kXmmOrder.size());
return kXmmOrder[xmmPos];
}
const std::array<OperandX64, 6>& gprOrder = build.abi == ABIX64::Windows ? kWindowsGprOrder : kSystemvGprOrder;
LUAU_ASSERT(size_t(gprPos) < gprOrder.size());
OperandX64 target = gprOrder[gprPos];
// Keep requested argument size
if (target.cat == CategoryX64::reg)
target.base.size = size;
else if (target.cat == CategoryX64::mem)
target.memSize = size;
return target;
}
void IrCallWrapperX64::countRegisterUses()
{
for (int i = 0; i < argCount; ++i)
{
addRegisterUse(args[i].source.base);
addRegisterUse(args[i].source.index);
}
addRegisterUse(funcOp.base);
addRegisterUse(funcOp.index);
}
CallArgument* IrCallWrapperX64::findNonInterferingArgument()
{
for (int i = 0; i < argCount; ++i)
{
CallArgument& arg = args[i];
if (arg.candidate && !interferesWithActiveSources(arg, i) && !interferesWithOperand(funcOp, arg.target.base))
return &arg;
}
return nullptr;
}
bool IrCallWrapperX64::interferesWithOperand(const OperandX64& op, RegisterX64 reg) const
{
return sameUnderlyingRegister(op.base, reg) || sameUnderlyingRegister(op.index, reg);
}
bool IrCallWrapperX64::interferesWithActiveSources(const CallArgument& targetArg, int targetArgIndex) const
{
for (int i = 0; i < argCount; ++i)
{
const CallArgument& arg = args[i];
if (arg.candidate && i != targetArgIndex && interferesWithOperand(arg.source, targetArg.target.base))
return true;
}
return false;
}
bool IrCallWrapperX64::interferesWithActiveTarget(RegisterX64 sourceReg) const
{
for (int i = 0; i < argCount; ++i)
{
const CallArgument& arg = args[i];
if (arg.candidate && sameUnderlyingRegister(arg.target.base, sourceReg))
return true;
}
return false;
}
void IrCallWrapperX64::moveToTarget(CallArgument& arg)
{
if (arg.source.cat == CategoryX64::reg)
{
RegisterX64 source = arg.source.base;
if (source.size == SizeX64::xmmword)
build.vmovsd(arg.target, source, source);
else
build.mov(arg.target, source);
}
else if (arg.source.cat == CategoryX64::imm)
{
build.mov(arg.target, arg.source);
}
else
{
if (arg.source.memSize == SizeX64::none)
build.lea(arg.target, arg.source);
else if (arg.target.base.size == SizeX64::xmmword && arg.source.memSize == SizeX64::xmmword)
build.vmovups(arg.target, arg.source);
else if (arg.target.base.size == SizeX64::xmmword)
build.vmovsd(arg.target, arg.source);
else
build.mov(arg.target, arg.source);
}
}
void IrCallWrapperX64::freeSourceRegisters(CallArgument& arg)
{
removeRegisterUse(arg.source.base);
removeRegisterUse(arg.source.index);
}
void IrCallWrapperX64::renameRegister(RegisterX64& target, RegisterX64 reg, RegisterX64 replacement)
{
if (sameUnderlyingRegister(target, reg))
{
addRegisterUse(replacement);
removeRegisterUse(target);
target.index = replacement.index; // Only change index, size is preserved
}
}
void IrCallWrapperX64::renameSourceRegisters(RegisterX64 reg, RegisterX64 replacement)
{
for (int i = 0; i < argCount; ++i)
{
CallArgument& arg = args[i];
if (arg.candidate)
{
renameRegister(arg.source.base, reg, replacement);
renameRegister(arg.source.index, reg, replacement);
}
}
renameRegister(funcOp.base, reg, replacement);
renameRegister(funcOp.index, reg, replacement);
}
RegisterX64 IrCallWrapperX64::findConflictingTarget() const
{
for (int i = 0; i < argCount; ++i)
{
const CallArgument& arg = args[i];
if (arg.candidate)
{
if (interferesWithActiveTarget(arg.source.base))
return arg.source.base;
if (interferesWithActiveTarget(arg.source.index))
return arg.source.index;
}
}
if (interferesWithActiveTarget(funcOp.base))
return funcOp.base;
if (interferesWithActiveTarget(funcOp.index))
return funcOp.index;
return noreg;
}
void IrCallWrapperX64::renameConflictingRegister(RegisterX64 conflict)
{
// Get a fresh register
RegisterX64 freshReg = regs.allocReg(conflict.size, kInvalidInstIdx);
if (conflict.size == SizeX64::xmmword)
build.vmovsd(freshReg, conflict, conflict);
else
build.mov(freshReg, conflict);
renameSourceRegisters(conflict, freshReg);
}
int IrCallWrapperX64::getRegisterUses(RegisterX64 reg) const
{
if (reg.size == SizeX64::xmmword)
return xmmUses[reg.index];
if (reg.size != SizeX64::none)
return gprUses[reg.index];
return 0;
}
void IrCallWrapperX64::addRegisterUse(RegisterX64 reg)
{
if (reg.size == SizeX64::xmmword)
xmmUses[reg.index]++;
else if (reg.size != SizeX64::none)
gprUses[reg.index]++;
}
void IrCallWrapperX64::removeRegisterUse(RegisterX64 reg)
{
if (reg.size == SizeX64::xmmword)
{
LUAU_ASSERT(xmmUses[reg.index] != 0);
xmmUses[reg.index]--;
if (xmmUses[reg.index] == 0) // we don't use persistent xmm regs so no need to call shouldFreeRegister
regs.freeReg(reg);
}
else if (reg.size != SizeX64::none)
{
LUAU_ASSERT(gprUses[reg.index] != 0);
gprUses[reg.index]--;
if (gprUses[reg.index] == 0 && regs.shouldFreeGpr(reg))
regs.freeReg(reg);
}
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau

luau/CodeGen/src/IrDump.cpp
View File

@ -0,0 +1,766 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/IrDump.h"
#include "Luau/IrUtils.h"
#include "lua.h"
#include <stdarg.h>
namespace Luau
{
namespace CodeGen
{
static const char* textForCondition[] = {
"eq", "not_eq", "lt", "not_lt", "le", "not_le", "gt", "not_gt", "ge", "not_ge", "u_lt", "u_le", "u_gt", "u_ge"};
static_assert(sizeof(textForCondition) / sizeof(textForCondition[0]) == size_t(IrCondition::Count), "all conditions have to be covered");
const int kDetailsAlignColumn = 60;
LUAU_PRINTF_ATTR(2, 3)
static void append(std::string& result, const char* fmt, ...)
{
char buf[256];
va_list args;
va_start(args, fmt);
vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
result.append(buf);
}
static void padToDetailColumn(std::string& result, size_t lineStart)
{
int pad = kDetailsAlignColumn - int(result.size() - lineStart);
if (pad > 0)
result.append(pad, ' ');
}
static const char* getTagName(uint8_t tag)
{
switch (tag)
{
case LUA_TNIL:
return "tnil";
case LUA_TBOOLEAN:
return "tboolean";
case LUA_TLIGHTUSERDATA:
return "tlightuserdata";
case LUA_TNUMBER:
return "tnumber";
case LUA_TVECTOR:
return "tvector";
case LUA_TSTRING:
return "tstring";
case LUA_TTABLE:
return "ttable";
case LUA_TFUNCTION:
return "tfunction";
case LUA_TUSERDATA:
return "tuserdata";
case LUA_TTHREAD:
return "tthread";
default:
LUAU_ASSERT(!"Unknown type tag");
LUAU_UNREACHABLE();
}
}
const char* getCmdName(IrCmd cmd)
{
switch (cmd)
{
case IrCmd::NOP:
return "NOP";
case IrCmd::LOAD_TAG:
return "LOAD_TAG";
case IrCmd::LOAD_POINTER:
return "LOAD_POINTER";
case IrCmd::LOAD_DOUBLE:
return "LOAD_DOUBLE";
case IrCmd::LOAD_INT:
return "LOAD_INT";
case IrCmd::LOAD_TVALUE:
return "LOAD_TVALUE";
case IrCmd::LOAD_NODE_VALUE_TV:
return "LOAD_NODE_VALUE_TV";
case IrCmd::LOAD_ENV:
return "LOAD_ENV";
case IrCmd::GET_ARR_ADDR:
return "GET_ARR_ADDR";
case IrCmd::GET_SLOT_NODE_ADDR:
return "GET_SLOT_NODE_ADDR";
case IrCmd::GET_HASH_NODE_ADDR:
return "GET_HASH_NODE_ADDR";
case IrCmd::STORE_TAG:
return "STORE_TAG";
case IrCmd::STORE_POINTER:
return "STORE_POINTER";
case IrCmd::STORE_DOUBLE:
return "STORE_DOUBLE";
case IrCmd::STORE_INT:
return "STORE_INT";
case IrCmd::STORE_VECTOR:
return "STORE_VECTOR";
case IrCmd::STORE_TVALUE:
return "STORE_TVALUE";
case IrCmd::STORE_NODE_VALUE_TV:
return "STORE_NODE_VALUE_TV";
case IrCmd::ADD_INT:
return "ADD_INT";
case IrCmd::SUB_INT:
return "SUB_INT";
case IrCmd::ADD_NUM:
return "ADD_NUM";
case IrCmd::SUB_NUM:
return "SUB_NUM";
case IrCmd::MUL_NUM:
return "MUL_NUM";
case IrCmd::DIV_NUM:
return "DIV_NUM";
case IrCmd::MOD_NUM:
return "MOD_NUM";
case IrCmd::MIN_NUM:
return "MIN_NUM";
case IrCmd::MAX_NUM:
return "MAX_NUM";
case IrCmd::UNM_NUM:
return "UNM_NUM";
case IrCmd::FLOOR_NUM:
return "FLOOR_NUM";
case IrCmd::CEIL_NUM:
return "CEIL_NUM";
case IrCmd::ROUND_NUM:
return "ROUND_NUM";
case IrCmd::SQRT_NUM:
return "SQRT_NUM";
case IrCmd::ABS_NUM:
return "ABS_NUM";
case IrCmd::NOT_ANY:
return "NOT_ANY";
case IrCmd::JUMP:
return "JUMP";
case IrCmd::JUMP_IF_TRUTHY:
return "JUMP_IF_TRUTHY";
case IrCmd::JUMP_IF_FALSY:
return "JUMP_IF_FALSY";
case IrCmd::JUMP_EQ_TAG:
return "JUMP_EQ_TAG";
case IrCmd::JUMP_EQ_INT:
return "JUMP_EQ_INT";
case IrCmd::JUMP_LT_INT:
return "JUMP_LT_INT";
case IrCmd::JUMP_GE_UINT:
return "JUMP_GE_UINT";
case IrCmd::JUMP_EQ_POINTER:
return "JUMP_EQ_POINTER";
case IrCmd::JUMP_CMP_NUM:
return "JUMP_CMP_NUM";
case IrCmd::JUMP_CMP_ANY:
return "JUMP_CMP_ANY";
case IrCmd::JUMP_SLOT_MATCH:
return "JUMP_SLOT_MATCH";
case IrCmd::TABLE_LEN:
return "TABLE_LEN";
case IrCmd::NEW_TABLE:
return "NEW_TABLE";
case IrCmd::DUP_TABLE:
return "DUP_TABLE";
case IrCmd::TRY_NUM_TO_INDEX:
return "TRY_NUM_TO_INDEX";
case IrCmd::TRY_CALL_FASTGETTM:
return "TRY_CALL_FASTGETTM";
case IrCmd::INT_TO_NUM:
return "INT_TO_NUM";
case IrCmd::UINT_TO_NUM:
return "UINT_TO_NUM";
case IrCmd::NUM_TO_INT:
return "NUM_TO_INT";
case IrCmd::NUM_TO_UINT:
return "NUM_TO_UINT";
case IrCmd::ADJUST_STACK_TO_REG:
return "ADJUST_STACK_TO_REG";
case IrCmd::ADJUST_STACK_TO_TOP:
return "ADJUST_STACK_TO_TOP";
case IrCmd::FASTCALL:
return "FASTCALL";
case IrCmd::INVOKE_FASTCALL:
return "INVOKE_FASTCALL";
case IrCmd::CHECK_FASTCALL_RES:
return "CHECK_FASTCALL_RES";
case IrCmd::DO_ARITH:
return "DO_ARITH";
case IrCmd::DO_LEN:
return "DO_LEN";
case IrCmd::GET_TABLE:
return "GET_TABLE";
case IrCmd::SET_TABLE:
return "SET_TABLE";
case IrCmd::GET_IMPORT:
return "GET_IMPORT";
case IrCmd::CONCAT:
return "CONCAT";
case IrCmd::GET_UPVALUE:
return "GET_UPVALUE";
case IrCmd::SET_UPVALUE:
return "SET_UPVALUE";
case IrCmd::PREPARE_FORN:
return "PREPARE_FORN";
case IrCmd::CHECK_TAG:
return "CHECK_TAG";
case IrCmd::CHECK_READONLY:
return "CHECK_READONLY";
case IrCmd::CHECK_NO_METATABLE:
return "CHECK_NO_METATABLE";
case IrCmd::CHECK_SAFE_ENV:
return "CHECK_SAFE_ENV";
case IrCmd::CHECK_ARRAY_SIZE:
return "CHECK_ARRAY_SIZE";
case IrCmd::CHECK_SLOT_MATCH:
return "CHECK_SLOT_MATCH";
case IrCmd::CHECK_NODE_NO_NEXT:
return "CHECK_NODE_NO_NEXT";
case IrCmd::INTERRUPT:
return "INTERRUPT";
case IrCmd::CHECK_GC:
return "CHECK_GC";
case IrCmd::BARRIER_OBJ:
return "BARRIER_OBJ";
case IrCmd::BARRIER_TABLE_BACK:
return "BARRIER_TABLE_BACK";
case IrCmd::BARRIER_TABLE_FORWARD:
return "BARRIER_TABLE_FORWARD";
case IrCmd::SET_SAVEDPC:
return "SET_SAVEDPC";
case IrCmd::CLOSE_UPVALS:
return "CLOSE_UPVALS";
case IrCmd::CAPTURE:
return "CAPTURE";
case IrCmd::SETLIST:
return "SETLIST";
case IrCmd::CALL:
return "CALL";
case IrCmd::RETURN:
return "RETURN";
case IrCmd::FORGLOOP:
return "FORGLOOP";
case IrCmd::FORGLOOP_FALLBACK:
return "FORGLOOP_FALLBACK";
case IrCmd::FORGPREP_XNEXT_FALLBACK:
return "FORGPREP_XNEXT_FALLBACK";
case IrCmd::COVERAGE:
return "COVERAGE";
case IrCmd::FALLBACK_GETGLOBAL:
return "FALLBACK_GETGLOBAL";
case IrCmd::FALLBACK_SETGLOBAL:
return "FALLBACK_SETGLOBAL";
case IrCmd::FALLBACK_GETTABLEKS:
return "FALLBACK_GETTABLEKS";
case IrCmd::FALLBACK_SETTABLEKS:
return "FALLBACK_SETTABLEKS";
case IrCmd::FALLBACK_NAMECALL:
return "FALLBACK_NAMECALL";
case IrCmd::FALLBACK_PREPVARARGS:
return "FALLBACK_PREPVARARGS";
case IrCmd::FALLBACK_GETVARARGS:
return "FALLBACK_GETVARARGS";
case IrCmd::FALLBACK_NEWCLOSURE:
return "FALLBACK_NEWCLOSURE";
case IrCmd::FALLBACK_DUPCLOSURE:
return "FALLBACK_DUPCLOSURE";
case IrCmd::FALLBACK_FORGPREP:
return "FALLBACK_FORGPREP";
case IrCmd::SUBSTITUTE:
return "SUBSTITUTE";
case IrCmd::BITAND_UINT:
return "BITAND_UINT";
case IrCmd::BITXOR_UINT:
return "BITXOR_UINT";
case IrCmd::BITOR_UINT:
return "BITOR_UINT";
case IrCmd::BITNOT_UINT:
return "BITNOT_UINT";
case IrCmd::BITLSHIFT_UINT:
return "BITLSHIFT_UINT";
case IrCmd::BITRSHIFT_UINT:
return "BITRSHIFT_UINT";
case IrCmd::BITARSHIFT_UINT:
return "BITARSHIFT_UINT";
case IrCmd::BITLROTATE_UINT:
return "BITLROTATE_UINT";
case IrCmd::BITRROTATE_UINT:
return "BITRROTATE_UINT";
case IrCmd::BITCOUNTLZ_UINT:
return "BITCOUNTLZ_UINT";
case IrCmd::BITCOUNTRZ_UINT:
return "BITCOUNTRZ_UINT";
case IrCmd::INVOKE_LIBM:
return "INVOKE_LIBM";
}
LUAU_UNREACHABLE();
}
const char* getBlockKindName(IrBlockKind kind)
{
switch (kind)
{
case IrBlockKind::Bytecode:
return "bb_bytecode";
case IrBlockKind::Fallback:
return "bb_fallback";
case IrBlockKind::Internal:
return "bb";
case IrBlockKind::Linearized:
return "bb_linear";
case IrBlockKind::Dead:
return "dead";
}
LUAU_UNREACHABLE();
}
void toString(IrToStringContext& ctx, const IrInst& inst, uint32_t index)
{
append(ctx.result, " ");
// Instructions with a result display the target virtual register
if (hasResult(inst.cmd))
append(ctx.result, "%%%u = ", index);
ctx.result.append(getCmdName(inst.cmd));
auto checkOp = [&ctx](IrOp op, const char* sep) {
if (op.kind != IrOpKind::None)
{
ctx.result.append(sep);
toString(ctx, op);
}
};
checkOp(inst.a, " ");
checkOp(inst.b, ", ");
checkOp(inst.c, ", ");
checkOp(inst.d, ", ");
checkOp(inst.e, ", ");
checkOp(inst.f, ", ");
}
void toString(IrToStringContext& ctx, const IrBlock& block, uint32_t index)
{
append(ctx.result, "%s_%u", getBlockKindName(block.kind), index);
}
void toString(IrToStringContext& ctx, IrOp op)
{
switch (op.kind)
{
case IrOpKind::None:
break;
case IrOpKind::Undef:
append(ctx.result, "undef");
break;
case IrOpKind::Constant:
toString(ctx.result, ctx.constants[op.index]);
break;
case IrOpKind::Condition:
LUAU_ASSERT(op.index < uint32_t(IrCondition::Count));
ctx.result.append(textForCondition[op.index]);
break;
case IrOpKind::Inst:
append(ctx.result, "%%%u", op.index);
break;
case IrOpKind::Block:
append(ctx.result, "%s_%u", getBlockKindName(ctx.blocks[op.index].kind), op.index);
break;
case IrOpKind::VmReg:
append(ctx.result, "R%d", vmRegOp(op));
break;
case IrOpKind::VmConst:
append(ctx.result, "K%d", vmConstOp(op));
break;
case IrOpKind::VmUpvalue:
append(ctx.result, "U%d", vmUpvalueOp(op));
break;
}
}
void toString(std::string& result, IrConst constant)
{
switch (constant.kind)
{
case IrConstKind::Bool:
append(result, constant.valueBool ? "true" : "false");
break;
case IrConstKind::Int:
append(result, "%di", constant.valueInt);
break;
case IrConstKind::Uint:
append(result, "%uu", constant.valueUint);
break;
case IrConstKind::Double:
if (constant.valueDouble != constant.valueDouble) // only NaN compares unequal to itself
append(result, "nan");
else
append(result, "%.17g", constant.valueDouble);
break;
case IrConstKind::Tag:
result.append(getTagName(constant.valueTag));
break;
}
}
static void appendBlockSet(IrToStringContext& ctx, BlockIteratorWrapper blocks)
{
bool comma = false;
for (uint32_t target : blocks)
{
if (comma)
append(ctx.result, ", ");
comma = true;
toString(ctx, ctx.blocks[target], target);
}
}
static void appendRegisterSet(IrToStringContext& ctx, const RegisterSet& rs, const char* separator)
{
bool comma = false;
for (size_t i = 0; i < rs.regs.size(); i++)
{
if (rs.regs.test(i))
{
if (comma)
ctx.result.append(separator);
comma = true;
append(ctx.result, "R%d", int(i));
}
}
if (rs.varargSeq)
{
if (comma)
ctx.result.append(separator);
append(ctx.result, "R%d...", rs.varargStart);
}
}
static RegisterSet getJumpTargetExtraLiveIn(IrToStringContext& ctx, const IrBlock& block, uint32_t blockIdx, const IrInst& inst)
{
RegisterSet extraRs;
if (blockIdx >= ctx.cfg.in.size())
return extraRs;
const RegisterSet& defRs = ctx.cfg.in[blockIdx];
// Find the first block argument; for guard instructions (isNonTerminatingJump), it is the first and only one
LUAU_ASSERT(isNonTerminatingJump(inst.cmd));
IrOp op = inst.a;
if (inst.b.kind == IrOpKind::Block)
op = inst.b;
else if (inst.c.kind == IrOpKind::Block)
op = inst.c;
else if (inst.d.kind == IrOpKind::Block)
op = inst.d;
else if (inst.e.kind == IrOpKind::Block)
op = inst.e;
else if (inst.f.kind == IrOpKind::Block)
op = inst.f;
if (op.kind == IrOpKind::Block && op.index < ctx.cfg.in.size())
{
const RegisterSet& inRs = ctx.cfg.in[op.index];
extraRs.regs = inRs.regs & ~defRs.regs;
if (inRs.varargSeq)
requireVariadicSequence(extraRs, defRs, inRs.varargStart);
}
return extraRs;
}
void toStringDetailed(IrToStringContext& ctx, const IrBlock& block, uint32_t blockIdx, const IrInst& inst, uint32_t instIdx, bool includeUseInfo)
{
size_t start = ctx.result.size();
toString(ctx, inst, instIdx);
if (includeUseInfo)
{
padToDetailColumn(ctx.result, start);
if (inst.useCount == 0 && hasSideEffects(inst.cmd))
{
if (isNonTerminatingJump(inst.cmd))
{
RegisterSet extraRs = getJumpTargetExtraLiveIn(ctx, block, blockIdx, inst);
if (extraRs.regs.any() || extraRs.varargSeq)
{
append(ctx.result, "; %%%u, extra in: ", instIdx);
appendRegisterSet(ctx, extraRs, ", ");
ctx.result.append("\n");
}
else
{
append(ctx.result, "; %%%u\n", instIdx);
}
}
else
{
append(ctx.result, "; %%%u\n", instIdx);
}
}
else
{
append(ctx.result, "; useCount: %d, lastUse: %%%u\n", inst.useCount, inst.lastUse);
}
}
else
{
ctx.result.append("\n");
}
}
void toStringDetailed(IrToStringContext& ctx, const IrBlock& block, uint32_t index, bool includeUseInfo)
{
// Report captured registers for entry block
if (block.useCount == 0 && block.kind != IrBlockKind::Dead && ctx.cfg.captured.regs.any())
{
append(ctx.result, "; captured regs: ");
appendRegisterSet(ctx, ctx.cfg.captured, ", ");
append(ctx.result, "\n\n");
}
size_t start = ctx.result.size();
toString(ctx, block, index);
append(ctx.result, ":");
if (includeUseInfo)
{
padToDetailColumn(ctx.result, start);
append(ctx.result, "; useCount: %d\n", block.useCount);
}
else
{
ctx.result.append("\n");
}
// Predecessor list
if (index < ctx.cfg.predecessorsOffsets.size())
{
BlockIteratorWrapper pred = predecessors(ctx.cfg, index);
if (!pred.empty())
{
append(ctx.result, "; predecessors: ");
appendBlockSet(ctx, pred);
append(ctx.result, "\n");
}
}
// Successor list
if (index < ctx.cfg.successorsOffsets.size())
{
BlockIteratorWrapper succ = successors(ctx.cfg, index);
if (!succ.empty())
{
append(ctx.result, "; successors: ");
appendBlockSet(ctx, succ);
append(ctx.result, "\n");
}
}
// Live-in VM regs
if (index < ctx.cfg.in.size())
{
const RegisterSet& in = ctx.cfg.in[index];
if (in.regs.any() || in.varargSeq)
{
append(ctx.result, "; in regs: ");
appendRegisterSet(ctx, in, ", ");
append(ctx.result, "\n");
}
}
// Live-out VM regs
if (index < ctx.cfg.out.size())
{
const RegisterSet& out = ctx.cfg.out[index];
if (out.regs.any() || out.varargSeq)
{
append(ctx.result, "; out regs: ");
appendRegisterSet(ctx, out, ", ");
append(ctx.result, "\n");
}
}
}
std::string toString(const IrFunction& function, bool includeUseInfo)
{
std::string result;
IrToStringContext ctx{result, function.blocks, function.constants, function.cfg};
for (size_t i = 0; i < function.blocks.size(); i++)
{
const IrBlock& block = function.blocks[i];
if (block.kind == IrBlockKind::Dead)
continue;
toStringDetailed(ctx, block, uint32_t(i), includeUseInfo);
if (block.start == ~0u) // the block was never started, so it has no instructions assigned
{
append(ctx.result, " *empty*\n\n");
continue;
}
// To allow dumping blocks that are still being constructed, we can't rely on the terminator and need a bounds check
for (uint32_t index = block.start; index <= block.finish && index < uint32_t(function.instructions.size()); index++)
{
const IrInst& inst = function.instructions[index];
// Skip pseudo instructions unless they are still referenced
if (isPseudo(inst.cmd) && inst.useCount == 0)
continue;
append(ctx.result, " ");
toStringDetailed(ctx, block, uint32_t(i), inst, index, includeUseInfo);
}
append(ctx.result, "\n");
}
return result;
}
std::string dump(const IrFunction& function)
{
std::string result = toString(function, /* includeUseInfo */ true);
printf("%s\n", result.c_str());
return result;
}
std::string toDot(const IrFunction& function, bool includeInst)
{
std::string result;
IrToStringContext ctx{result, function.blocks, function.constants, function.cfg};
auto appendLabelRegset = [&ctx](const std::vector<RegisterSet>& regSets, size_t blockIdx, const char* name) {
if (blockIdx < regSets.size())
{
const RegisterSet& rs = regSets[blockIdx];
if (rs.regs.any() || rs.varargSeq)
{
append(ctx.result, "|{%s|", name);
appendRegisterSet(ctx, rs, "|");
append(ctx.result, "}");
}
}
};
append(ctx.result, "digraph CFG {\n");
append(ctx.result, "node[shape=record]\n");
for (size_t i = 0; i < function.blocks.size(); i++)
{
const IrBlock& block = function.blocks[i];
append(ctx.result, "b%u [", unsigned(i));
if (block.kind == IrBlockKind::Fallback)
append(ctx.result, "style=filled;fillcolor=salmon;");
else if (block.kind == IrBlockKind::Bytecode)
append(ctx.result, "style=filled;fillcolor=palegreen;");
append(ctx.result, "label=\"{");
toString(ctx, block, uint32_t(i));
appendLabelRegset(ctx.cfg.in, i, "in");
if (includeInst && block.start != ~0u)
{
for (uint32_t instIdx = block.start; instIdx <= block.finish; instIdx++)
{
const IrInst& inst = function.instructions[instIdx];
// Skip pseudo instructions unless they are still referenced
if (isPseudo(inst.cmd) && inst.useCount == 0)
continue;
append(ctx.result, "|");
toString(ctx, inst, instIdx);
}
}
appendLabelRegset(ctx.cfg.def, i, "def");
appendLabelRegset(ctx.cfg.out, i, "out");
append(ctx.result, "}\"];\n");
}
for (size_t i = 0; i < function.blocks.size(); i++)
{
const IrBlock& block = function.blocks[i];
if (block.start == ~0u)
continue;
for (uint32_t instIdx = block.start; instIdx != ~0u && instIdx <= block.finish; instIdx++)
{
const IrInst& inst = function.instructions[instIdx];
auto checkOp = [&](IrOp op) {
if (op.kind == IrOpKind::Block)
{
if (function.blocks[op.index].kind != IrBlockKind::Fallback)
append(ctx.result, "b%u -> b%u [weight=10];\n", unsigned(i), op.index);
else
append(ctx.result, "b%u -> b%u;\n", unsigned(i), op.index);
}
};
checkOp(inst.a);
checkOp(inst.b);
checkOp(inst.c);
checkOp(inst.d);
checkOp(inst.e);
checkOp(inst.f);
}
}
append(ctx.result, "}\n");
return result;
}
std::string dumpDot(const IrFunction& function, bool includeInst)
{
std::string result = toDot(function, includeInst);
printf("%s\n", result.c_str());
return result;
}
} // namespace CodeGen
} // namespace Luau
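A minimal usage sketch for the dump helpers above, assuming an IrFunction that has already been populated (for example by IrBuilder); the wrapper function itself is hypothetical:

#include "Luau/IrDump.h"
#include <cstdio>
#include <string>

static void debugDumpIr(const Luau::CodeGen::IrFunction& function)
{
    // textual listing with use counts and liveness annotations in the details column
    std::string text = Luau::CodeGen::toString(function, /* includeUseInfo */ true);
    // Graphviz CFG with one record node per block; render with `dot -Tsvg`
    std::string graph = Luau::CodeGen::toDot(function, /* includeInst */ false);
    printf("%s\n%s\n", text.c_str(), graph.c_str());
}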

File diff suppressed because it is too large

View File

@ -0,0 +1,75 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/AssemblyBuilderA64.h"
#include "Luau/IrData.h"
#include "IrRegAllocA64.h"
#include "IrValueLocationTracking.h"
#include <vector>
struct Proto;
namespace Luau
{
namespace CodeGen
{
struct ModuleHelpers;
struct NativeState;
struct AssemblyOptions;
namespace A64
{
struct IrLoweringA64
{
IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, NativeState& data, Proto* proto, IrFunction& function);
void lowerInst(IrInst& inst, uint32_t index, IrBlock& next);
void finishBlock();
bool hasError() const;
bool isFallthroughBlock(IrBlock target, IrBlock next);
void jumpOrFallthrough(IrBlock& target, IrBlock& next);
// Operand data build helpers
// May emit data/address synthesis instructions
RegisterA64 tempDouble(IrOp op);
RegisterA64 tempInt(IrOp op);
RegisterA64 tempUint(IrOp op);
AddressA64 tempAddr(IrOp op, int offset);
// May emit restore instructions
RegisterA64 regOp(IrOp op);
// Operand data lookup helpers
IrConst constOp(IrOp op) const;
uint8_t tagOp(IrOp op) const;
bool boolOp(IrOp op) const;
int intOp(IrOp op) const;
unsigned uintOp(IrOp op) const;
double doubleOp(IrOp op) const;
IrBlock& blockOp(IrOp op) const;
Label& labelOp(IrOp op) const;
AssemblyBuilderA64& build;
ModuleHelpers& helpers;
NativeState& data;
Proto* proto = nullptr; // Temporarily required to provide 'Instruction* pc' to old emitInst* methods
IrFunction& function;
IrRegAllocA64 regs;
IrValueLocationTracking valueTracker;
bool error = false;
};
} // namespace A64
} // namespace CodeGen
} // namespace Luau

File diff suppressed because it is too large

View File

@ -0,0 +1,69 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/IrData.h"
#include "Luau/IrRegAllocX64.h"
#include "IrValueLocationTracking.h"
#include <vector>
struct Proto;
namespace Luau
{
namespace CodeGen
{
struct ModuleHelpers;
struct NativeState;
struct AssemblyOptions;
namespace X64
{
struct IrLoweringX64
{
IrLoweringX64(AssemblyBuilderX64& build, ModuleHelpers& helpers, NativeState& data, IrFunction& function);
void lowerInst(IrInst& inst, uint32_t index, IrBlock& next);
void finishBlock();
bool hasError() const;
bool isFallthroughBlock(IrBlock target, IrBlock next);
void jumpOrFallthrough(IrBlock& target, IrBlock& next);
void storeDoubleAsFloat(OperandX64 dst, IrOp src);
// Operand data lookup helpers
OperandX64 memRegDoubleOp(IrOp op);
OperandX64 memRegUintOp(IrOp op);
OperandX64 memRegTagOp(IrOp op);
RegisterX64 regOp(IrOp op);
IrConst constOp(IrOp op) const;
uint8_t tagOp(IrOp op) const;
bool boolOp(IrOp op) const;
int intOp(IrOp op) const;
unsigned uintOp(IrOp op) const;
double doubleOp(IrOp op) const;
IrBlock& blockOp(IrOp op) const;
Label& labelOp(IrOp op) const;
AssemblyBuilderX64& build;
ModuleHelpers& helpers;
NativeState& data;
IrFunction& function;
IrRegAllocX64 regs;
IrValueLocationTracking valueTracker;
};
} // namespace X64
} // namespace CodeGen
} // namespace Luau
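A hedged sketch of how a lowering driver is expected to use this interface, mirroring the bounds-checked iteration used by IrDump above; 'lowering', 'function', 'block', and 'nextBlock' are assumed to come from the surrounding code:

for (uint32_t index = block.start; index <= block.finish; index++)
{
    IrInst& inst = function.instructions[index];
    // pseudo instructions produce no code and are skipped unless still referenced
    if (isPseudo(inst.cmd) && inst.useCount == 0)
        continue;
    lowering.lowerInst(inst, index, nextBlock);
}
lowering.finishBlock();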

View File

@ -0,0 +1,435 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "IrRegAllocA64.h"
#include "Luau/AssemblyBuilderA64.h"
#include "Luau/IrUtils.h"
#include "BitUtils.h"
#include "EmitCommonA64.h"
#include <string.h>
LUAU_FASTFLAGVARIABLE(DebugLuauCodegenChaosA64, false)
namespace Luau
{
namespace CodeGen
{
namespace A64
{
static int allocSpill(uint32_t& free, KindA64 kind)
{
LUAU_ASSERT(kStackSize <= 256); // to support larger stack frames, we need to ensure qN is allocated at 16b boundary to fit in ldr/str encoding
// qN registers use two consecutive slots; 'free & (free >> 1)' keeps only the bits that start a pair of consecutive free slots
int slot = countrz(kind == KindA64::q ? free & (free >> 1) : free);
if (slot == 32)
return -1;
uint32_t mask = (kind == KindA64::q ? 3u : 1u) << slot;
LUAU_ASSERT((free & mask) == mask);
free &= ~mask;
return slot;
}
static void freeSpill(uint32_t& free, KindA64 kind, uint8_t slot)
{
// qN registers use two consecutive slots
uint32_t mask = (kind == KindA64::q ? 3u : 1u) << slot;
LUAU_ASSERT((free & mask) == 0);
free |= mask;
}
static int getReloadOffset(IrCmd cmd)
{
switch (getCmdValueKind(cmd))
{
case IrValueKind::Unknown:
case IrValueKind::None:
LUAU_ASSERT(!"Invalid operand restore value kind");
break;
case IrValueKind::Tag:
return offsetof(TValue, tt);
case IrValueKind::Int:
return offsetof(TValue, value);
case IrValueKind::Pointer:
return offsetof(TValue, value.gc);
case IrValueKind::Double:
return offsetof(TValue, value.n);
case IrValueKind::Tvalue:
return 0;
}
LUAU_ASSERT(!"Invalid operand restore value kind");
LUAU_UNREACHABLE();
}
static AddressA64 getReloadAddress(const IrFunction& function, const IrInst& inst)
{
IrOp location = function.findRestoreOp(inst);
if (location.kind == IrOpKind::VmReg)
return mem(rBase, vmRegOp(location) * sizeof(TValue) + getReloadOffset(inst.cmd));
// loads are 4/8/16 bytes; we conservatively limit the offset to fit assuming a 4b index
if (location.kind == IrOpKind::VmConst && vmConstOp(location) * sizeof(TValue) <= AddressA64::kMaxOffset * 4)
return mem(rConstants, vmConstOp(location) * sizeof(TValue) + getReloadOffset(inst.cmd));
return AddressA64(xzr); // dummy
}
static void restoreInst(AssemblyBuilderA64& build, uint32_t& freeSpillSlots, IrFunction& function, const IrRegAllocA64::Spill& s, RegisterA64 reg)
{
IrInst& inst = function.instructions[s.inst];
LUAU_ASSERT(inst.regA64 == noreg);
if (s.slot >= 0)
{
build.ldr(reg, mem(sp, sSpillArea.data + s.slot * 8));
freeSpill(freeSpillSlots, reg.kind, s.slot);
}
else
{
LUAU_ASSERT(!inst.spilled && inst.needsReload);
AddressA64 addr = getReloadAddress(function, function.instructions[s.inst]);
LUAU_ASSERT(addr.base != xzr);
build.ldr(reg, addr);
}
inst.spilled = false;
inst.needsReload = false;
inst.regA64 = reg;
}
IrRegAllocA64::IrRegAllocA64(IrFunction& function, std::initializer_list<std::pair<RegisterA64, RegisterA64>> regs)
: function(function)
{
for (auto& p : regs)
{
LUAU_ASSERT(p.first.kind == p.second.kind && p.first.index <= p.second.index);
Set& set = getSet(p.first.kind);
for (int i = p.first.index; i <= p.second.index; ++i)
set.base |= 1u << i;
}
gpr.free = gpr.base;
simd.free = simd.base;
memset(gpr.defs, -1, sizeof(gpr.defs));
memset(simd.defs, -1, sizeof(simd.defs));
LUAU_ASSERT(kSpillSlots <= 32);
freeSpillSlots = (kSpillSlots == 32) ? ~0u : (1u << kSpillSlots) - 1;
}
RegisterA64 IrRegAllocA64::allocReg(KindA64 kind, uint32_t index)
{
Set& set = getSet(kind);
if (set.free == 0)
{
// TODO: remember the error and fail lowering
LUAU_ASSERT(!"Out of registers to allocate");
return noreg;
}
int reg = 31 - countlz(set.free);
if (FFlag::DebugLuauCodegenChaosA64)
reg = countrz(set.free); // allocate from low end; this causes extra conflicts for calls
set.free &= ~(1u << reg);
set.defs[reg] = index;
return RegisterA64{kind, uint8_t(reg)};
}
RegisterA64 IrRegAllocA64::allocTemp(KindA64 kind)
{
Set& set = getSet(kind);
if (set.free == 0)
{
// TODO: remember the error and fail lowering
LUAU_ASSERT(!"Out of registers to allocate");
return noreg;
}
int reg = 31 - countlz(set.free);
if (FFlag::DebugLuauCodegenChaosA64)
reg = countrz(set.free); // allocate from low end; this causes extra conflicts for calls
set.free &= ~(1u << reg);
set.temp |= 1u << reg;
LUAU_ASSERT(set.defs[reg] == kInvalidInstIdx);
return RegisterA64{kind, uint8_t(reg)};
}
RegisterA64 IrRegAllocA64::allocReuse(KindA64 kind, uint32_t index, std::initializer_list<IrOp> oprefs)
{
for (IrOp op : oprefs)
{
if (op.kind != IrOpKind::Inst)
continue;
IrInst& source = function.instructions[op.index];
if (source.lastUse == index && !source.reusedReg && source.regA64 != noreg)
{
LUAU_ASSERT(!source.spilled && !source.needsReload);
LUAU_ASSERT(source.regA64.kind == kind);
Set& set = getSet(kind);
LUAU_ASSERT(set.defs[source.regA64.index] == op.index);
set.defs[source.regA64.index] = index;
source.reusedReg = true;
return source.regA64;
}
}
return allocReg(kind, index);
}
RegisterA64 IrRegAllocA64::takeReg(RegisterA64 reg, uint32_t index)
{
Set& set = getSet(reg.kind);
LUAU_ASSERT(set.free & (1u << reg.index));
LUAU_ASSERT(set.defs[reg.index] == kInvalidInstIdx);
set.free &= ~(1u << reg.index);
set.defs[reg.index] = index;
return reg;
}
void IrRegAllocA64::freeReg(RegisterA64 reg)
{
Set& set = getSet(reg.kind);
LUAU_ASSERT((set.base & (1u << reg.index)) != 0);
LUAU_ASSERT((set.free & (1u << reg.index)) == 0);
LUAU_ASSERT((set.temp & (1u << reg.index)) == 0);
set.free |= 1u << reg.index;
set.defs[reg.index] = kInvalidInstIdx;
}
void IrRegAllocA64::freeLastUseReg(IrInst& target, uint32_t index)
{
if (target.lastUse == index && !target.reusedReg)
{
LUAU_ASSERT(!target.spilled && !target.needsReload);
// Register might have already been freed if it had multiple uses inside a single instruction
if (target.regA64 == noreg)
return;
freeReg(target.regA64);
target.regA64 = noreg;
}
}
void IrRegAllocA64::freeLastUseRegs(const IrInst& inst, uint32_t index)
{
auto checkOp = [this, index](IrOp op) {
if (op.kind == IrOpKind::Inst)
freeLastUseReg(function.instructions[op.index], index);
};
checkOp(inst.a);
checkOp(inst.b);
checkOp(inst.c);
checkOp(inst.d);
checkOp(inst.e);
checkOp(inst.f);
}
void IrRegAllocA64::freeTempRegs()
{
LUAU_ASSERT((gpr.free & gpr.temp) == 0);
gpr.free |= gpr.temp;
gpr.temp = 0;
LUAU_ASSERT((simd.free & simd.temp) == 0);
simd.free |= simd.temp;
simd.temp = 0;
}
size_t IrRegAllocA64::spill(AssemblyBuilderA64& build, uint32_t index, std::initializer_list<RegisterA64> live)
{
static const KindA64 sets[] = {KindA64::x, KindA64::q};
size_t start = spills.size();
uint32_t poisongpr = 0;
uint32_t poisonsimd = 0;
if (FFlag::DebugLuauCodegenChaosA64)
{
poisongpr = gpr.base & ~gpr.free;
poisonsimd = simd.base & ~simd.free;
for (RegisterA64 reg : live)
{
Set& set = getSet(reg.kind);
(&set == &simd ? poisonsimd : poisongpr) &= ~(1u << reg.index);
}
}
for (KindA64 kind : sets)
{
Set& set = getSet(kind);
// early-out
if (set.free == set.base)
continue;
// free all temp registers
LUAU_ASSERT((set.free & set.temp) == 0);
set.free |= set.temp;
set.temp = 0;
// spill all allocated registers, except ones that are no longer needed
uint32_t regs = set.base & ~set.free;
while (regs)
{
int reg = 31 - countlz(regs);
uint32_t inst = set.defs[reg];
LUAU_ASSERT(inst != kInvalidInstIdx);
IrInst& def = function.instructions[inst];
LUAU_ASSERT(def.regA64.index == reg);
LUAU_ASSERT(!def.reusedReg);
LUAU_ASSERT(!def.spilled);
LUAU_ASSERT(!def.needsReload);
if (def.lastUse == index)
{
// the value dies at this instruction, so there is nothing to spill: it would never be reloaded
}
else if (getReloadAddress(function, def).base != xzr)
{
// instead of spilling the register to stack, we can reload it from VM stack/constants
// we still need to record the spill for restore(start) to work
Spill s = {inst, def.regA64, -1};
spills.push_back(s);
def.needsReload = true;
}
else
{
int slot = allocSpill(freeSpillSlots, def.regA64.kind);
LUAU_ASSERT(slot >= 0); // TODO: remember the error and fail lowering
build.str(def.regA64, mem(sp, sSpillArea.data + slot * 8));
Spill s = {inst, def.regA64, int8_t(slot)};
spills.push_back(s);
def.spilled = true;
}
def.regA64 = noreg;
regs &= ~(1u << reg);
set.free |= 1u << reg;
set.defs[reg] = kInvalidInstIdx;
}
LUAU_ASSERT(set.free == set.base);
}
if (FFlag::DebugLuauCodegenChaosA64)
{
for (int reg = 0; reg < 32; ++reg)
{
if (poisongpr & (1u << reg))
build.mov(RegisterA64{KindA64::x, uint8_t(reg)}, 0xdead);
if (poisonsimd & (1u << reg))
build.fmov(RegisterA64{KindA64::d, uint8_t(reg)}, -0.125);
}
}
return start;
}
void IrRegAllocA64::restore(AssemblyBuilderA64& build, size_t start)
{
LUAU_ASSERT(start <= spills.size());
if (start < spills.size())
{
for (size_t i = start; i < spills.size(); ++i)
{
Spill s = spills[i]; // copy in case takeReg reallocates spills
RegisterA64 reg = takeReg(s.origin, s.inst);
restoreInst(build, freeSpillSlots, function, s, reg);
}
spills.resize(start);
}
}
void IrRegAllocA64::restoreReg(AssemblyBuilderA64& build, IrInst& inst)
{
uint32_t index = function.getInstIndex(inst);
for (size_t i = 0; i < spills.size(); ++i)
{
if (spills[i].inst == index)
{
Spill s = spills[i]; // copy in case allocReg reallocates spills
RegisterA64 reg = allocReg(s.origin.kind, index);
restoreInst(build, freeSpillSlots, function, s, reg);
spills[i] = spills.back();
spills.pop_back();
return;
}
}
LUAU_ASSERT(!"Expected to find a spill record");
}
void IrRegAllocA64::assertNoSpills() const
{
LUAU_ASSERT(spills.empty());
}
IrRegAllocA64::Set& IrRegAllocA64::getSet(KindA64 kind)
{
switch (kind)
{
case KindA64::x:
case KindA64::w:
return gpr;
case KindA64::s:
case KindA64::d:
case KindA64::q:
return simd;
default:
LUAU_ASSERT(!"Unexpected register kind");
LUAU_UNREACHABLE();
}
}
} // namespace A64
} // namespace CodeGen
} // namespace Luau
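A minimal sketch of the intended spill/restore pairing around a C call, assuming 'regs' is the IrRegAllocA64 above, 'build' an AssemblyBuilderA64, and 'instIdx' the current instruction index; the live argument registers named are illustrative:

size_t spillStart = regs.spill(build, instIdx, {x0, x1}); // x0/x1 stay live across the call
// ... emit the call; all other allocated registers were spilled or marked for reload ...
regs.restore(build, spillStart); // every spill is restored to its original register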

View File

@ -0,0 +1,84 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/IrData.h"
#include "Luau/RegisterA64.h"
#include <initializer_list>
#include <utility>
#include <vector>
namespace Luau
{
namespace CodeGen
{
namespace A64
{
class AssemblyBuilderA64;
struct IrRegAllocA64
{
IrRegAllocA64(IrFunction& function, std::initializer_list<std::pair<RegisterA64, RegisterA64>> regs);
RegisterA64 allocReg(KindA64 kind, uint32_t index);
RegisterA64 allocTemp(KindA64 kind);
RegisterA64 allocReuse(KindA64 kind, uint32_t index, std::initializer_list<IrOp> oprefs);
RegisterA64 takeReg(RegisterA64 reg, uint32_t index);
void freeReg(RegisterA64 reg);
void freeLastUseReg(IrInst& target, uint32_t index);
void freeLastUseRegs(const IrInst& inst, uint32_t index);
void freeTempRegs();
// Spills all live registers that outlive the current instruction; all allocated registers are assumed to be undefined afterwards
size_t spill(AssemblyBuilderA64& build, uint32_t index, std::initializer_list<RegisterA64> live = {});
// Restores registers starting from the offset returned by spill(); all spills will be restored to the original registers
void restore(AssemblyBuilderA64& build, size_t start);
// Restores register for a single instruction; may not assign the previously used register!
void restoreReg(AssemblyBuilderA64& build, IrInst& inst);
void assertNoSpills() const;
struct Set
{
// which registers are in the set that the allocator manages (initialized at construction)
uint32_t base = 0;
// which subset of initial set is free
uint32_t free = 0;
// which subset of initial set is allocated as temporary
uint32_t temp = 0;
// which instruction is defining which register (for spilling); only valid if not free and not temp
uint32_t defs[32];
};
struct Spill
{
uint32_t inst;
RegisterA64 origin;
int8_t slot;
};
Set& getSet(KindA64 kind);
IrFunction& function;
Set gpr, simd;
std::vector<Spill> spills;
// which 8-byte slots are free
uint32_t freeSpillSlots = 0;
};
} // namespace A64
} // namespace CodeGen
} // namespace Luau
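A hedged construction sketch; the exact register ranges handed to the allocator are chosen by the lowering code, and the ones below are illustrative only:

IrRegAllocA64 regs(function, {{x0, x15}, {x16, x17}, {q0, q7}});
RegisterA64 temp = regs.allocTemp(KindA64::x); // scratch valid within one instruction
// ... use temp while lowering the instruction ...
regs.freeTempRegs(); // temporaries are released in bulk between instructions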

View File

@ -0,0 +1,492 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/IrRegAllocX64.h"
#include "Luau/IrUtils.h"
#include "EmitCommonX64.h"
namespace Luau
{
namespace CodeGen
{
namespace X64
{
static const RegisterX64 kGprAllocOrder[] = {rax, rdx, rcx, rbx, rsi, rdi, r8, r9, r10, r11};
IrRegAllocX64::IrRegAllocX64(AssemblyBuilderX64& build, IrFunction& function)
: build(build)
, function(function)
{
freeGprMap.fill(true);
gprInstUsers.fill(kInvalidInstIdx);
freeXmmMap.fill(true);
xmmInstUsers.fill(kInvalidInstIdx);
}
RegisterX64 IrRegAllocX64::allocReg(SizeX64 size, uint32_t instIdx)
{
if (size == SizeX64::xmmword)
{
for (size_t i = 0; i < freeXmmMap.size(); ++i)
{
if (freeXmmMap[i])
{
freeXmmMap[i] = false;
xmmInstUsers[i] = instIdx;
return RegisterX64{size, uint8_t(i)};
}
}
}
else
{
for (RegisterX64 reg : kGprAllocOrder)
{
if (freeGprMap[reg.index])
{
freeGprMap[reg.index] = false;
gprInstUsers[reg.index] = instIdx;
return RegisterX64{size, reg.index};
}
}
}
// Out of registers, spill the value with the furthest next use
const std::array<uint32_t, 16>& regInstUsers = size == SizeX64::xmmword ? xmmInstUsers : gprInstUsers;
if (uint32_t furthestUseTarget = findInstructionWithFurthestNextUse(regInstUsers); furthestUseTarget != kInvalidInstIdx)
return takeReg(function.instructions[furthestUseTarget].regX64, instIdx);
LUAU_ASSERT(!"Out of registers to allocate");
return noreg;
}
RegisterX64 IrRegAllocX64::allocRegOrReuse(SizeX64 size, uint32_t instIdx, std::initializer_list<IrOp> oprefs)
{
for (IrOp op : oprefs)
{
if (op.kind != IrOpKind::Inst)
continue;
IrInst& source = function.instructions[op.index];
if (source.lastUse == instIdx && !source.reusedReg && !source.spilled && !source.needsReload)
{
// Not comparing size directly because we only need matching register set
if ((size == SizeX64::xmmword) != (source.regX64.size == SizeX64::xmmword))
continue;
LUAU_ASSERT(source.regX64 != noreg);
source.reusedReg = true;
if (size == SizeX64::xmmword)
xmmInstUsers[source.regX64.index] = instIdx;
else
gprInstUsers[source.regX64.index] = instIdx;
return RegisterX64{size, source.regX64.index};
}
}
return allocReg(size, instIdx);
}
RegisterX64 IrRegAllocX64::takeReg(RegisterX64 reg, uint32_t instIdx)
{
if (reg.size == SizeX64::xmmword)
{
if (!freeXmmMap[reg.index])
{
LUAU_ASSERT(xmmInstUsers[reg.index] != kInvalidInstIdx);
preserve(function.instructions[xmmInstUsers[reg.index]]);
}
LUAU_ASSERT(freeXmmMap[reg.index]);
freeXmmMap[reg.index] = false;
xmmInstUsers[reg.index] = instIdx;
}
else
{
if (!freeGprMap[reg.index])
{
LUAU_ASSERT(gprInstUsers[reg.index] != kInvalidInstIdx);
preserve(function.instructions[gprInstUsers[reg.index]]);
}
LUAU_ASSERT(freeGprMap[reg.index]);
freeGprMap[reg.index] = false;
gprInstUsers[reg.index] = instIdx;
}
return reg;
}
void IrRegAllocX64::freeReg(RegisterX64 reg)
{
if (reg.size == SizeX64::xmmword)
{
LUAU_ASSERT(!freeXmmMap[reg.index]);
freeXmmMap[reg.index] = true;
xmmInstUsers[reg.index] = kInvalidInstIdx;
}
else
{
LUAU_ASSERT(!freeGprMap[reg.index]);
freeGprMap[reg.index] = true;
gprInstUsers[reg.index] = kInvalidInstIdx;
}
}
void IrRegAllocX64::freeLastUseReg(IrInst& target, uint32_t instIdx)
{
if (isLastUseReg(target, instIdx))
{
LUAU_ASSERT(!target.spilled && !target.needsReload);
// Register might have already been freed if it had multiple uses inside a single instruction
if (target.regX64 == noreg)
return;
freeReg(target.regX64);
target.regX64 = noreg;
}
}
void IrRegAllocX64::freeLastUseRegs(const IrInst& inst, uint32_t instIdx)
{
auto checkOp = [this, instIdx](IrOp op) {
if (op.kind == IrOpKind::Inst)
freeLastUseReg(function.instructions[op.index], instIdx);
};
checkOp(inst.a);
checkOp(inst.b);
checkOp(inst.c);
checkOp(inst.d);
checkOp(inst.e);
checkOp(inst.f);
}
bool IrRegAllocX64::isLastUseReg(const IrInst& target, uint32_t instIdx) const
{
return target.lastUse == instIdx && !target.reusedReg;
}
void IrRegAllocX64::preserve(IrInst& inst)
{
IrSpillX64 spill;
spill.instIdx = function.getInstIndex(inst);
spill.valueKind = getCmdValueKind(inst.cmd);
spill.spillId = nextSpillId++;
spill.originalLoc = inst.regX64;
// Loads from VmReg/VmConst don't have to be spilled to the stack; they can be reloaded later from their VM register or constant location
if (!hasRestoreOp(inst))
{
unsigned i = findSpillStackSlot(spill.valueKind);
if (spill.valueKind == IrValueKind::Tvalue)
build.vmovups(xmmword[sSpillArea + i * 8], inst.regX64);
else if (spill.valueKind == IrValueKind::Double)
build.vmovsd(qword[sSpillArea + i * 8], inst.regX64);
else if (spill.valueKind == IrValueKind::Pointer)
build.mov(qword[sSpillArea + i * 8], inst.regX64);
else if (spill.valueKind == IrValueKind::Tag || spill.valueKind == IrValueKind::Int)
build.mov(dword[sSpillArea + i * 8], inst.regX64);
else
LUAU_ASSERT(!"unsupported value kind");
usedSpillSlots.set(i);
if (i + 1 > maxUsedSlot)
maxUsedSlot = i + 1;
if (spill.valueKind == IrValueKind::Tvalue)
{
usedSpillSlots.set(i + 1);
if (i + 2 > maxUsedSlot)
maxUsedSlot = i + 2;
}
spill.stackSlot = uint8_t(i);
inst.spilled = true;
}
else
{
inst.needsReload = true;
}
spills.push_back(spill);
freeReg(inst.regX64);
inst.regX64 = noreg;
}
void IrRegAllocX64::restore(IrInst& inst, bool intoOriginalLocation)
{
uint32_t instIdx = function.getInstIndex(inst);
for (size_t i = 0; i < spills.size(); i++)
{
if (spills[i].instIdx == instIdx)
{
RegisterX64 reg = intoOriginalLocation ? takeReg(spills[i].originalLoc, instIdx) : allocReg(spills[i].originalLoc.size, instIdx);
OperandX64 restoreLocation = noreg;
// Previous call might have relocated the spill vector, so this reference can't be taken earlier
const IrSpillX64& spill = spills[i];
if (spill.stackSlot != kNoStackSlot)
{
restoreLocation = addr[sSpillArea + spill.stackSlot * 8];
restoreLocation.memSize = reg.size;
usedSpillSlots.set(spill.stackSlot, false);
if (spill.valueKind == IrValueKind::Tvalue)
usedSpillSlots.set(spill.stackSlot + 1, false);
}
else
{
restoreLocation = getRestoreAddress(inst, getRestoreOp(inst));
}
if (spill.valueKind == IrValueKind::Tvalue)
build.vmovups(reg, restoreLocation);
else if (spill.valueKind == IrValueKind::Double)
build.vmovsd(reg, restoreLocation);
else
build.mov(reg, restoreLocation);
inst.regX64 = reg;
inst.spilled = false;
inst.needsReload = false;
spills[i] = spills.back();
spills.pop_back();
return;
}
}
}
void IrRegAllocX64::preserveAndFreeInstValues()
{
for (uint32_t instIdx : gprInstUsers)
{
if (instIdx != kInvalidInstIdx)
preserve(function.instructions[instIdx]);
}
for (uint32_t instIdx : xmmInstUsers)
{
if (instIdx != kInvalidInstIdx)
preserve(function.instructions[instIdx]);
}
}
bool IrRegAllocX64::shouldFreeGpr(RegisterX64 reg) const
{
if (reg == noreg)
return false;
LUAU_ASSERT(reg.size != SizeX64::xmmword);
for (RegisterX64 gpr : kGprAllocOrder)
{
if (reg.index == gpr.index)
return true;
}
return false;
}
unsigned IrRegAllocX64::findSpillStackSlot(IrValueKind valueKind)
{
// Find a free stack slot. Two consecutive slots might be required for 16 byte TValues, so '- 1' is used
for (unsigned i = 0; i < unsigned(usedSpillSlots.size() - 1); ++i)
{
if (usedSpillSlots.test(i))
continue;
if (valueKind == IrValueKind::Tvalue && usedSpillSlots.test(i + 1))
{
++i; // No need to retest this double position
continue;
}
return i;
}
LUAU_ASSERT(!"nowhere to spill");
return ~0u;
}
IrOp IrRegAllocX64::getRestoreOp(const IrInst& inst) const
{
if (IrOp location = function.findRestoreOp(inst); location.kind == IrOpKind::VmReg || location.kind == IrOpKind::VmConst)
return location;
return IrOp();
}
bool IrRegAllocX64::hasRestoreOp(const IrInst& inst) const
{
return getRestoreOp(inst).kind != IrOpKind::None;
}
OperandX64 IrRegAllocX64::getRestoreAddress(const IrInst& inst, IrOp restoreOp)
{
switch (getCmdValueKind(inst.cmd))
{
case IrValueKind::Unknown:
case IrValueKind::None:
LUAU_ASSERT(!"Invalid operand restore value kind");
break;
case IrValueKind::Tag:
return restoreOp.kind == IrOpKind::VmReg ? luauRegTag(vmRegOp(restoreOp)) : luauConstantTag(vmConstOp(restoreOp));
case IrValueKind::Int:
LUAU_ASSERT(restoreOp.kind == IrOpKind::VmReg);
return luauRegValueInt(vmRegOp(restoreOp));
case IrValueKind::Pointer:
return restoreOp.kind == IrOpKind::VmReg ? luauRegValue(vmRegOp(restoreOp)) : luauConstantValue(vmConstOp(restoreOp));
case IrValueKind::Double:
return restoreOp.kind == IrOpKind::VmReg ? luauRegValue(vmRegOp(restoreOp)) : luauConstantValue(vmConstOp(restoreOp));
case IrValueKind::Tvalue:
return restoreOp.kind == IrOpKind::VmReg ? luauReg(vmRegOp(restoreOp)) : luauConstant(vmConstOp(restoreOp));
}
LUAU_ASSERT(!"Failed to find restore operand location");
return noreg;
}
uint32_t IrRegAllocX64::findInstructionWithFurthestNextUse(const std::array<uint32_t, 16>& regInstUsers) const
{
uint32_t furthestUseTarget = kInvalidInstIdx;
uint32_t furthestUseLocation = 0;
for (uint32_t regInstUser : regInstUsers)
{
// Cannot spill temporary registers or the register of the value that's defined in the current instruction
if (regInstUser == kInvalidInstIdx || regInstUser == currInstIdx)
continue;
uint32_t nextUse = getNextInstUse(function, regInstUser, currInstIdx);
// Cannot spill value that is about to be used in the current instruction
if (nextUse == currInstIdx)
continue;
if (furthestUseTarget == kInvalidInstIdx || nextUse > furthestUseLocation)
{
furthestUseLocation = nextUse;
furthestUseTarget = regInstUser;
}
}
return furthestUseTarget;
}
void IrRegAllocX64::assertFree(RegisterX64 reg) const
{
if (reg.size == SizeX64::xmmword)
LUAU_ASSERT(freeXmmMap[reg.index]);
else
LUAU_ASSERT(freeGprMap[reg.index]);
}
void IrRegAllocX64::assertAllFree() const
{
for (RegisterX64 reg : kGprAllocOrder)
LUAU_ASSERT(freeGprMap[reg.index]);
for (bool free : freeXmmMap)
LUAU_ASSERT(free);
}
void IrRegAllocX64::assertNoSpills() const
{
LUAU_ASSERT(spills.empty());
}
ScopedRegX64::ScopedRegX64(IrRegAllocX64& owner)
: owner(owner)
, reg(noreg)
{
}
ScopedRegX64::ScopedRegX64(IrRegAllocX64& owner, SizeX64 size)
: owner(owner)
, reg(noreg)
{
alloc(size);
}
ScopedRegX64::ScopedRegX64(IrRegAllocX64& owner, RegisterX64 reg)
: owner(owner)
, reg(reg)
{
}
ScopedRegX64::~ScopedRegX64()
{
if (reg != noreg)
owner.freeReg(reg);
}
void ScopedRegX64::alloc(SizeX64 size)
{
LUAU_ASSERT(reg == noreg);
reg = owner.allocReg(size, kInvalidInstIdx);
}
void ScopedRegX64::free()
{
LUAU_ASSERT(reg != noreg);
owner.freeReg(reg);
reg = noreg;
}
RegisterX64 ScopedRegX64::release()
{
RegisterX64 tmp = reg;
reg = noreg;
return tmp;
}
ScopedSpills::ScopedSpills(IrRegAllocX64& owner)
: owner(owner)
{
startSpillId = owner.nextSpillId;
}
ScopedSpills::~ScopedSpills()
{
unsigned endSpillId = owner.nextSpillId;
for (size_t i = 0; i < owner.spills.size();)
{
IrSpillX64& spill = owner.spills[i];
// Restoring spills inside this scope cannot create new spills
LUAU_ASSERT(spill.spillId < endSpillId);
// If spill was created inside current scope, it has to be restored
if (spill.spillId >= startSpillId)
{
IrInst& inst = owner.function.instructions[spill.instIdx];
owner.restore(inst, /*intoOriginalLocation*/ true);
// Spill restore removes the spill entry, so the loop repeats at the same 'i'
}
else
{
i++;
}
}
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau
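A hedged usage sketch for the RAII helpers above, inside hypothetical lowering code where 'regs' is the IrRegAllocX64 instance and 'build' the assembly builder:

{
    ScopedSpills spillGuard(regs);          // restores spills created in this scope
    ScopedRegX64 tmp{regs, SizeX64::qword}; // allocated now, freed at scope exit
    build.mov(tmp.reg, luauRegValue(0));    // illustrative use of the scratch register
} // tmp is freed; values spilled inside the scope return to their original registers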

View File

@ -0,0 +1,827 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "IrTranslateBuiltins.h"
#include "Luau/Bytecode.h"
#include "Luau/IrBuilder.h"
#include "lstate.h"
#include <math.h>
// TODO: when nresults is less than our actual result count, we can skip computing/writing unused results
static const int kMinMaxUnrolledParams = 5;
static const int kBit32BinaryOpUnrolledParams = 5;
namespace Luau
{
namespace CodeGen
{
static void builtinCheckDouble(IrBuilder& build, IrOp arg, IrOp fallback)
{
if (arg.kind == IrOpKind::Constant)
LUAU_ASSERT(build.function.constOp(arg).kind == IrConstKind::Double);
else
build.loadAndCheckTag(arg, LUA_TNUMBER, fallback);
}
static IrOp builtinLoadDouble(IrBuilder& build, IrOp arg)
{
if (arg.kind == IrOpKind::Constant)
return arg;
return build.inst(IrCmd::LOAD_DOUBLE, arg);
}
// Wrapper code for all builtins with a fixed signature and manual assembly lowering of the body
// (number, ...) -> number
static BuiltinImplResult translateBuiltinNumberToNumber(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(1));
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinNumberToNumberLibm(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp res = build.inst(IrCmd::INVOKE_LIBM, build.constUint(bfid), va);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
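// Illustrative only (not emitted verbatim by this function): lowering math.exp(R1)
// into R0 through the helper above produces IR roughly like
//   CHECK_TAG R1, tnumber, bb_fallback
//   %0 = LOAD_DOUBLE R1
//   %1 = INVOKE_LIBM <exp>, %0
//   STORE_DOUBLE R0, %1
//   STORE_TAG R0, tnumber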
static BuiltinImplResult translateBuiltin2NumberToNumberLibm(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 2 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp vb = builtinLoadDouble(build, args);
IrOp res = build.inst(IrCmd::INVOKE_LIBM, build.constUint(bfid), va, vb);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinMathLdexp(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 2 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp vb = builtinLoadDouble(build, args);
IrOp vbi = build.inst(IrCmd::NUM_TO_INT, vb);
IrOp res = build.inst(IrCmd::INVOKE_LIBM, build.constUint(bfid), va, vbi);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
// (number, ...) -> (number, number)
static BuiltinImplResult translateBuiltinNumberTo2Number(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 2)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
build.inst(
IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(nresults == 1 ? 1 : 2));
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
if (nresults != 1)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra + 1), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 2};
}
static BuiltinImplResult translateBuiltinAssert(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults != 0)
return {BuiltinImplType::None, -1};
IrOp cont = build.block(IrBlockKind::Internal);
// TODO: maybe adding a guard like CHECK_TRUTHY can be useful
build.inst(IrCmd::JUMP_IF_FALSY, build.vmReg(arg), fallback, cont);
build.beginBlock(cont);
return {BuiltinImplType::UsesFallback, 0};
}
static BuiltinImplResult translateBuiltinMathDeg(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
const double rpd = (3.14159265358979323846 / 180.0);
IrOp varg = builtinLoadDouble(build, build.vmReg(arg));
IrOp value = build.inst(IrCmd::DIV_NUM, varg, build.constDouble(rpd));
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinMathRad(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
const double rpd = (3.14159265358979323846 / 180.0);
IrOp varg = builtinLoadDouble(build, build.vmReg(arg));
IrOp value = build.inst(IrCmd::MUL_NUM, varg, build.constDouble(rpd));
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinMathLog(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
int libmId = bfid;
std::optional<double> denom;
if (nparams != 1)
{
std::optional<double> y = build.function.asDoubleOp(args);
if (!y)
return {BuiltinImplType::None, -1};
if (*y == 2.0)
libmId = LBF_IR_MATH_LOG2;
else if (*y == 10.0)
libmId = LBF_MATH_LOG10;
else
denom = log(*y);
}
builtinCheckDouble(build, build.vmReg(arg), fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp res = build.inst(IrCmd::INVOKE_LIBM, build.constUint(libmId), va);
if (denom)
res = build.inst(IrCmd::DIV_NUM, res, build.constDouble(*denom));
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
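// Illustrative behavior: math.log(x, 2) picks the dedicated log2 libm entry,
// math.log(x, 10) picks log10, and math.log(x, 7) computes log(x) and divides by
// the translation-time constant log(7); a non-constant base is not translated
// and takes the regular builtin path.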
static BuiltinImplResult translateBuiltinMathMin(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 2 || nparams > kMinMaxUnrolledParams || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
for (int i = 3; i <= nparams; ++i)
builtinCheckDouble(build, build.vmReg(vmRegOp(args) + (i - 2)), fallback);
IrOp varg1 = builtinLoadDouble(build, build.vmReg(arg));
IrOp varg2 = builtinLoadDouble(build, args);
IrOp res = build.inst(IrCmd::MIN_NUM, varg2, varg1); // Swapped arguments are required for consistency with VM builtins
for (int i = 3; i <= nparams; ++i)
{
IrOp arg = builtinLoadDouble(build, build.vmReg(vmRegOp(args) + (i - 2)));
res = build.inst(IrCmd::MIN_NUM, arg, res);
}
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinMathMax(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 2 || nparams > kMinMaxUnrolledParams || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
for (int i = 3; i <= nparams; ++i)
builtinCheckDouble(build, build.vmReg(vmRegOp(args) + (i - 2)), fallback);
IrOp varg1 = builtinLoadDouble(build, build.vmReg(arg));
IrOp varg2 = builtinLoadDouble(build, args);
IrOp res = build.inst(IrCmd::MAX_NUM, varg2, varg1); // Swapped arguments are required for consistency with VM builtins
for (int i = 3; i <= nparams; ++i)
{
IrOp arg = builtinLoadDouble(build, build.vmReg(vmRegOp(args) + (i - 2)));
res = build.inst(IrCmd::MAX_NUM, arg, res);
}
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinMathClamp(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 3 || nresults > 1)
return {BuiltinImplType::None, -1};
IrOp block = build.block(IrBlockKind::Internal);
LUAU_ASSERT(args.kind == IrOpKind::VmReg);
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
builtinCheckDouble(build, build.vmReg(vmRegOp(args) + 1), fallback);
IrOp min = builtinLoadDouble(build, args);
IrOp max = builtinLoadDouble(build, build.vmReg(vmRegOp(args) + 1));
build.inst(IrCmd::JUMP_CMP_NUM, min, max, build.cond(IrCondition::NotLessEqual), fallback, block);
build.beginBlock(block);
IrOp v = builtinLoadDouble(build, build.vmReg(arg));
IrOp r = build.inst(IrCmd::MAX_NUM, min, v);
IrOp clamped = build.inst(IrCmd::MIN_NUM, max, r);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), clamped);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinMathUnary(IrBuilder& build, IrCmd cmd, int nparams, int ra, int arg, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
IrOp varg = builtinLoadDouble(build, build.vmReg(arg));
IrOp result = build.inst(cmd, varg);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), result);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinType(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
build.inst(IrCmd::FASTCALL, build.constUint(LBF_TYPE), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(1));
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TSTRING));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinTypeof(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
build.inst(IrCmd::FASTCALL, build.constUint(LBF_TYPEOF), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(1));
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TSTRING));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinBit32BinaryOp(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 2 || nparams > kBit32BinaryOpUnrolledParams || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
for (int i = 3; i <= nparams; ++i)
builtinCheckDouble(build, build.vmReg(vmRegOp(args) + (i - 2)), fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp vb = builtinLoadDouble(build, args);
IrOp vaui = build.inst(IrCmd::NUM_TO_UINT, va);
IrOp vbui = build.inst(IrCmd::NUM_TO_UINT, vb);
IrCmd cmd = IrCmd::NOP;
if (bfid == LBF_BIT32_BAND || bfid == LBF_BIT32_BTEST)
cmd = IrCmd::BITAND_UINT;
else if (bfid == LBF_BIT32_BXOR)
cmd = IrCmd::BITXOR_UINT;
else if (bfid == LBF_BIT32_BOR)
cmd = IrCmd::BITOR_UINT;
LUAU_ASSERT(cmd != IrCmd::NOP);
IrOp res = build.inst(cmd, vaui, vbui);
for (int i = 3; i <= nparams; ++i)
{
IrOp vc = builtinLoadDouble(build, build.vmReg(vmRegOp(args) + (i - 2)));
IrOp arg = build.inst(IrCmd::NUM_TO_UINT, vc);
res = build.inst(cmd, res, arg);
}
if (bfid == LBF_BIT32_BTEST)
{
IrOp falsey = build.block(IrBlockKind::Internal);
IrOp truthy = build.block(IrBlockKind::Internal);
IrOp exit = build.block(IrBlockKind::Internal);
build.inst(IrCmd::JUMP_EQ_INT, res, build.constInt(0), falsey, truthy);
build.beginBlock(falsey);
build.inst(IrCmd::STORE_INT, build.vmReg(ra), build.constInt(0));
build.inst(IrCmd::JUMP, exit);
build.beginBlock(truthy);
build.inst(IrCmd::STORE_INT, build.vmReg(ra), build.constInt(1));
build.inst(IrCmd::JUMP, exit);
build.beginBlock(exit);
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TBOOLEAN));
}
else
{
IrOp value = build.inst(IrCmd::UINT_TO_NUM, res);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
}
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinBit32Bnot(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp vaui = build.inst(IrCmd::NUM_TO_UINT, va);
IrOp not_ = build.inst(IrCmd::BITNOT_UINT, vaui);
IrOp value = build.inst(IrCmd::UINT_TO_NUM, not_);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinBit32Shift(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 2 || nresults > 1)
return {BuiltinImplType::None, -1};
IrOp block = build.block(IrBlockKind::Internal);
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp vb = builtinLoadDouble(build, args);
IrOp vaui = build.inst(IrCmd::NUM_TO_UINT, va);
IrOp vbi = build.inst(IrCmd::NUM_TO_INT, vb);
build.inst(IrCmd::JUMP_GE_UINT, vbi, build.constInt(32), fallback, block);
build.beginBlock(block);
IrCmd cmd = IrCmd::NOP;
if (bfid == LBF_BIT32_LSHIFT)
cmd = IrCmd::BITLSHIFT_UINT;
else if (bfid == LBF_BIT32_RSHIFT)
cmd = IrCmd::BITRSHIFT_UINT;
else if (bfid == LBF_BIT32_ARSHIFT)
cmd = IrCmd::BITARSHIFT_UINT;
LUAU_ASSERT(cmd != IrCmd::NOP);
IrOp shift = build.inst(cmd, vaui, vbi);
IrOp value = build.inst(IrCmd::UINT_TO_NUM, shift);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
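// Note on the range guard above: vbi comes from NUM_TO_INT, and JUMP_GE_UINT compares it as unsigned,
// so a negative shift count reinterprets as a value >= 2^31 and takes the fallback path together with
// counts >= 32. For example, both bit32.lshift(1, 33) and bit32.lshift(1, -1) leave the fast path.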
static BuiltinImplResult translateBuiltinBit32Rotate(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 2 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp vb = builtinLoadDouble(build, args);
IrOp vaui = build.inst(IrCmd::NUM_TO_UINT, va);
IrOp vbi = build.inst(IrCmd::NUM_TO_INT, vb);
IrCmd cmd = (bfid == LBF_BIT32_LROTATE) ? IrCmd::BITLROTATE_UINT : IrCmd::BITRROTATE_UINT;
IrOp shift = build.inst(cmd, vaui, vbi);
IrOp value = build.inst(IrCmd::UINT_TO_NUM, shift);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinBit32Extract(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 2 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp vb = builtinLoadDouble(build, args);
IrOp n = build.inst(IrCmd::NUM_TO_UINT, va);
IrOp f = build.inst(IrCmd::NUM_TO_INT, vb);
IrOp value;
if (nparams == 2)
{
IrOp block = build.block(IrBlockKind::Internal);
build.inst(IrCmd::JUMP_GE_UINT, f, build.constInt(32), fallback, block);
build.beginBlock(block);
// TODO: this can be optimized using a bit-select instruction (bt on x86)
IrOp shift = build.inst(IrCmd::BITRSHIFT_UINT, n, f);
value = build.inst(IrCmd::BITAND_UINT, shift, build.constInt(1));
}
else
{
builtinCheckDouble(build, build.vmReg(args.index + 1), fallback);
IrOp vc = builtinLoadDouble(build, build.vmReg(args.index + 1));
IrOp w = build.inst(IrCmd::NUM_TO_INT, vc);
IrOp block1 = build.block(IrBlockKind::Internal);
build.inst(IrCmd::JUMP_LT_INT, f, build.constInt(0), fallback, block1);
build.beginBlock(block1);
IrOp block2 = build.block(IrBlockKind::Internal);
build.inst(IrCmd::JUMP_LT_INT, w, build.constInt(1), fallback, block2);
build.beginBlock(block2);
IrOp block3 = build.block(IrBlockKind::Internal);
IrOp fw = build.inst(IrCmd::ADD_INT, f, w);
build.inst(IrCmd::JUMP_LT_INT, fw, build.constInt(33), block3, fallback);
build.beginBlock(block3);
IrOp shift = build.inst(IrCmd::BITLSHIFT_UINT, build.constInt(0xfffffffe), build.inst(IrCmd::SUB_INT, w, build.constInt(1)));
IrOp m = build.inst(IrCmd::BITNOT_UINT, shift);
IrOp nf = build.inst(IrCmd::BITRSHIFT_UINT, n, f);
value = build.inst(IrCmd::BITAND_UINT, nf, m);
}
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), build.inst(IrCmd::UINT_TO_NUM, value));
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinBit32ExtractK(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 2 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp n = build.inst(IrCmd::NUM_TO_UINT, va);
double a2 = build.function.doubleOp(args);
int fw = int(a2);
int f = fw & 31;
int w1 = fw >> 5;
uint32_t m = ~(0xfffffffeu << w1);
IrOp nf = build.inst(IrCmd::BITRSHIFT_UINT, n, build.constInt(f));
IrOp and_ = build.inst(IrCmd::BITAND_UINT, nf, build.constInt(m));
IrOp value = build.inst(IrCmd::UINT_TO_NUM, and_);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
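// Worked example of the decoding above, assuming the compiler packs the constant as f | ((w - 1) << 5)
// (which is what this decode implies): for bit32.extract(n, 8, 8), fw = 8 | (7 << 5) = 232, so
//   f  = 232 & 31 = 8
//   w1 = 232 >> 5 = 7
//   m  = ~(0xfffffffeu << 7) = 0x000000ff
// and the lowered result is (n >> 8) & 0xff.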
static BuiltinImplResult translateBuiltinBit32Countz(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp vaui = build.inst(IrCmd::NUM_TO_UINT, va);
IrCmd cmd = (bfid == LBF_BIT32_COUNTLZ) ? IrCmd::BITCOUNTLZ_UINT : IrCmd::BITCOUNTRZ_UINT;
IrOp bin = build.inst(cmd, vaui);
IrOp value = build.inst(IrCmd::UINT_TO_NUM, bin);
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinBit32Replace(
IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 3 || nresults > 1)
return {BuiltinImplType::None, -1};
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
builtinCheckDouble(build, build.vmReg(args.index + 1), fallback);
IrOp va = builtinLoadDouble(build, build.vmReg(arg));
IrOp vb = builtinLoadDouble(build, args);
IrOp vc = builtinLoadDouble(build, build.vmReg(args.index + 1));
IrOp n = build.inst(IrCmd::NUM_TO_UINT, va);
IrOp v = build.inst(IrCmd::NUM_TO_UINT, vb);
IrOp f = build.inst(IrCmd::NUM_TO_INT, vc);
IrOp value;
if (nparams == 3)
{
IrOp block = build.block(IrBlockKind::Internal);
build.inst(IrCmd::JUMP_GE_UINT, f, build.constInt(32), fallback, block);
build.beginBlock(block);
// TODO: this can be optimized using a bit-select instruction (btr on x86)
IrOp m = build.constInt(1);
IrOp shift = build.inst(IrCmd::BITLSHIFT_UINT, m, f);
IrOp not_ = build.inst(IrCmd::BITNOT_UINT, shift);
IrOp lhs = build.inst(IrCmd::BITAND_UINT, n, not_);
IrOp vm = build.inst(IrCmd::BITAND_UINT, v, m);
IrOp rhs = build.inst(IrCmd::BITLSHIFT_UINT, vm, f);
value = build.inst(IrCmd::BITOR_UINT, lhs, rhs);
}
else
{
builtinCheckDouble(build, build.vmReg(args.index + 2), fallback);
IrOp vd = builtinLoadDouble(build, build.vmReg(args.index + 2));
IrOp w = build.inst(IrCmd::NUM_TO_INT, vd);
IrOp block1 = build.block(IrBlockKind::Internal);
build.inst(IrCmd::JUMP_LT_INT, f, build.constInt(0), fallback, block1);
build.beginBlock(block1);
IrOp block2 = build.block(IrBlockKind::Internal);
build.inst(IrCmd::JUMP_LT_INT, w, build.constInt(1), fallback, block2);
build.beginBlock(block2);
IrOp block3 = build.block(IrBlockKind::Internal);
IrOp fw = build.inst(IrCmd::ADD_INT, f, w);
build.inst(IrCmd::JUMP_LT_INT, fw, build.constInt(33), block3, fallback);
build.beginBlock(block3);
IrOp shift1 = build.inst(IrCmd::BITLSHIFT_UINT, build.constInt(0xfffffffe), build.inst(IrCmd::SUB_INT, w, build.constInt(1)));
IrOp m = build.inst(IrCmd::BITNOT_UINT, shift1);
IrOp shift2 = build.inst(IrCmd::BITLSHIFT_UINT, m, f);
IrOp not_ = build.inst(IrCmd::BITNOT_UINT, shift2);
IrOp lhs = build.inst(IrCmd::BITAND_UINT, n, not_);
IrOp vm = build.inst(IrCmd::BITAND_UINT, v, m);
IrOp rhs = build.inst(IrCmd::BITLSHIFT_UINT, vm, f);
value = build.inst(IrCmd::BITOR_UINT, lhs, rhs);
}
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), build.inst(IrCmd::UINT_TO_NUM, value));
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinVector(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
if (nparams < 3 || nresults > 1)
return {BuiltinImplType::None, -1};
LUAU_ASSERT(LUA_VECTOR_SIZE == 3);
builtinCheckDouble(build, build.vmReg(arg), fallback);
builtinCheckDouble(build, args, fallback);
builtinCheckDouble(build, build.vmReg(vmRegOp(args) + 1), fallback);
IrOp x = builtinLoadDouble(build, build.vmReg(arg));
IrOp y = builtinLoadDouble(build, args);
IrOp z = builtinLoadDouble(build, build.vmReg(vmRegOp(args) + 1));
build.inst(IrCmd::STORE_VECTOR, build.vmReg(ra), x, y, z);
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TVECTOR));
return {BuiltinImplType::UsesFallback, 1};
}
BuiltinImplResult translateBuiltin(IrBuilder& build, int bfid, int ra, int arg, IrOp args, int nparams, int nresults, IrOp fallback)
{
// Builtins are not allowed to handle variadic arguments
if (nparams == LUA_MULTRET)
return {BuiltinImplType::None, -1};
switch (bfid)
{
case LBF_ASSERT:
return translateBuiltinAssert(build, nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_DEG:
return translateBuiltinMathDeg(build, nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_RAD:
return translateBuiltinMathRad(build, nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_LOG:
return translateBuiltinMathLog(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_MIN:
return translateBuiltinMathMin(build, nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_MAX:
return translateBuiltinMathMax(build, nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_CLAMP:
return translateBuiltinMathClamp(build, nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_FLOOR:
return translateBuiltinMathUnary(build, IrCmd::FLOOR_NUM, nparams, ra, arg, nresults, fallback);
case LBF_MATH_CEIL:
return translateBuiltinMathUnary(build, IrCmd::CEIL_NUM, nparams, ra, arg, nresults, fallback);
case LBF_MATH_SQRT:
return translateBuiltinMathUnary(build, IrCmd::SQRT_NUM, nparams, ra, arg, nresults, fallback);
case LBF_MATH_ABS:
return translateBuiltinMathUnary(build, IrCmd::ABS_NUM, nparams, ra, arg, nresults, fallback);
case LBF_MATH_ROUND:
return translateBuiltinMathUnary(build, IrCmd::ROUND_NUM, nparams, ra, arg, nresults, fallback);
case LBF_MATH_EXP:
case LBF_MATH_ASIN:
case LBF_MATH_SIN:
case LBF_MATH_SINH:
case LBF_MATH_ACOS:
case LBF_MATH_COS:
case LBF_MATH_COSH:
case LBF_MATH_ATAN:
case LBF_MATH_TAN:
case LBF_MATH_TANH:
case LBF_MATH_LOG10:
return translateBuiltinNumberToNumberLibm(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_SIGN:
return translateBuiltinNumberToNumber(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_POW:
case LBF_MATH_FMOD:
case LBF_MATH_ATAN2:
return translateBuiltin2NumberToNumberLibm(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_LDEXP:
return translateBuiltinMathLdexp(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_MATH_FREXP:
case LBF_MATH_MODF:
return translateBuiltinNumberTo2Number(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_BIT32_BAND:
case LBF_BIT32_BOR:
case LBF_BIT32_BXOR:
case LBF_BIT32_BTEST:
return translateBuiltinBit32BinaryOp(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_BIT32_BNOT:
return translateBuiltinBit32Bnot(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_BIT32_LSHIFT:
case LBF_BIT32_RSHIFT:
case LBF_BIT32_ARSHIFT:
return translateBuiltinBit32Shift(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_BIT32_LROTATE:
case LBF_BIT32_RROTATE:
return translateBuiltinBit32Rotate(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_BIT32_EXTRACT:
return translateBuiltinBit32Extract(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_BIT32_EXTRACTK:
return translateBuiltinBit32ExtractK(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_BIT32_COUNTLZ:
case LBF_BIT32_COUNTRZ:
return translateBuiltinBit32Countz(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_BIT32_REPLACE:
return translateBuiltinBit32Replace(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
case LBF_TYPE:
return translateBuiltinType(build, nparams, ra, arg, args, nresults, fallback);
case LBF_TYPEOF:
return translateBuiltinTypeof(build, nparams, ra, arg, args, nresults, fallback);
case LBF_VECTOR:
return translateBuiltinVector(build, nparams, ra, arg, args, nresults, fallback);
default:
return {BuiltinImplType::None, -1};
}
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,27 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
namespace Luau
{
namespace CodeGen
{
struct IrBuilder;
struct IrOp;
enum class BuiltinImplType
{
None,
UsesFallback, // Uses fallback for unsupported cases
};
struct BuiltinImplResult
{
BuiltinImplType type;
int actualResultCount;
};
BuiltinImplResult translateBuiltin(IrBuilder& build, int bfid, int ra, int arg, IrOp args, int nparams, int nresults, IrOp fallback);
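// Usage sketch (assumed caller shape, not a verbatim call site): the fastcall translator tries
// translateBuiltin first and falls back to the generic path when it declines, e.g.
//   BuiltinImplResult br = translateBuiltin(build, bfid, ra, arg, args, nparams, nresults, fallback);
//   if (br.type == BuiltinImplType::None)
//       ; // emit the generic INVOKE_FASTCALL path instead
//   // otherwise br.actualResultCount says how many results were materialized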
} // namespace CodeGen
} // namespace Luau

File diff suppressed because it is too large

View File

@ -0,0 +1,68 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <stdint.h>
#include "ltm.h"
typedef uint32_t Instruction;
namespace Luau
{
namespace CodeGen
{
enum class IrCondition : uint8_t;
struct IrOp;
struct IrBuilder;
enum class IrCmd : uint8_t;
void translateInstLoadNil(IrBuilder& build, const Instruction* pc);
void translateInstLoadB(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstLoadN(IrBuilder& build, const Instruction* pc);
void translateInstLoadK(IrBuilder& build, const Instruction* pc);
void translateInstLoadKX(IrBuilder& build, const Instruction* pc);
void translateInstMove(IrBuilder& build, const Instruction* pc);
void translateInstJump(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstJumpBack(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstJumpIf(IrBuilder& build, const Instruction* pc, int pcpos, bool not_);
void translateInstJumpIfEq(IrBuilder& build, const Instruction* pc, int pcpos, bool not_);
void translateInstJumpIfCond(IrBuilder& build, const Instruction* pc, int pcpos, IrCondition cond);
void translateInstJumpX(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstJumpxEqNil(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstJumpxEqB(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstJumpxEqN(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstJumpxEqS(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstBinary(IrBuilder& build, const Instruction* pc, int pcpos, TMS tm);
void translateInstBinaryK(IrBuilder& build, const Instruction* pc, int pcpos, TMS tm);
void translateInstNot(IrBuilder& build, const Instruction* pc);
void translateInstMinus(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstLength(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstNewTable(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstDupTable(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstGetUpval(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstSetUpval(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstCloseUpvals(IrBuilder& build, const Instruction* pc);
void translateFastCallN(IrBuilder& build, const Instruction* pc, int pcpos, bool customParams, int customParamCount, IrOp customArgs, IrOp next);
void translateInstForNPrep(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstForNLoop(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstForGPrepNext(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstForGPrepInext(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstForGLoopIpairs(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstGetTableN(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstSetTableN(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstGetTable(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstSetTable(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstGetImport(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstGetTableKS(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstSetTableKS(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstGetGlobal(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstSetGlobal(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstConcat(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstCapture(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstNamecall(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstAndX(IrBuilder& build, const Instruction* pc, int pcpos, IrOp c);
void translateInstOrX(IrBuilder& build, const Instruction* pc, int pcpos, IrOp c);
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,791 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/IrUtils.h"
#include "Luau/IrBuilder.h"
#include "BitUtils.h"
#include "NativeState.h"
#include "lua.h"
#include "lnumutils.h"
#include <limits.h>
#include <math.h>
namespace Luau
{
namespace CodeGen
{
IrValueKind getCmdValueKind(IrCmd cmd)
{
switch (cmd)
{
case IrCmd::NOP:
return IrValueKind::None;
case IrCmd::LOAD_TAG:
return IrValueKind::Tag;
case IrCmd::LOAD_POINTER:
return IrValueKind::Pointer;
case IrCmd::LOAD_DOUBLE:
return IrValueKind::Double;
case IrCmd::LOAD_INT:
return IrValueKind::Int;
case IrCmd::LOAD_TVALUE:
case IrCmd::LOAD_NODE_VALUE_TV:
return IrValueKind::Tvalue;
case IrCmd::LOAD_ENV:
case IrCmd::GET_ARR_ADDR:
case IrCmd::GET_SLOT_NODE_ADDR:
case IrCmd::GET_HASH_NODE_ADDR:
return IrValueKind::Pointer;
case IrCmd::STORE_TAG:
case IrCmd::STORE_POINTER:
case IrCmd::STORE_DOUBLE:
case IrCmd::STORE_INT:
case IrCmd::STORE_VECTOR:
case IrCmd::STORE_TVALUE:
case IrCmd::STORE_NODE_VALUE_TV:
return IrValueKind::None;
case IrCmd::ADD_INT:
case IrCmd::SUB_INT:
return IrValueKind::Int;
case IrCmd::ADD_NUM:
case IrCmd::SUB_NUM:
case IrCmd::MUL_NUM:
case IrCmd::DIV_NUM:
case IrCmd::MOD_NUM:
case IrCmd::MIN_NUM:
case IrCmd::MAX_NUM:
case IrCmd::UNM_NUM:
case IrCmd::FLOOR_NUM:
case IrCmd::CEIL_NUM:
case IrCmd::ROUND_NUM:
case IrCmd::SQRT_NUM:
case IrCmd::ABS_NUM:
return IrValueKind::Double;
case IrCmd::NOT_ANY:
return IrValueKind::Int;
case IrCmd::JUMP:
case IrCmd::JUMP_IF_TRUTHY:
case IrCmd::JUMP_IF_FALSY:
case IrCmd::JUMP_EQ_TAG:
case IrCmd::JUMP_EQ_INT:
case IrCmd::JUMP_LT_INT:
case IrCmd::JUMP_GE_UINT:
case IrCmd::JUMP_EQ_POINTER:
case IrCmd::JUMP_CMP_NUM:
case IrCmd::JUMP_CMP_ANY:
case IrCmd::JUMP_SLOT_MATCH:
return IrValueKind::None;
case IrCmd::TABLE_LEN:
return IrValueKind::Double;
case IrCmd::NEW_TABLE:
case IrCmd::DUP_TABLE:
return IrValueKind::Pointer;
case IrCmd::TRY_NUM_TO_INDEX:
return IrValueKind::Int;
case IrCmd::TRY_CALL_FASTGETTM:
return IrValueKind::Pointer;
case IrCmd::INT_TO_NUM:
case IrCmd::UINT_TO_NUM:
return IrValueKind::Double;
case IrCmd::NUM_TO_INT:
case IrCmd::NUM_TO_UINT:
return IrValueKind::Int;
case IrCmd::ADJUST_STACK_TO_REG:
case IrCmd::ADJUST_STACK_TO_TOP:
return IrValueKind::None;
case IrCmd::FASTCALL:
return IrValueKind::None;
case IrCmd::INVOKE_FASTCALL:
return IrValueKind::Int;
case IrCmd::CHECK_FASTCALL_RES:
case IrCmd::DO_ARITH:
case IrCmd::DO_LEN:
case IrCmd::GET_TABLE:
case IrCmd::SET_TABLE:
case IrCmd::GET_IMPORT:
case IrCmd::CONCAT:
case IrCmd::GET_UPVALUE:
case IrCmd::SET_UPVALUE:
case IrCmd::PREPARE_FORN:
case IrCmd::CHECK_TAG:
case IrCmd::CHECK_READONLY:
case IrCmd::CHECK_NO_METATABLE:
case IrCmd::CHECK_SAFE_ENV:
case IrCmd::CHECK_ARRAY_SIZE:
case IrCmd::CHECK_SLOT_MATCH:
case IrCmd::CHECK_NODE_NO_NEXT:
case IrCmd::INTERRUPT:
case IrCmd::CHECK_GC:
case IrCmd::BARRIER_OBJ:
case IrCmd::BARRIER_TABLE_BACK:
case IrCmd::BARRIER_TABLE_FORWARD:
case IrCmd::SET_SAVEDPC:
case IrCmd::CLOSE_UPVALS:
case IrCmd::CAPTURE:
case IrCmd::SETLIST:
case IrCmd::CALL:
case IrCmd::RETURN:
case IrCmd::FORGLOOP:
case IrCmd::FORGLOOP_FALLBACK:
case IrCmd::FORGPREP_XNEXT_FALLBACK:
case IrCmd::COVERAGE:
case IrCmd::FALLBACK_GETGLOBAL:
case IrCmd::FALLBACK_SETGLOBAL:
case IrCmd::FALLBACK_GETTABLEKS:
case IrCmd::FALLBACK_SETTABLEKS:
case IrCmd::FALLBACK_NAMECALL:
case IrCmd::FALLBACK_PREPVARARGS:
case IrCmd::FALLBACK_GETVARARGS:
case IrCmd::FALLBACK_NEWCLOSURE:
case IrCmd::FALLBACK_DUPCLOSURE:
case IrCmd::FALLBACK_FORGPREP:
return IrValueKind::None;
case IrCmd::SUBSTITUTE:
return IrValueKind::Unknown;
case IrCmd::BITAND_UINT:
case IrCmd::BITXOR_UINT:
case IrCmd::BITOR_UINT:
case IrCmd::BITNOT_UINT:
case IrCmd::BITLSHIFT_UINT:
case IrCmd::BITRSHIFT_UINT:
case IrCmd::BITARSHIFT_UINT:
case IrCmd::BITLROTATE_UINT:
case IrCmd::BITRROTATE_UINT:
case IrCmd::BITCOUNTLZ_UINT:
case IrCmd::BITCOUNTRZ_UINT:
return IrValueKind::Int;
case IrCmd::INVOKE_LIBM:
return IrValueKind::Double;
}
LUAU_UNREACHABLE();
}
static void removeInstUse(IrFunction& function, uint32_t instIdx)
{
IrInst& inst = function.instructions[instIdx];
LUAU_ASSERT(inst.useCount);
inst.useCount--;
if (inst.useCount == 0)
kill(function, inst);
}
static void removeBlockUse(IrFunction& function, uint32_t blockIdx)
{
IrBlock& block = function.blocks[blockIdx];
LUAU_ASSERT(block.useCount);
block.useCount--;
// Entry block is never removed because it has an implicit use
if (block.useCount == 0 && blockIdx != 0)
kill(function, block);
}
void addUse(IrFunction& function, IrOp op)
{
if (op.kind == IrOpKind::Inst)
function.instructions[op.index].useCount++;
else if (op.kind == IrOpKind::Block)
function.blocks[op.index].useCount++;
}
void removeUse(IrFunction& function, IrOp op)
{
if (op.kind == IrOpKind::Inst)
removeInstUse(function, op.index);
else if (op.kind == IrOpKind::Block)
removeBlockUse(function, op.index);
}
bool isGCO(uint8_t tag)
{
// mirrors iscollectable(o) from VM/lobject.h
return tag >= LUA_TSTRING;
}
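// This relies on the tag ordering in lua.h: the value types (nil, boolean, lightuserdata, number,
// vector) all sort below LUA_TSTRING, and every tag from LUA_TSTRING onward is heap-allocated.
// For example, isGCO(LUA_TNUMBER) is false while isGCO(LUA_TTABLE) is true.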
void kill(IrFunction& function, IrInst& inst)
{
LUAU_ASSERT(inst.useCount == 0);
inst.cmd = IrCmd::NOP;
removeUse(function, inst.a);
removeUse(function, inst.b);
removeUse(function, inst.c);
removeUse(function, inst.d);
removeUse(function, inst.e);
removeUse(function, inst.f);
inst.a = {};
inst.b = {};
inst.c = {};
inst.d = {};
inst.e = {};
inst.f = {};
}
void kill(IrFunction& function, uint32_t start, uint32_t end)
{
// Kill instructions in reverse order to avoid killing instructions that are still marked as used
for (int i = int(end); i >= int(start); i--)
{
LUAU_ASSERT(unsigned(i) < function.instructions.size());
IrInst& curr = function.instructions[i];
if (curr.cmd == IrCmd::NOP)
continue;
kill(function, curr);
}
}
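// Example of why reverse order matters: given
//   %0 = LOAD_DOUBLE R0
//   %1 = UNM_NUM %0
// killing %1 first drops the use count of %0 to zero, so killing %0 next passes the useCount == 0
// assertion; a forward sweep would try to kill %0 while %1 still counts as a user.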
void kill(IrFunction& function, IrBlock& block)
{
LUAU_ASSERT(block.useCount == 0);
block.kind = IrBlockKind::Dead;
kill(function, block.start, block.finish);
block.start = ~0u;
block.finish = ~0u;
}
void replace(IrFunction& function, IrOp& original, IrOp replacement)
{
// Add the new use before removing the old one, in case the old use is the last one keeping the target operand alive
addUse(function, replacement);
removeUse(function, original);
original = replacement;
}
void replace(IrFunction& function, IrBlock& block, uint32_t instIdx, IrInst replacement)
{
IrInst& inst = function.instructions[instIdx];
// Add new uses before removing the old ones, in case the old uses are the last ones keeping a target operand alive
addUse(function, replacement.a);
addUse(function, replacement.b);
addUse(function, replacement.c);
addUse(function, replacement.d);
addUse(function, replacement.e);
addUse(function, replacement.f);
// An extra reference is added so the block will not remove itself
block.useCount++;
// If we introduced an earlier terminating instruction, all following instructions become dead
if (!isBlockTerminator(inst.cmd) && isBlockTerminator(replacement.cmd))
{
// Block has to be fully constructed before replacement is performed
LUAU_ASSERT(block.finish != ~0u);
LUAU_ASSERT(instIdx + 1 <= block.finish);
kill(function, instIdx + 1, block.finish);
block.finish = instIdx;
}
removeUse(function, inst.a);
removeUse(function, inst.b);
removeUse(function, inst.c);
removeUse(function, inst.d);
removeUse(function, inst.e);
removeUse(function, inst.f);
// Inherit existing use count (last use is skipped as it will be defined later)
replacement.useCount = inst.useCount;
inst = replacement;
// Removing the earlier extra reference might leave the block without users while not marking it as dead
// This will have to be handled by separate dead code elimination
block.useCount--;
}
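// Example tying this to the folds below: replacing a JUMP_CMP_NUM in the middle of a block with an
// unconditional {IrCmd::JUMP, target} makes instIdx the new terminator, so the instructions in
// [instIdx + 1, block.finish] are unreachable and get killed here before block.finish is updated.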
void substitute(IrFunction& function, IrInst& inst, IrOp replacement)
{
LUAU_ASSERT(!isBlockTerminator(inst.cmd));
inst.cmd = IrCmd::SUBSTITUTE;
addUse(function, replacement);
removeUse(function, inst.a);
removeUse(function, inst.b);
removeUse(function, inst.c);
removeUse(function, inst.d);
removeUse(function, inst.e);
removeUse(function, inst.f);
inst.a = replacement;
inst.b = {};
inst.c = {};
inst.d = {};
inst.e = {};
inst.f = {};
}
void applySubstitutions(IrFunction& function, IrOp& op)
{
if (op.kind == IrOpKind::Inst)
{
IrInst& src = function.instructions[op.index];
if (src.cmd == IrCmd::SUBSTITUTE)
{
op.kind = src.a.kind;
op.index = src.a.index;
// If we substitute with the result of a different instruction, update the use count
if (op.kind == IrOpKind::Inst)
{
IrInst& dst = function.instructions[op.index];
LUAU_ASSERT(dst.cmd != IrCmd::SUBSTITUTE && "chained substitutions are not allowed");
dst.useCount++;
}
LUAU_ASSERT(src.useCount > 0);
src.useCount--;
if (src.useCount == 0)
removeUse(function, src.a);
}
}
}
void applySubstitutions(IrFunction& function, IrInst& inst)
{
applySubstitutions(function, inst.a);
applySubstitutions(function, inst.b);
applySubstitutions(function, inst.c);
applySubstitutions(function, inst.d);
applySubstitutions(function, inst.e);
applySubstitutions(function, inst.f);
}
bool compare(double a, double b, IrCondition cond)
{
switch (cond)
{
case IrCondition::Equal:
return a == b;
case IrCondition::NotEqual:
return a != b;
case IrCondition::Less:
return a < b;
case IrCondition::NotLess:
return !(a < b);
case IrCondition::LessEqual:
return a <= b;
case IrCondition::NotLessEqual:
return !(a <= b);
case IrCondition::Greater:
return a > b;
case IrCondition::NotGreater:
return !(a > b);
case IrCondition::GreaterEqual:
return a >= b;
case IrCondition::NotGreaterEqual:
return !(a >= b);
default:
LUAU_ASSERT(!"unsupported conidtion");
}
return false;
}
void foldConstants(IrBuilder& build, IrFunction& function, IrBlock& block, uint32_t index)
{
IrInst& inst = function.instructions[index];
switch (inst.cmd)
{
case IrCmd::ADD_INT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
// We need to avoid signed integer overflow, but we also have to produce a result
// So we add numbers as unsigned and use fixed-width integer types to force a two's complement evaluation
int32_t lhs = function.intOp(inst.a);
int32_t rhs = function.intOp(inst.b);
int sum = int32_t(uint32_t(lhs) + uint32_t(rhs));
substitute(function, inst, build.constInt(sum));
}
break;
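// Worked example of the unsigned trick: folding INT_MAX + 1 computes
//   uint32_t(0x7fffffff) + uint32_t(1) == 0x80000000u, and int32_t(0x80000000u) == INT_MIN,
// i.e. the two's complement wraparound, without performing a signed overflow in C++.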
case IrCmd::SUB_INT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
// We need to avoid signed integer overflow, but we also have to produce a result
// So we subtract numbers as unsigned and use fixed-width integer types to force a two's complement evaluation
int32_t lhs = function.intOp(inst.a);
int32_t rhs = function.intOp(inst.b);
int diff = int32_t(uint32_t(lhs) - uint32_t(rhs));
substitute(function, inst, build.constInt(diff));
}
break;
case IrCmd::ADD_NUM:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(function.doubleOp(inst.a) + function.doubleOp(inst.b)));
break;
case IrCmd::SUB_NUM:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(function.doubleOp(inst.a) - function.doubleOp(inst.b)));
break;
case IrCmd::MUL_NUM:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(function.doubleOp(inst.a) * function.doubleOp(inst.b)));
break;
case IrCmd::DIV_NUM:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(function.doubleOp(inst.a) / function.doubleOp(inst.b)));
break;
case IrCmd::MOD_NUM:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(luai_nummod(function.doubleOp(inst.a), function.doubleOp(inst.b))));
break;
case IrCmd::MIN_NUM:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
double a1 = function.doubleOp(inst.a);
double a2 = function.doubleOp(inst.b);
substitute(function, inst, build.constDouble(a1 < a2 ? a1 : a2));
}
break;
case IrCmd::MAX_NUM:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
double a1 = function.doubleOp(inst.a);
double a2 = function.doubleOp(inst.b);
substitute(function, inst, build.constDouble(a1 > a2 ? a1 : a2));
}
break;
case IrCmd::UNM_NUM:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(-function.doubleOp(inst.a)));
break;
case IrCmd::FLOOR_NUM:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(floor(function.doubleOp(inst.a))));
break;
case IrCmd::CEIL_NUM:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(ceil(function.doubleOp(inst.a))));
break;
case IrCmd::ROUND_NUM:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(round(function.doubleOp(inst.a))));
break;
case IrCmd::SQRT_NUM:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(sqrt(function.doubleOp(inst.a))));
break;
case IrCmd::ABS_NUM:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(fabs(function.doubleOp(inst.a))));
break;
case IrCmd::NOT_ANY:
if (inst.a.kind == IrOpKind::Constant)
{
uint8_t a = function.tagOp(inst.a);
if (a == LUA_TNIL)
substitute(function, inst, build.constInt(1));
else if (a != LUA_TBOOLEAN)
substitute(function, inst, build.constInt(0));
else if (inst.b.kind == IrOpKind::Constant)
substitute(function, inst, build.constInt(function.intOp(inst.b) == 1 ? 0 : 1));
}
break;
case IrCmd::JUMP_EQ_TAG:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
if (function.tagOp(inst.a) == function.tagOp(inst.b))
replace(function, block, index, {IrCmd::JUMP, inst.c});
else
replace(function, block, index, {IrCmd::JUMP, inst.d});
}
break;
case IrCmd::JUMP_EQ_INT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
if (function.intOp(inst.a) == function.intOp(inst.b))
replace(function, block, index, {IrCmd::JUMP, inst.c});
else
replace(function, block, index, {IrCmd::JUMP, inst.d});
}
break;
case IrCmd::JUMP_LT_INT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
if (function.intOp(inst.a) < function.intOp(inst.b))
replace(function, block, index, {IrCmd::JUMP, inst.c});
else
replace(function, block, index, {IrCmd::JUMP, inst.d});
}
break;
case IrCmd::JUMP_GE_UINT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
if (unsigned(function.intOp(inst.a)) >= unsigned(function.intOp(inst.b)))
replace(function, block, index, {IrCmd::JUMP, inst.c});
else
replace(function, block, index, {IrCmd::JUMP, inst.d});
}
break;
case IrCmd::JUMP_CMP_NUM:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
if (compare(function.doubleOp(inst.a), function.doubleOp(inst.b), conditionOp(inst.c)))
replace(function, block, index, {IrCmd::JUMP, inst.d});
else
replace(function, block, index, {IrCmd::JUMP, inst.e});
}
break;
case IrCmd::TRY_NUM_TO_INDEX:
if (inst.a.kind == IrOpKind::Constant)
{
double value = function.doubleOp(inst.a);
// To avoid undefined behavior of casting a value not representable in the target type, we check the range
if (value >= INT_MIN && value <= INT_MAX)
{
int arrIndex = int(value);
if (double(arrIndex) == value)
substitute(function, inst, build.constInt(arrIndex));
else
replace(function, block, index, {IrCmd::JUMP, inst.b});
}
else
{
replace(function, block, index, {IrCmd::JUMP, inst.b});
}
}
break;
case IrCmd::INT_TO_NUM:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(double(function.intOp(inst.a))));
break;
case IrCmd::UINT_TO_NUM:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constDouble(double(unsigned(function.intOp(inst.a)))));
break;
case IrCmd::NUM_TO_INT:
if (inst.a.kind == IrOpKind::Constant)
{
double value = function.doubleOp(inst.a);
// To avoid undefined behavior of casting a value not representable in the target type, we check the range
if (value >= INT_MIN && value <= INT_MAX)
substitute(function, inst, build.constInt(int(value)));
}
break;
case IrCmd::NUM_TO_UINT:
if (inst.a.kind == IrOpKind::Constant)
{
double value = function.doubleOp(inst.a);
// To avoid undefined behavior of casting a value not representable in the target type, we check the range
if (value >= 0 && value <= UINT_MAX)
substitute(function, inst, build.constInt(unsigned(value)));
}
break;
case IrCmd::CHECK_TAG:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
if (function.tagOp(inst.a) == function.tagOp(inst.b))
kill(function, inst);
else
replace(function, block, index, {IrCmd::JUMP, inst.c}); // Shows a conflict in assumptions on this path
}
break;
case IrCmd::BITAND_UINT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
unsigned op1 = unsigned(function.intOp(inst.a));
unsigned op2 = unsigned(function.intOp(inst.b));
substitute(function, inst, build.constInt(op1 & op2));
}
else
{
if (inst.a.kind == IrOpKind::Constant && function.intOp(inst.a) == 0) // (0 & b) -> 0
substitute(function, inst, build.constInt(0));
else if (inst.a.kind == IrOpKind::Constant && function.intOp(inst.a) == -1) // (-1 & b) -> b
substitute(function, inst, inst.b);
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0) // (a & 0) -> 0
substitute(function, inst, build.constInt(0));
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == -1) // (a & -1) -> a
substitute(function, inst, inst.a);
}
break;
case IrCmd::BITXOR_UINT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
unsigned op1 = unsigned(function.intOp(inst.a));
unsigned op2 = unsigned(function.intOp(inst.b));
substitute(function, inst, build.constInt(op1 ^ op2));
}
else
{
if (inst.a.kind == IrOpKind::Constant && function.intOp(inst.a) == 0) // (0 ^ b) -> b
substitute(function, inst, inst.b);
else if (inst.a.kind == IrOpKind::Constant && function.intOp(inst.a) == -1) // (-1 ^ b) -> ~b
replace(function, block, index, {IrCmd::BITNOT_UINT, inst.b});
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0) // (a ^ 0) -> a
substitute(function, inst, inst.a);
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == -1) // (a ^ -1) -> ~a
replace(function, block, index, {IrCmd::BITNOT_UINT, inst.a});
}
break;
case IrCmd::BITOR_UINT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
unsigned op1 = unsigned(function.intOp(inst.a));
unsigned op2 = unsigned(function.intOp(inst.b));
substitute(function, inst, build.constInt(op1 | op2));
}
else
{
if (inst.a.kind == IrOpKind::Constant && function.intOp(inst.a) == 0) // (0 | b) -> b
substitute(function, inst, inst.b);
else if (inst.a.kind == IrOpKind::Constant && function.intOp(inst.a) == -1) // (-1 | b) -> -1
substitute(function, inst, build.constInt(-1));
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0) // (a | 0) -> a
substitute(function, inst, inst.a);
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == -1) // (a | -1) -> -1
substitute(function, inst, build.constInt(-1));
}
break;
case IrCmd::BITNOT_UINT:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constInt(~unsigned(function.intOp(inst.a))));
break;
case IrCmd::BITLSHIFT_UINT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
unsigned op1 = unsigned(function.intOp(inst.a));
int op2 = function.intOp(inst.b);
if (unsigned(op2) < 32)
substitute(function, inst, build.constInt(op1 << op2));
}
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0)
{
substitute(function, inst, inst.a);
}
break;
case IrCmd::BITRSHIFT_UINT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
unsigned op1 = unsigned(function.intOp(inst.a));
int op2 = function.intOp(inst.b);
if (unsigned(op2) < 32)
substitute(function, inst, build.constInt(op1 >> op2));
}
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0)
{
substitute(function, inst, inst.a);
}
break;
case IrCmd::BITARSHIFT_UINT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
{
int op1 = function.intOp(inst.a);
int op2 = function.intOp(inst.b);
if (unsigned(op2) < 32)
{
// note: before C++20, right shift of negative values is implementation-defined rather than guaranteed arithmetic;
// C++20 defines it as an arithmetic shift, and all mainstream compilers already do the right (shift) thing.
substitute(function, inst, build.constInt(op1 >> op2));
}
}
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0)
{
substitute(function, inst, inst.a);
}
break;
case IrCmd::BITLROTATE_UINT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
substitute(function, inst, build.constInt(lrotate(unsigned(function.intOp(inst.a)), function.intOp(inst.b))));
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0)
substitute(function, inst, inst.a);
break;
case IrCmd::BITRROTATE_UINT:
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
substitute(function, inst, build.constInt(rrotate(unsigned(function.intOp(inst.a)), function.intOp(inst.b))));
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0)
substitute(function, inst, inst.a);
break;
case IrCmd::BITCOUNTLZ_UINT:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constInt(countlz(unsigned(function.intOp(inst.a)))));
break;
case IrCmd::BITCOUNTRZ_UINT:
if (inst.a.kind == IrOpKind::Constant)
substitute(function, inst, build.constInt(countrz(unsigned(function.intOp(inst.a)))));
break;
default:
break;
}
}
uint32_t getNativeContextOffset(int bfid)
{
switch (bfid)
{
case LBF_MATH_ACOS:
return offsetof(NativeContext, libm_acos);
case LBF_MATH_ASIN:
return offsetof(NativeContext, libm_asin);
case LBF_MATH_ATAN2:
return offsetof(NativeContext, libm_atan2);
case LBF_MATH_ATAN:
return offsetof(NativeContext, libm_atan);
case LBF_MATH_COSH:
return offsetof(NativeContext, libm_cosh);
case LBF_MATH_COS:
return offsetof(NativeContext, libm_cos);
case LBF_MATH_EXP:
return offsetof(NativeContext, libm_exp);
case LBF_MATH_LOG10:
return offsetof(NativeContext, libm_log10);
case LBF_MATH_LOG:
return offsetof(NativeContext, libm_log);
case LBF_MATH_SINH:
return offsetof(NativeContext, libm_sinh);
case LBF_MATH_SIN:
return offsetof(NativeContext, libm_sin);
case LBF_MATH_TANH:
return offsetof(NativeContext, libm_tanh);
case LBF_MATH_TAN:
return offsetof(NativeContext, libm_tan);
case LBF_MATH_FMOD:
return offsetof(NativeContext, libm_fmod);
case LBF_MATH_POW:
return offsetof(NativeContext, libm_pow);
case LBF_IR_MATH_LOG2:
return offsetof(NativeContext, libm_log2);
case LBF_MATH_LDEXP:
return offsetof(NativeContext, libm_ldexp);
default:
LUAU_ASSERT(!"Unsupported bfid");
}
return 0;
}
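// Usage sketch (hypothetical lowering, names assumed): INVOKE_LIBM can call through the native
// context instead of embedding an absolute address, along the lines of
//   build.call(qword[rNativeContext + getNativeContextOffset(function.uintOp(inst.a))]);
// where rNativeContext is the register assumed to be pinned to the NativeContext pointer.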
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,222 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "IrValueLocationTracking.h"
namespace Luau
{
namespace CodeGen
{
IrValueLocationTracking::IrValueLocationTracking(IrFunction& function)
: function(function)
{
vmRegValue.fill(kInvalidInstIdx);
}
void IrValueLocationTracking::setRestoreCallback(void* context, void (*callback)(void* context, IrInst& inst))
{
restoreCallbackCtx = context;
restoreCallback = callback;
}
void IrValueLocationTracking::beforeInstLowering(IrInst& inst)
{
switch (inst.cmd)
{
case IrCmd::STORE_TAG:
case IrCmd::STORE_POINTER:
case IrCmd::STORE_DOUBLE:
case IrCmd::STORE_INT:
case IrCmd::STORE_VECTOR:
case IrCmd::STORE_TVALUE:
invalidateRestoreOp(inst.a);
break;
case IrCmd::ADJUST_STACK_TO_REG:
invalidateRestoreVmRegs(vmRegOp(inst.a), -1);
break;
case IrCmd::FASTCALL:
invalidateRestoreVmRegs(vmRegOp(inst.b), function.intOp(inst.f));
break;
case IrCmd::INVOKE_FASTCALL:
// Multiple return sequences (count == -1) are defined by ADJUST_STACK_TO_REG
if (int count = function.intOp(inst.f); count != -1)
invalidateRestoreVmRegs(vmRegOp(inst.b), count);
break;
case IrCmd::DO_ARITH:
case IrCmd::DO_LEN:
case IrCmd::GET_TABLE:
case IrCmd::GET_IMPORT:
invalidateRestoreOp(inst.a);
break;
case IrCmd::CONCAT:
invalidateRestoreVmRegs(vmRegOp(inst.a), function.uintOp(inst.b));
break;
case IrCmd::GET_UPVALUE:
invalidateRestoreOp(inst.a);
break;
case IrCmd::PREPARE_FORN:
invalidateRestoreOp(inst.a);
invalidateRestoreOp(inst.b);
invalidateRestoreOp(inst.c);
break;
case IrCmd::CALL:
// Even if result count is limited, all registers starting from function (ra) might be modified
invalidateRestoreVmRegs(vmRegOp(inst.a), -1);
break;
case IrCmd::FORGLOOP:
case IrCmd::FORGLOOP_FALLBACK:
// Even if result count is limited, all registers starting from iteration index (ra+2) might be modified
invalidateRestoreVmRegs(vmRegOp(inst.a) + 2, -1);
break;
case IrCmd::FALLBACK_GETGLOBAL:
case IrCmd::FALLBACK_GETTABLEKS:
invalidateRestoreOp(inst.b);
break;
case IrCmd::FALLBACK_NAMECALL:
invalidateRestoreVmRegs(vmRegOp(inst.b), 2);
break;
case IrCmd::FALLBACK_GETVARARGS:
invalidateRestoreVmRegs(vmRegOp(inst.b), function.intOp(inst.c));
break;
case IrCmd::FALLBACK_NEWCLOSURE:
case IrCmd::FALLBACK_DUPCLOSURE:
invalidateRestoreOp(inst.b);
break;
case IrCmd::FALLBACK_FORGPREP:
invalidateRestoreVmRegs(vmRegOp(inst.b), 3);
break;
// Make sure all VmReg referencing instructions are handled explicitly (only register reads here)
case IrCmd::LOAD_TAG:
case IrCmd::LOAD_POINTER:
case IrCmd::LOAD_DOUBLE:
case IrCmd::LOAD_INT:
case IrCmd::LOAD_TVALUE:
case IrCmd::JUMP_IF_TRUTHY:
case IrCmd::JUMP_IF_FALSY:
case IrCmd::JUMP_CMP_ANY:
case IrCmd::SET_TABLE:
case IrCmd::SET_UPVALUE:
case IrCmd::INTERRUPT:
case IrCmd::BARRIER_OBJ:
case IrCmd::BARRIER_TABLE_FORWARD:
case IrCmd::CLOSE_UPVALS:
case IrCmd::CAPTURE:
case IrCmd::SETLIST:
case IrCmd::RETURN:
case IrCmd::FORGPREP_XNEXT_FALLBACK:
case IrCmd::FALLBACK_SETGLOBAL:
case IrCmd::FALLBACK_SETTABLEKS:
case IrCmd::FALLBACK_PREPVARARGS:
case IrCmd::ADJUST_STACK_TO_TOP:
break;
// These instructions read VmReg only after optimizeMemoryOperandsX64
case IrCmd::CHECK_TAG:
case IrCmd::ADD_NUM:
case IrCmd::SUB_NUM:
case IrCmd::MUL_NUM:
case IrCmd::DIV_NUM:
case IrCmd::MOD_NUM:
case IrCmd::MIN_NUM:
case IrCmd::MAX_NUM:
case IrCmd::JUMP_EQ_TAG:
case IrCmd::JUMP_CMP_NUM:
break;
default:
// All instructions which reference registers have to be handled explicitly
LUAU_ASSERT(inst.a.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.b.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.c.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.d.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.e.kind != IrOpKind::VmReg);
LUAU_ASSERT(inst.f.kind != IrOpKind::VmReg);
break;
}
}
void IrValueLocationTracking::afterInstLowering(IrInst& inst, uint32_t instIdx)
{
switch (inst.cmd)
{
case IrCmd::LOAD_TAG:
case IrCmd::LOAD_POINTER:
case IrCmd::LOAD_DOUBLE:
case IrCmd::LOAD_INT:
case IrCmd::LOAD_TVALUE:
recordRestoreOp(instIdx, inst.a);
break;
case IrCmd::STORE_POINTER:
case IrCmd::STORE_DOUBLE:
case IrCmd::STORE_INT:
case IrCmd::STORE_TVALUE:
// If this is not the last use of the stored value, we can restore it from this new location
if (inst.b.kind == IrOpKind::Inst && function.instOp(inst.b).lastUse != instIdx)
recordRestoreOp(inst.b.index, inst.a);
break;
default:
break;
}
}
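// Example of the store forwarding above (a sketch): given
//   %4 = LOAD_DOUBLE R1
//   STORE_DOUBLE R2, %4   ; %4 has uses after this store
// the value of %4 is now also available in R2, so a later spill of %4 can be restored from register
// memory instead of a stack slot.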
void IrValueLocationTracking::recordRestoreOp(uint32_t instIdx, IrOp location)
{
if (location.kind == IrOpKind::VmReg)
{
int reg = vmRegOp(location);
if (reg > maxReg)
maxReg = reg;
// Record location in register memory only if register is not captured
if (!function.cfg.captured.regs.test(reg))
function.recordRestoreOp(instIdx, location);
vmRegValue[reg] = instIdx;
}
else if (location.kind == IrOpKind::VmConst)
{
function.recordRestoreOp(instIdx, location);
}
}
void IrValueLocationTracking::invalidateRestoreOp(IrOp location)
{
if (location.kind == IrOpKind::VmReg)
{
uint32_t& instIdx = vmRegValue[vmRegOp(location)];
if (instIdx != kInvalidInstIdx)
{
IrInst& inst = function.instructions[instIdx];
// If instruction value is spilled and memory location is about to be lost, it has to be restored immediately
if (inst.needsReload)
restoreCallback(restoreCallbackCtx, inst);
// Instruction loses its memory storage location
function.recordRestoreOp(instIdx, IrOp());
// Register loses link with instruction
instIdx = kInvalidInstIdx;
}
}
else if (location.kind == IrOpKind::VmConst)
{
LUAU_ASSERT(!"VM constants are immutable");
}
}
void IrValueLocationTracking::invalidateRestoreVmRegs(int start, int count)
{
int end = count == -1 ? 255 : start + count;
if (end > maxReg)
end = maxReg;
for (int reg = start; reg <= end; reg++)
invalidateRestoreOp(IrOp{IrOpKind::VmReg, uint8_t(reg)});
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,38 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/IrData.h"
#include <array>
namespace Luau
{
namespace CodeGen
{
struct IrValueLocationTracking
{
IrValueLocationTracking(IrFunction& function);
void setRestoreCallback(void* context, void (*callback)(void* context, IrInst& inst));
void beforeInstLowering(IrInst& inst);
void afterInstLowering(IrInst& inst, uint32_t instIdx);
void recordRestoreOp(uint32_t instIdx, IrOp location);
void invalidateRestoreOp(IrOp location);
void invalidateRestoreVmRegs(int start, int count);
IrFunction& function;
std::array<uint32_t, 256> vmRegValue;
// For range/full invalidations, we only want to visit the limited range of registers that we have recorded
int maxReg = 0;
void* restoreCallbackCtx = nullptr;
void (*restoreCallback)(void* context, IrInst& inst) = nullptr;
};
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,111 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "NativeState.h"
#include "Luau/UnwindBuilder.h"
#include "CodeGenUtils.h"
#include "CustomExecUtils.h"
#include "lbuiltins.h"
#include "lgc.h"
#include "ltable.h"
#include "lfunc.h"
#include "lvm.h"
#include <math.h>
#include <string.h>
namespace Luau
{
namespace CodeGen
{
constexpr unsigned kBlockSize = 4 * 1024 * 1024;
constexpr unsigned kMaxTotalSize = 256 * 1024 * 1024;
NativeState::NativeState()
: codeAllocator(kBlockSize, kMaxTotalSize)
{
}
NativeState::~NativeState() = default;
void initFunctions(NativeState& data)
{
static_assert(sizeof(data.context.luauF_table) == sizeof(luauF_table), "fastcall tables are not of the same length");
memcpy(data.context.luauF_table, luauF_table, sizeof(luauF_table));
data.context.luaV_lessthan = luaV_lessthan;
data.context.luaV_lessequal = luaV_lessequal;
data.context.luaV_equalval = luaV_equalval;
data.context.luaV_doarith = luaV_doarith;
data.context.luaV_dolen = luaV_dolen;
data.context.luaV_prepareFORN = luaV_prepareFORN;
data.context.luaV_gettable = luaV_gettable;
data.context.luaV_settable = luaV_settable;
data.context.luaV_getimport = luaV_getimport;
data.context.luaV_concat = luaV_concat;
data.context.luaH_getn = luaH_getn;
data.context.luaH_new = luaH_new;
data.context.luaH_clone = luaH_clone;
data.context.luaH_resizearray = luaH_resizearray;
data.context.luaC_barriertable = luaC_barriertable;
data.context.luaC_barrierf = luaC_barrierf;
data.context.luaC_barrierback = luaC_barrierback;
data.context.luaC_step = luaC_step;
data.context.luaF_close = luaF_close;
data.context.luaT_gettm = luaT_gettm;
data.context.luaT_objtypenamestr = luaT_objtypenamestr;
data.context.libm_exp = exp;
data.context.libm_pow = pow;
data.context.libm_fmod = fmod;
data.context.libm_log = log;
data.context.libm_log2 = log2;
data.context.libm_log10 = log10;
data.context.libm_ldexp = ldexp;
data.context.libm_round = round;
data.context.libm_frexp = frexp;
data.context.libm_modf = modf;
data.context.libm_asin = asin;
data.context.libm_sin = sin;
data.context.libm_sinh = sinh;
data.context.libm_acos = acos;
data.context.libm_cos = cos;
data.context.libm_cosh = cosh;
data.context.libm_atan = atan;
data.context.libm_atan2 = atan2;
data.context.libm_tan = tan;
data.context.libm_tanh = tanh;
data.context.forgLoopTableIter = forgLoopTableIter;
data.context.forgLoopNodeIter = forgLoopNodeIter;
data.context.forgLoopNonTableFallback = forgLoopNonTableFallback;
data.context.forgPrepXnextFallback = forgPrepXnextFallback;
data.context.callProlog = callProlog;
data.context.callEpilogC = callEpilogC;
data.context.callFallback = callFallback;
data.context.returnFallback = returnFallback;
data.context.executeGETGLOBAL = executeGETGLOBAL;
data.context.executeSETGLOBAL = executeSETGLOBAL;
data.context.executeGETTABLEKS = executeGETTABLEKS;
data.context.executeSETTABLEKS = executeSETTABLEKS;
data.context.executeNEWCLOSURE = executeNEWCLOSURE;
data.context.executeNAMECALL = executeNAMECALL;
data.context.executeFORGPREP = executeFORGPREP;
data.context.executeGETVARARGS = executeGETVARARGS;
data.context.executeDUPCLOSURE = executeDUPCLOSURE;
data.context.executePREPVARARGS = executePREPVARARGS;
data.context.executeSETLIST = executeSETLIST;
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,127 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Bytecode.h"
#include "Luau/CodeAllocator.h"
#include "Luau/Label.h"
#include <memory>
#include <stdint.h>
#include "ldebug.h"
#include "lobject.h"
#include "ltm.h"
#include "lstate.h"
typedef int (*luau_FastFunction)(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams);
namespace Luau
{
namespace CodeGen
{
class UnwindBuilder;
struct NativeContext
{
// Gateway (C => native transition) entry & exit, compiled at runtime
uint8_t* gateEntry = nullptr;
uint8_t* gateExit = nullptr;
// Helper functions, implemented in C
int (*luaV_lessthan)(lua_State* L, const TValue* l, const TValue* r) = nullptr;
int (*luaV_lessequal)(lua_State* L, const TValue* l, const TValue* r) = nullptr;
int (*luaV_equalval)(lua_State* L, const TValue* t1, const TValue* t2) = nullptr;
void (*luaV_doarith)(lua_State* L, StkId ra, const TValue* rb, const TValue* rc, TMS op) = nullptr;
void (*luaV_dolen)(lua_State* L, StkId ra, const TValue* rb) = nullptr;
void (*luaV_prepareFORN)(lua_State* L, StkId plimit, StkId pstep, StkId pinit) = nullptr;
void (*luaV_gettable)(lua_State* L, const TValue* t, TValue* key, StkId val) = nullptr;
void (*luaV_settable)(lua_State* L, const TValue* t, TValue* key, StkId val) = nullptr;
void (*luaV_getimport)(lua_State* L, Table* env, TValue* k, uint32_t id, bool propagatenil) = nullptr;
void (*luaV_concat)(lua_State* L, int total, int last) = nullptr;
int (*luaH_getn)(Table* t) = nullptr;
Table* (*luaH_new)(lua_State* L, int narray, int lnhash) = nullptr;
Table* (*luaH_clone)(lua_State* L, Table* tt) = nullptr;
void (*luaH_resizearray)(lua_State* L, Table* t, int nasize) = nullptr;
void (*luaC_barriertable)(lua_State* L, Table* t, GCObject* v) = nullptr;
void (*luaC_barrierf)(lua_State* L, GCObject* o, GCObject* v) = nullptr;
void (*luaC_barrierback)(lua_State* L, GCObject* o, GCObject** gclist) = nullptr;
size_t (*luaC_step)(lua_State* L, bool assist) = nullptr;
void (*luaF_close)(lua_State* L, StkId level) = nullptr;
const TValue* (*luaT_gettm)(Table* events, TMS event, TString* ename) = nullptr;
const TString* (*luaT_objtypenamestr)(lua_State* L, const TValue* o) = nullptr;
double (*libm_exp)(double) = nullptr;
double (*libm_pow)(double, double) = nullptr;
double (*libm_fmod)(double, double) = nullptr;
double (*libm_asin)(double) = nullptr;
double (*libm_sin)(double) = nullptr;
double (*libm_sinh)(double) = nullptr;
double (*libm_acos)(double) = nullptr;
double (*libm_cos)(double) = nullptr;
double (*libm_cosh)(double) = nullptr;
double (*libm_atan)(double) = nullptr;
double (*libm_atan2)(double, double) = nullptr;
double (*libm_tan)(double) = nullptr;
double (*libm_tanh)(double) = nullptr;
double (*libm_log)(double) = nullptr;
double (*libm_log2)(double) = nullptr;
double (*libm_log10)(double) = nullptr;
double (*libm_ldexp)(double, int) = nullptr;
double (*libm_round)(double) = nullptr;
double (*libm_frexp)(double, int*) = nullptr;
double (*libm_modf)(double, double*) = nullptr;
// Helper functions
bool (*forgLoopTableIter)(lua_State* L, Table* h, int index, TValue* ra) = nullptr;
bool (*forgLoopNodeIter)(lua_State* L, Table* h, int index, TValue* ra) = nullptr;
bool (*forgLoopNonTableFallback)(lua_State* L, int insnA, int aux) = nullptr;
void (*forgPrepXnextFallback)(lua_State* L, TValue* ra, int pc) = nullptr;
Closure* (*callProlog)(lua_State* L, TValue* ra, StkId argtop, int nresults) = nullptr;
void (*callEpilogC)(lua_State* L, int nresults, int n) = nullptr;
Closure* (*callFallback)(lua_State* L, StkId ra, StkId argtop, int nresults) = nullptr;
Closure* (*returnFallback)(lua_State* L, StkId ra, StkId valend) = nullptr;
// Opcode fallbacks, implemented in C
const Instruction* (*executeGETGLOBAL)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executeSETGLOBAL)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executeGETTABLEKS)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executeSETTABLEKS)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executeNEWCLOSURE)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executeNAMECALL)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executeSETLIST)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executeFORGPREP)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executeGETVARARGS)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executeDUPCLOSURE)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
const Instruction* (*executePREPVARARGS)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
// Fast call methods, implemented in C
luau_FastFunction luauF_table[256] = {};
};
using GateFn = int (*)(lua_State*, Proto*, uintptr_t, NativeContext*);
struct NativeState
{
NativeState();
~NativeState();
CodeAllocator codeAllocator;
std::unique_ptr<UnwindBuilder> unwindBuilder;
uint8_t* gateData = nullptr;
size_t gateDataSize = 0;
NativeContext context;
};
void initFunctions(NativeState& data);
} // namespace CodeGen
} // namespace Luau

File diff suppressed because it is too large

View File

@ -0,0 +1,109 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/OptimizeFinalX64.h"
#include "Luau/IrUtils.h"
#include <utility>
namespace Luau
{
namespace CodeGen
{
// x64 assembly allows memory operands, but IR separates loads from uses
// To improve final x64 lowering, we try to 'inline' single-use register/constant loads into some of our instructions
// This pass might not be useful on different architectures
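// For example (illustrative IR notation, not the exact dump format):
//   %1 = LOAD_DOUBLE R2
//   %2 = ADD_NUM %0, %1      ; %1 has a single use
// becomes
//   %2 = ADD_NUM %0, R2
// allowing the x64 lowering to fold the load into a memory operand
// (e.g. 'addsd xmm0, qword ptr [rBase + offset]') instead of using a temporary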
static void optimizeMemoryOperandsX64(IrFunction& function, IrBlock& block)
{
LUAU_ASSERT(block.kind != IrBlockKind::Dead);
for (uint32_t index = block.start; index <= block.finish; index++)
{
LUAU_ASSERT(index < function.instructions.size());
IrInst& inst = function.instructions[index];
switch (inst.cmd)
{
case IrCmd::CHECK_TAG:
{
if (inst.a.kind == IrOpKind::Inst)
{
IrInst& tag = function.instOp(inst.a);
if (tag.useCount == 1 && tag.cmd == IrCmd::LOAD_TAG && (tag.a.kind == IrOpKind::VmReg || tag.a.kind == IrOpKind::VmConst))
replace(function, inst.a, tag.a);
}
break;
}
case IrCmd::ADD_NUM:
case IrCmd::SUB_NUM:
case IrCmd::MUL_NUM:
case IrCmd::DIV_NUM:
case IrCmd::MOD_NUM:
case IrCmd::MIN_NUM:
case IrCmd::MAX_NUM:
{
if (inst.b.kind == IrOpKind::Inst)
{
IrInst& rhs = function.instOp(inst.b);
if (rhs.useCount == 1 && rhs.cmd == IrCmd::LOAD_DOUBLE && (rhs.a.kind == IrOpKind::VmReg || rhs.a.kind == IrOpKind::VmConst))
replace(function, inst.b, rhs.a);
}
break;
}
case IrCmd::JUMP_EQ_TAG:
{
if (inst.a.kind == IrOpKind::Inst)
{
IrInst& tagA = function.instOp(inst.a);
if (tagA.useCount == 1 && tagA.cmd == IrCmd::LOAD_TAG && (tagA.a.kind == IrOpKind::VmReg || tagA.a.kind == IrOpKind::VmConst))
{
replace(function, inst.a, tagA.a);
break;
}
}
if (inst.b.kind == IrOpKind::Inst)
{
IrInst& tagB = function.instOp(inst.b);
if (tagB.useCount == 1 && tagB.cmd == IrCmd::LOAD_TAG && (tagB.a.kind == IrOpKind::VmReg || tagB.a.kind == IrOpKind::VmConst))
{
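// Tag equality is symmetric, so move the inlinable load into the second operand slot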
std::swap(inst.a, inst.b);
replace(function, inst.a, tagB.a);
}
}
break;
}
case IrCmd::JUMP_CMP_NUM:
{
if (inst.a.kind == IrOpKind::Inst)
{
IrInst& num = function.instOp(inst.a);
if (num.useCount == 1 && num.cmd == IrCmd::LOAD_DOUBLE)
replace(function, inst.a, num.a);
}
break;
}
default:
break;
}
}
}
void optimizeMemoryOperandsX64(IrFunction& function)
{
for (IrBlock& block : function.blocks)
{
if (block.kind == IrBlockKind::Dead)
continue;
optimizeMemoryOperandsX64(function, block);
}
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,299 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/UnwindBuilderDwarf2.h"
#include "ByteUtils.h"
#include <string.h>
// General information about Dwarf2 format can be found at:
// https://dwarfstd.org/doc/dwarf-2.0.0.pdf [DWARF Debugging Information Format]
// Main part for async exception unwinding is in section '6.4 Call Frame Information'
// Information about System V ABI (AMD64) can be found at:
// https://refspecs.linuxbase.org/elf/x86_64-abi-0.99.pdf [System V Application Binary Interface (AMD64 Architecture Processor Supplement)]
// Interaction between Dwarf2 and System V ABI can be found in sections '3.6.2 DWARF Register Number Mapping' and '4.2.4 EH_FRAME sections'
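// Byte emission below uses helpers from ByteUtils.h; as a rough sketch of the
// assumed ULEB128 behavior (the real helper may differ in details), writeuleb128
// emits 7 bits per byte, least significant first, with the high bit marking
// continuation:
//   static uint8_t* writeuleb128(uint8_t* pos, uint32_t value)
//   {
//       do
//       {
//           uint8_t byte = value & 0x7f;
//           value >>= 7;
//           *pos++ = byte | (value != 0 ? 0x80 : 0);
//       } while (value != 0);
//       return pos;
//   }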
// Call frame instruction opcodes (Dwarf2, page 78, ch. 7.23 figure 37)
#define DW_CFA_advance_loc 0x40
#define DW_CFA_offset 0x80
#define DW_CFA_restore 0xc0
#define DW_CFA_set_loc 0x01
#define DW_CFA_advance_loc1 0x02
#define DW_CFA_advance_loc2 0x03
#define DW_CFA_advance_loc4 0x04
#define DW_CFA_offset_extended 0x05
#define DW_CFA_restore_extended 0x06
#define DW_CFA_undefined 0x07
#define DW_CFA_same_value 0x08
#define DW_CFA_register 0x09
#define DW_CFA_remember_state 0x0a
#define DW_CFA_restore_state 0x0b
#define DW_CFA_def_cfa 0x0c
#define DW_CFA_def_cfa_register 0x0d
#define DW_CFA_def_cfa_offset 0x0e
#define DW_CFA_def_cfa_expression 0x0f
#define DW_CFA_nop 0x00
#define DW_CFA_lo_user 0x1c
#define DW_CFA_hi_user 0x3f
// Register numbers for X64 (System V ABI, page 57, ch. 3.7, figure 3.36)
#define DW_REG_X64_RAX 0
#define DW_REG_X64_RDX 1
#define DW_REG_X64_RCX 2
#define DW_REG_X64_RBX 3
#define DW_REG_X64_RSI 4
#define DW_REG_X64_RDI 5
#define DW_REG_X64_RBP 6
#define DW_REG_X64_RSP 7
#define DW_REG_X64_RA 16
// Register numbers for A64 (DWARF for the Arm 64-bit Architecture, ch. 4.1)
#define DW_REG_A64_FP 29
#define DW_REG_A64_LR 30
#define DW_REG_A64_SP 31
// X64 register mapping from real register index to DWARF2 (r8..r15 are mapped 1-1, but named registers aren't)
const int regIndexToDwRegX64[16] = {DW_REG_X64_RAX, DW_REG_X64_RCX, DW_REG_X64_RDX, DW_REG_X64_RBX, DW_REG_X64_RSP, DW_REG_X64_RBP, DW_REG_X64_RSI,
DW_REG_X64_RDI, 8, 9, 10, 11, 12, 13, 14, 15};
const int kCodeAlignFactor = 1;
const int kDataAlignFactor = 8;
const int kDwarfAlign = 8;
const int kFdeInitialLocationOffset = 8;
const int kFdeAddressRangeOffset = 16;
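// These two constants index into the FDE layout written by startFunction: a u32
// length and a u32 CIE pointer occupy the first 8 bytes, followed by a u64
// initial location and a u64 address range, which finalize() patches per function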
// Define canonical frame address expression as [reg + offset]
static uint8_t* defineCfaExpression(uint8_t* pos, int dwReg, uint32_t stackOffset)
{
pos = writeu8(pos, DW_CFA_def_cfa);
pos = writeuleb128(pos, dwReg);
pos = writeuleb128(pos, stackOffset);
return pos;
}
// Update offset value in canonical frame address expression
static uint8_t* defineCfaExpressionOffset(uint8_t* pos, uint32_t stackOffset)
{
pos = writeu8(pos, DW_CFA_def_cfa_offset);
pos = writeuleb128(pos, stackOffset);
return pos;
}
static uint8_t* defineSavedRegisterLocation(uint8_t* pos, int dwReg, uint32_t stackOffset)
{
LUAU_ASSERT(stackOffset % kDataAlignFactor == 0 && "stack offsets have to be measured in kDataAlignFactor units");
if (dwReg <= 0x3f)
{
pos = writeu8(pos, DW_CFA_offset + dwReg);
}
else
{
pos = writeu8(pos, DW_CFA_offset_extended);
pos = writeuleb128(pos, dwReg);
}
pos = writeuleb128(pos, stackOffset / kDataAlignFactor);
return pos;
}
static uint8_t* advanceLocation(uint8_t* pos, unsigned int offset)
{
LUAU_ASSERT(offset < 256);
pos = writeu8(pos, DW_CFA_advance_loc1);
pos = writeu8(pos, offset);
return pos;
}
static uint8_t* alignPosition(uint8_t* start, uint8_t* pos)
{
size_t size = pos - start;
size_t pad = ((size + kDwarfAlign - 1) & ~(kDwarfAlign - 1)) - size;
for (size_t i = 0; i < pad; i++)
pos = writeu8(pos, DW_CFA_nop);
return pos;
}
namespace Luau
{
namespace CodeGen
{
void UnwindBuilderDwarf2::setBeginOffset(size_t beginOffset)
{
this->beginOffset = beginOffset;
}
size_t UnwindBuilderDwarf2::getBeginOffset() const
{
return beginOffset;
}
void UnwindBuilderDwarf2::startInfo(Arch arch)
{
LUAU_ASSERT(arch == A64 || arch == X64);
uint8_t* cieLength = pos;
pos = writeu32(pos, 0); // Length (to be filled later)
pos = writeu32(pos, 0); // CIE id: 0 identifies this as a .eh_frame CIE
pos = writeu8(pos, 1); // Version
pos = writeu8(pos, 0); // CIE augmentation string "" (empty)
int ra = arch == A64 ? DW_REG_A64_LR : DW_REG_X64_RA;
pos = writeuleb128(pos, kCodeAlignFactor); // Code align factor
pos = writeuleb128(pos, -kDataAlignFactor & 0x7f); // Data align factor of -8, encoded as a signed LEB128
pos = writeu8(pos, ra); // Return address register
// Optional CIE augmentation section (not present)
// Call frame instructions (common for all FDEs)
if (arch == A64)
{
pos = defineCfaExpression(pos, DW_REG_A64_SP, 0); // Define CFA to be the sp
}
else
{
pos = defineCfaExpression(pos, DW_REG_X64_RSP, 8); // Define CFA to be the rsp + 8
pos = defineSavedRegisterLocation(pos, DW_REG_X64_RA, 8); // Define return address register (RA) to be located at CFA - 8
}
pos = alignPosition(cieLength, pos);
writeu32(cieLength, unsigned(pos - cieLength - 4)); // Length field itself is excluded from length
}
void UnwindBuilderDwarf2::startFunction()
{
// End offset is filled in later and everything gets adjusted at the end
UnwindFunctionDwarf2 func;
func.beginOffset = 0;
func.endOffset = 0;
func.fdeEntryStartPos = uint32_t(pos - rawData);
unwindFunctions.push_back(func);
fdeEntryStart = pos; // Will be written at the end
pos = writeu32(pos, 0); // Length (to be filled later)
pos = writeu32(pos, unsigned(pos - rawData)); // CIE pointer (offset from this field back to the CIE at the start of the section)
pos = writeu64(pos, 0); // Initial location (to be filled later)
pos = writeu64(pos, 0); // Address range (to be filled later)
// Optional CIE augmentation section (not present)
// Function call frame instructions to follow
}
void UnwindBuilderDwarf2::finishFunction(uint32_t beginOffset, uint32_t endOffset)
{
unwindFunctions.back().beginOffset = beginOffset;
unwindFunctions.back().endOffset = endOffset;
LUAU_ASSERT(fdeEntryStart != nullptr);
pos = alignPosition(fdeEntryStart, pos);
writeu32(fdeEntryStart, unsigned(pos - fdeEntryStart - 4)); // Length field itself is excluded from length
}
void UnwindBuilderDwarf2::finishInfo()
{
// Terminate section
pos = writeu32(pos, 0);
LUAU_ASSERT(getSize() <= kRawDataLimit);
}
void UnwindBuilderDwarf2::prologueA64(uint32_t prologueSize, uint32_t stackSize, std::initializer_list<A64::RegisterA64> regs)
{
LUAU_ASSERT(stackSize % 16 == 0);
LUAU_ASSERT(regs.size() >= 2 && regs.begin()[0] == A64::x29 && regs.begin()[1] == A64::x30);
LUAU_ASSERT(regs.size() * 8 <= stackSize);
// sub sp, sp, stackSize
pos = advanceLocation(pos, 4);
pos = defineCfaExpressionOffset(pos, stackSize);
// stp/str to store each register to stack in order
pos = advanceLocation(pos, prologueSize - 4);
for (size_t i = 0; i < regs.size(); ++i)
{
LUAU_ASSERT(regs.begin()[i].kind == A64::KindA64::x);
pos = defineSavedRegisterLocation(pos, regs.begin()[i].index, stackSize - unsigned(i * 8));
}
}
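// A worked example with hypothetical arguments: prologueA64(16, 96, {x29, x30, x19, x20})
// describes "sub sp, sp, #96" followed by stores of the four registers at [sp],
// [sp+8], [sp+16] and [sp+24]; the CFI above records CFA = sp + 96 with each
// register saved at CFA - (96 - i * 8)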
void UnwindBuilderDwarf2::prologueX64(uint32_t prologueSize, uint32_t stackSize, bool setupFrame, std::initializer_list<X64::RegisterX64> regs)
{
LUAU_ASSERT(stackSize > 0 && stackSize <= 128 && stackSize % 8 == 0);
unsigned int stackOffset = 8; // Return address was pushed by calling the function
unsigned int prologueOffset = 0;
if (setupFrame)
{
// push rbp
stackOffset += 8;
prologueOffset += 2;
pos = advanceLocation(pos, 2);
pos = defineCfaExpressionOffset(pos, stackOffset);
pos = defineSavedRegisterLocation(pos, DW_REG_X64_RBP, stackOffset);
// mov rbp, rsp
prologueOffset += 3;
pos = advanceLocation(pos, 3);
}
// push reg
for (X64::RegisterX64 reg : regs)
{
LUAU_ASSERT(reg.size == X64::SizeX64::qword);
stackOffset += 8;
prologueOffset += 2;
pos = advanceLocation(pos, 2);
pos = defineCfaExpressionOffset(pos, stackOffset);
pos = defineSavedRegisterLocation(pos, regIndexToDwRegX64[reg.index], stackOffset);
}
// sub rsp, stackSize
stackOffset += stackSize;
prologueOffset += 4;
pos = advanceLocation(pos, 4);
pos = defineCfaExpressionOffset(pos, stackOffset);
LUAU_ASSERT(stackOffset % 16 == 0);
LUAU_ASSERT(prologueOffset == prologueSize);
}
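// A worked example with hypothetical arguments: prologueX64(11, 24, true, {r12})
// describes "push rbp; mov rbp, rsp; push r12; sub rsp, 24" and records:
//   advance 2; def_cfa_offset 16; rbp saved at CFA-16
//   advance 3
//   advance 2; def_cfa_offset 24; r12 saved at CFA-24
//   advance 4; def_cfa_offset 48
// so the unwinder can restore the frame from any point inside the prologue
// (prologueSize = 11 matches the per-instruction advances above: 2 + 3 + 2 + 4)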
size_t UnwindBuilderDwarf2::getSize() const
{
return size_t(pos - rawData);
}
size_t UnwindBuilderDwarf2::getFunctionCount() const
{
return unwindFunctions.size();
}
void UnwindBuilderDwarf2::finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const
{
memcpy(target, rawData, getSize());
for (const UnwindFunctionDwarf2& func : unwindFunctions)
{
uint8_t* fdeEntry = (uint8_t*)target + func.fdeEntryStartPos;
writeu64(fdeEntry + kFdeInitialLocationOffset, uintptr_t(funcAddress) + offset + func.beginOffset);
if (func.endOffset == kFullBlockFuncton)
writeu64(fdeEntry + kFdeAddressRangeOffset, funcSize - offset);
else
writeu64(fdeEntry + kFdeAddressRangeOffset, func.endOffset - func.beginOffset);
}
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,190 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/UnwindBuilderWin.h"
#include <string.h>
// Information about the Windows x64 unwinding data setup can be found at:
// https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64 [x64 exception handling]
#define UWOP_PUSH_NONVOL 0
#define UWOP_ALLOC_LARGE 1
#define UWOP_ALLOC_SMALL 2
#define UWOP_SET_FPREG 3
#define UWOP_SAVE_NONVOL 4
#define UWOP_SAVE_NONVOL_FAR 5
#define UWOP_SAVE_XMM128 8
#define UWOP_SAVE_XMM128_FAR 9
#define UWOP_PUSH_MACHFRAME 10
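// Of these, only UWOP_PUSH_NONVOL (info = register number), UWOP_SET_FPREG and
// UWOP_ALLOC_SMALL (allocation size = info * 8 + 8, i.e. 8..128 bytes) are
// emitted by this builder; the rest are listed for reference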
namespace Luau
{
namespace CodeGen
{
void UnwindBuilderWin::setBeginOffset(size_t beginOffset)
{
this->beginOffset = beginOffset;
}
size_t UnwindBuilderWin::getBeginOffset() const
{
return beginOffset;
}
void UnwindBuilderWin::startInfo(Arch arch)
{
LUAU_ASSERT(arch == X64);
}
void UnwindBuilderWin::startFunction()
{
// End offset is filled in later and everything gets adjusted at the end
UnwindFunctionWin func;
func.beginOffset = 0;
func.endOffset = 0;
func.unwindInfoOffset = uint32_t(rawDataPos - rawData);
unwindFunctions.push_back(func);
unwindCodes.clear();
unwindCodes.reserve(16);
prologSize = 0;
// rax has register index 0, which in Windows unwind info means that frame register is not used
frameReg = X64::rax;
frameRegOffset = 0;
}
void UnwindBuilderWin::finishFunction(uint32_t beginOffset, uint32_t endOffset)
{
unwindFunctions.back().beginOffset = beginOffset;
unwindFunctions.back().endOffset = endOffset;
// Windows unwind code count is stored in uint8_t, so we can't have more
LUAU_ASSERT(unwindCodes.size() < 256);
UnwindInfoWin info;
info.version = 1;
info.flags = 0; // No EH
info.prologsize = prologSize;
info.unwindcodecount = uint8_t(unwindCodes.size());
LUAU_ASSERT(frameReg.index < 16);
info.framereg = frameReg.index;
LUAU_ASSERT(frameRegOffset < 16);
info.frameregoff = frameRegOffset;
LUAU_ASSERT(rawDataPos + sizeof(info) <= rawData + kRawDataLimit);
memcpy(rawDataPos, &info, sizeof(info));
rawDataPos += sizeof(info);
if (!unwindCodes.empty())
{
// Copy unwind codes in reverse order
// Some unwind codes take up two array slots, but we don't use those atm
uint8_t* unwindCodePos = rawDataPos + sizeof(UnwindCodeWin) * (unwindCodes.size() - 1);
LUAU_ASSERT(unwindCodePos <= rawData + kRawDataLimit);
for (size_t i = 0; i < unwindCodes.size(); i++)
{
memcpy(unwindCodePos, &unwindCodes[i], sizeof(UnwindCodeWin));
unwindCodePos -= sizeof(UnwindCodeWin);
}
}
rawDataPos += sizeof(UnwindCodeWin) * unwindCodes.size();
// Size of the unwind code array has to be even, but the code count doesn't have to be
if (unwindCodes.size() % 2 != 0)
rawDataPos += sizeof(UnwindCodeWin);
LUAU_ASSERT(rawDataPos <= rawData + kRawDataLimit);
}
void UnwindBuilderWin::finishInfo() {}
void UnwindBuilderWin::prologueA64(uint32_t prologueSize, uint32_t stackSize, std::initializer_list<A64::RegisterA64> regs)
{
LUAU_ASSERT(!"Not implemented");
}
void UnwindBuilderWin::prologueX64(uint32_t prologueSize, uint32_t stackSize, bool setupFrame, std::initializer_list<X64::RegisterX64> regs)
{
LUAU_ASSERT(stackSize > 0 && stackSize <= 128 && stackSize % 8 == 0);
LUAU_ASSERT(prologueSize < 256);
unsigned int stackOffset = 8; // Return address was pushed by calling the function
unsigned int prologueOffset = 0;
if (setupFrame)
{
// push rbp
stackOffset += 8;
prologueOffset += 2;
unwindCodes.push_back({uint8_t(prologueOffset), UWOP_PUSH_NONVOL, X64::rbp.index});
// mov rbp, rsp
prologueOffset += 3;
frameReg = X64::rbp;
frameRegOffset = 0;
unwindCodes.push_back({uint8_t(prologueOffset), UWOP_SET_FPREG, frameRegOffset});
}
// push reg
for (X64::RegisterX64 reg : regs)
{
LUAU_ASSERT(reg.size == X64::SizeX64::qword);
stackOffset += 8;
prologueOffset += 2;
unwindCodes.push_back({uint8_t(prologueOffset), UWOP_PUSH_NONVOL, reg.index});
}
// sub rsp, stackSize
stackOffset += stackSize;
prologueOffset += 4;
unwindCodes.push_back({uint8_t(prologueOffset), UWOP_ALLOC_SMALL, uint8_t((stackSize - 8) / 8)});
LUAU_ASSERT(stackOffset % 16 == 0);
LUAU_ASSERT(prologueOffset == prologueSize);
this->prologSize = prologueSize;
}
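// The same hypothetical prologue as in the DWARF builder, prologueX64(11, 24, true, {r12})
// for "push rbp; mov rbp, rsp; push r12; sub rsp, 24", records (before the
// reversed copy in finishFunction):
//   {2, UWOP_PUSH_NONVOL, rbp}, {5, UWOP_SET_FPREG, 0},
//   {7, UWOP_PUSH_NONVOL, r12}, {11, UWOP_ALLOC_SMALL, 2} // (2 + 1) * 8 = 24 bytes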
size_t UnwindBuilderWin::getSize() const
{
return sizeof(UnwindFunctionWin) * unwindFunctions.size() + size_t(rawDataPos - rawData);
}
size_t UnwindBuilderWin::getFunctionCount() const
{
return unwindFunctions.size();
}
void UnwindBuilderWin::finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const
{
// Copy adjusted function information
for (UnwindFunctionWin func : unwindFunctions)
{
// Code will start after the unwind info
func.beginOffset += uint32_t(offset);
// Whole block is a part of a 'single function'
if (func.endOffset == kFullBlockFuncton)
func.endOffset = uint32_t(funcSize);
else
func.endOffset += uint32_t(offset);
// Unwind data is placed right after the RUNTIME_FUNCTION data
func.unwindInfoOffset += uint32_t(sizeof(UnwindFunctionWin) * unwindFunctions.size());
memcpy(target, &func, sizeof(func));
target += sizeof(func);
}
// Copy unwind codes
memcpy(target, rawData, size_t(rawDataPos - rawData));
}
} // namespace CodeGen
} // namespace Luau

View File

@ -0,0 +1,21 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "luacodegen.h"
#include "Luau/CodeGen.h"
#include "lapi.h"
int luau_codegen_supported()
{
return Luau::CodeGen::isSupported();
}
void luau_codegen_create(lua_State* L)
{
Luau::CodeGen::create(L);
}
void luau_codegen_compile(lua_State* L, int idx)
{
Luau::CodeGen::compile(L, idx);
}
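// A minimal usage sketch of this C API (hypothetical embedding code; create the
// codegen state once per lua_State, then compile individual loaded closures):
//   lua_State* L = luaL_newstate();
//   if (luau_codegen_supported())
//       luau_codegen_create(L);
//   // ... luau_compile + luau_load put a closure on top of the stack ...
//   if (luau_codegen_supported())
//       luau_codegen_compile(L, -1); // natively compile the closure at index -1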

View File

@ -6,6 +6,8 @@ pub struct Build {
out_dir: Option<PathBuf>,
target: Option<String>,
host: Option<String>,
// Enable code generator (jit)
enable_codegen: bool,
}
pub struct Artifacts {
@ -22,6 +24,7 @@ impl Build {
out_dir: env::var_os("OUT_DIR").map(|s| PathBuf::from(s).join("luau-build")),
target: env::var("TARGET").ok(),
host: env::var("HOST").ok(),
enable_codegen: false,
}
}
@ -40,6 +43,11 @@ impl Build {
self
}
pub fn enable_codegen(&mut self, enable: bool) -> &mut Build {
self.enable_codegen = enable;
self
}
pub fn build(&mut self) -> Artifacts {
let target = &self.target.as_ref().expect("TARGET not set")[..];
let host = &self.host.as_ref().expect("HOST not set")[..];
@ -51,6 +59,8 @@ impl Build {
let common_include_dir = source_dir_base.join("luau").join("Common").join("include");
let ast_source_dir = source_dir_base.join("luau").join("Ast").join("src");
let ast_include_dir = source_dir_base.join("luau").join("Ast").join("include");
let codegen_source_dir = source_dir_base.join("luau").join("CodeGen").join("src");
let codegen_include_dir = source_dir_base.join("luau").join("CodeGen").join("include");
let compiler_source_dir = source_dir_base.join("luau").join("Compiler").join("src");
let compiler_include_dir = source_dir_base
.join("luau")
@ -81,6 +91,10 @@ impl Build {
.flag_if_supported("/std:c++17") // MSVC
.cpp(true);
if self.enable_codegen {
config.define("LUA_CUSTOM_EXECUTION", None);
}
if cfg!(not(debug_assertions)) {
config.define("NDEBUG", None);
config.opt_level(2);
@ -98,6 +112,24 @@ impl Build {
.out_dir(&lib_dir)
.compile(ast_lib_name);
// Build CodeGen
let codegen_lib_name = "luaucodegen";
if self.enable_codegen {
config
.clone()
.include(&codegen_include_dir)
.include(&common_include_dir)
.include(&vm_include_dir)
.include(&vm_source_dir)
.define("LUACODEGEN_API", "extern \"C\"")
// Code generator uses Lua VM internals, so we need to provide the same defines used to build the VM
.define("LUA_API", "extern \"C\"")
.define("LUAI_MAXCSTACK", "100000")
.add_files_by_ext(&codegen_source_dir, "cpp")
.out_dir(&lib_dir)
.compile(codegen_lib_name);
}
// Build Compiler
let compiler_lib_name = "luaucompiler";
config
@ -130,7 +162,7 @@ impl Build {
fs::copy(compiler_include_dir.join(f), include_dir.join(f)).unwrap();
}
-Artifacts {
+let mut artifacts = Artifacts {
lib_dir,
include_dir,
libs: vec![
@ -139,7 +171,13 @@ impl Build {
vm_lib_name.to_string(),
],
cpp_stdlib: Self::get_cpp_link_stdlib(target),
};
if self.enable_codegen {
artifacts.libs.push(codegen_lib_name.to_string());
}
artifacts
}
fn get_cpp_link_stdlib(target: &str) -> Option<String> {

View File

@ -1,5 +1,5 @@
fn main() {
println!("cargo:rerun-if-changed=build.rs");
-let artifacts = luau0_src::Build::new().build();
+let artifacts = luau0_src::Build::new().enable_codegen(true).build();
artifacts.print_cargo_metadata();
}

View File

@ -15,9 +15,14 @@ extern "C" {
pub fn free(ptr: *mut c_void);
pub fn luaL_newstate() -> *mut c_void;
pub fn lua_close(state: *mut c_void);
pub fn luaL_openlibs(state: *mut c_void);
pub fn lua_getfield(state: *mut c_void, index: c_int, k: *const c_char) -> c_int;
pub fn lua_tolstring(state: *mut c_void, index: c_int, len: *mut c_long) -> *const c_char;
pub fn lua_call(state: *mut c_void, nargs: c_int, nresults: c_int);
pub fn lua_pushinteger(state: *mut c_void, n: c_int);
pub fn lua_tointegerx(state: *mut c_void, index: c_int, isnum: *mut c_int) -> c_int;
pub fn luau_compile(
source: *const c_char,
@ -32,6 +37,10 @@ extern "C" {
size: usize,
env: c_int,
) -> c_int;
pub fn luau_codegen_supported() -> c_int;
pub fn luau_codegen_create(state: *mut c_void);
pub fn luau_codegen_compile(state: *mut c_void, idx: c_int);
}
pub unsafe fn lua_getglobal(state: *mut c_void, k: *const c_char) {
@ -45,6 +54,11 @@ fn luau_works() {
let state = luaL_newstate();
assert!(state != ptr::null_mut());
// Enable JIT if supported
if luau_codegen_supported() != 0 {
luau_codegen_create(state);
}
luaL_openlibs(state);
let version = {
@ -56,7 +70,7 @@ fn luau_works() {
assert_eq!(version, "Luau".as_bytes());
let code = "function sum(a, b) return a + b end\0";
let code = "local a, b = ... return a + b\0";
let mut bytecode_size = 0;
let bytecode = luau_compile(
code.as_ptr().cast(),
@ -64,8 +78,21 @@ fn luau_works() {
ptr::null_mut(),
&mut bytecode_size,
);
let result = luau_load(state, "test\0".as_ptr().cast(), bytecode, bytecode_size, 0);
let result = luau_load(state, "sum\0".as_ptr().cast(), bytecode, bytecode_size, 0);
assert_eq!(result, 0);
free(bytecode.cast());
// Compile the function (JIT, if supported)
if luau_codegen_supported() != 0 {
luau_codegen_compile(state, -1);
}
// Call the loaded function
lua_pushinteger(state, 123);
lua_pushinteger(state, 321);
lua_call(state, 2, 1);
assert_eq!(lua_tointegerx(state, -1, ptr::null_mut()), 444);
lua_close(state);
}
}