Add Luau CodeGen (JIT implementation, currently experimental)

This commit is contained in:
parent deb042b940
commit 3bfe1afb96

@@ -0,0 +1,61 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include "Luau/RegisterA64.h"

#include <stddef.h>

namespace Luau
{
namespace CodeGen
{
namespace A64
{

enum class AddressKindA64 : uint8_t
{
    imm, // reg + imm
    reg, // reg + reg

    // TODO:
    // reg + reg << shift
    // reg + sext(reg) << shift
    // reg + uext(reg) << shift
};

struct AddressA64
{
    // This is a little misleading since AddressA64 can encode offsets up to 1023*size where size depends on the load/store size
    // For example, ldr x0, [reg+imm] is limited to 8 KB offsets assuming imm is divisible by 8, but loading into w0 reduces the range to 4 KB
    static constexpr size_t kMaxOffset = 1023;

    constexpr AddressA64(RegisterA64 base, int off = 0)
        : kind(AddressKindA64::imm)
        , base(base)
        , offset(xzr)
        , data(off)
    {
        LUAU_ASSERT(base.kind == KindA64::x || base == sp);
    }

    constexpr AddressA64(RegisterA64 base, RegisterA64 offset)
        : kind(AddressKindA64::reg)
        , base(base)
        , offset(offset)
        , data(0)
    {
        LUAU_ASSERT(base.kind == KindA64::x);
        LUAU_ASSERT(offset.kind == KindA64::x);
    }

    AddressKindA64 kind;
    RegisterA64 base;
    RegisterA64 offset;
    int data;
};

using mem = AddressA64;

} // namespace A64
} // namespace CodeGen
} // namespace Luau
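
Usage sketch (illustrative, not part of this commit): both addressing forms compose through the 'mem' alias; x0/x1/x2 and the AssemblyBuilderA64 named 'build' are assumptions taken from the headers added below.

build.ldr(x0, mem(x1, 8));  // reg + imm form: load from [x1 + 8]
build.ldr(x0, mem(x1, x2)); // reg + reg form: load from [x1 + x2]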

@@ -0,0 +1,280 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include "Luau/RegisterA64.h"
#include "Luau/AddressA64.h"
#include "Luau/ConditionA64.h"
#include "Luau/Label.h"

#include <string>
#include <vector>

namespace Luau
{
namespace CodeGen
{
namespace A64
{

enum FeaturesA64
{
    Feature_JSCVT = 1 << 0,
};

class AssemblyBuilderA64
{
public:
    explicit AssemblyBuilderA64(bool logText, unsigned int features = 0);
    ~AssemblyBuilderA64();

    // Moves
    void mov(RegisterA64 dst, RegisterA64 src);
    void mov(RegisterA64 dst, int src); // macro

    // Moves of 32-bit immediates get decomposed into one or more of these
    void movz(RegisterA64 dst, uint16_t src, int shift = 0);
    void movn(RegisterA64 dst, uint16_t src, int shift = 0);
    void movk(RegisterA64 dst, uint16_t src, int shift = 0);

    // Arithmetic
    void add(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
    void add(RegisterA64 dst, RegisterA64 src1, uint16_t src2);
    void sub(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
    void sub(RegisterA64 dst, RegisterA64 src1, uint16_t src2);
    void neg(RegisterA64 dst, RegisterA64 src);

    // Comparisons
    // Note: some arithmetic instructions also have versions that update flags (ADDS etc) but we aren't using them atm
    void cmp(RegisterA64 src1, RegisterA64 src2);
    void cmp(RegisterA64 src1, uint16_t src2);
    void csel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond);
    void cset(RegisterA64 dst, ConditionA64 cond);

    // Bitwise
    void and_(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
    void orr(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
    void eor(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
    void bic(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
    void tst(RegisterA64 src1, RegisterA64 src2, int shift = 0);
    void mvn_(RegisterA64 dst, RegisterA64 src);

    // Bitwise with immediate
    // Note: the immediate must have a single contiguous sequence of 1 bits set, of length 1..31
    void and_(RegisterA64 dst, RegisterA64 src1, uint32_t src2);
    void orr(RegisterA64 dst, RegisterA64 src1, uint32_t src2);
    void eor(RegisterA64 dst, RegisterA64 src1, uint32_t src2);
    void tst(RegisterA64 src1, uint32_t src2);

    // Shifts
    void lsl(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
    void lsr(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
    void asr(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
    void ror(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
    void clz(RegisterA64 dst, RegisterA64 src);
    void rbit(RegisterA64 dst, RegisterA64 src);

    // Shifts with immediates
    // Note: the immediate value must be in the [0, 31] or [0, 63] range based on register type
    void lsl(RegisterA64 dst, RegisterA64 src1, uint8_t src2);
    void lsr(RegisterA64 dst, RegisterA64 src1, uint8_t src2);
    void asr(RegisterA64 dst, RegisterA64 src1, uint8_t src2);
    void ror(RegisterA64 dst, RegisterA64 src1, uint8_t src2);

    // Bitfields
    void ubfiz(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);
    void ubfx(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);
    void sbfiz(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);
    void sbfx(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);

    // Load
    // Note: paired loads are currently omitted for simplicity
    void ldr(RegisterA64 dst, AddressA64 src);
    void ldrb(RegisterA64 dst, AddressA64 src);
    void ldrh(RegisterA64 dst, AddressA64 src);
    void ldrsb(RegisterA64 dst, AddressA64 src);
    void ldrsh(RegisterA64 dst, AddressA64 src);
    void ldrsw(RegisterA64 dst, AddressA64 src);
    void ldp(RegisterA64 dst1, RegisterA64 dst2, AddressA64 src);

    // Store
    void str(RegisterA64 src, AddressA64 dst);
    void strb(RegisterA64 src, AddressA64 dst);
    void strh(RegisterA64 src, AddressA64 dst);
    void stp(RegisterA64 src1, RegisterA64 src2, AddressA64 dst);

    // Control flow
    void b(Label& label);
    void bl(Label& label);
    void br(RegisterA64 src);
    void blr(RegisterA64 src);
    void ret();

    // Conditional control flow
    void b(ConditionA64 cond, Label& label);
    void cbz(RegisterA64 src, Label& label);
    void cbnz(RegisterA64 src, Label& label);
    void tbz(RegisterA64 src, uint8_t bit, Label& label);
    void tbnz(RegisterA64 src, uint8_t bit, Label& label);

    // Address of embedded data
    void adr(RegisterA64 dst, const void* ptr, size_t size);
    void adr(RegisterA64 dst, uint64_t value);
    void adr(RegisterA64 dst, double value);

    // Address of code (label)
    void adr(RegisterA64 dst, Label& label);

    // Floating-point scalar moves
    // Note: the constant must be compatible with immediate floating-point moves (see isFmovSupported)
    void fmov(RegisterA64 dst, RegisterA64 src);
    void fmov(RegisterA64 dst, double src);

    // Floating-point scalar math
    void fabs(RegisterA64 dst, RegisterA64 src);
    void fadd(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
    void fdiv(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
    void fmul(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
    void fneg(RegisterA64 dst, RegisterA64 src);
    void fsqrt(RegisterA64 dst, RegisterA64 src);
    void fsub(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);

    // Floating-point rounding and conversions
    void frinta(RegisterA64 dst, RegisterA64 src);
    void frintm(RegisterA64 dst, RegisterA64 src);
    void frintp(RegisterA64 dst, RegisterA64 src);
    void fcvt(RegisterA64 dst, RegisterA64 src);
    void fcvtzs(RegisterA64 dst, RegisterA64 src);
    void fcvtzu(RegisterA64 dst, RegisterA64 src);
    void scvtf(RegisterA64 dst, RegisterA64 src);
    void ucvtf(RegisterA64 dst, RegisterA64 src);

    // Floating-point conversion to integer using JS rules (wrap around 2^32) and set Z flag
    // Note: this is part of ARMv8.3 (JSCVT feature); support for this instruction needs to be checked at runtime
    void fjcvtzs(RegisterA64 dst, RegisterA64 src);

    // Floating-point comparisons
    void fcmp(RegisterA64 src1, RegisterA64 src2);
    void fcmpz(RegisterA64 src);
    void fcsel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond);

    // Run final checks
    bool finalize();

    // Places a label at the current location and returns it
    Label setLabel();

    // Assigns label position to the current location
    void setLabel(Label& label);

    // Extracts code offset (in bytes) from label
    uint32_t getLabelOffset(const Label& label)
    {
        LUAU_ASSERT(label.location != ~0u);
        return label.location * 4;
    }

    void logAppend(const char* fmt, ...) LUAU_PRINTF_ATTR(2, 3);

    uint32_t getCodeSize() const;

    // Resulting data and code that need to be copied over one after the other
    // The *end* of 'data' has to be aligned to 16 bytes; this will also align 'code'
    std::vector<uint8_t> data;
    std::vector<uint32_t> code;

    std::string text;

    const bool logText = false;
    const unsigned int features = 0;

    // Maximum immediate argument to functions like add/sub/cmp
    static constexpr size_t kMaxImmediate = (1 << 12) - 1;

    // Check if immediate mode mask is supported for bitwise operations (and/or/xor)
    static bool isMaskSupported(uint32_t mask);

    // Check if fmov can be used to synthesize a constant
    static bool isFmovSupported(double value);

private:
    // Instruction archetypes
    void place0(const char* name, uint32_t word);
    void placeSR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, int shift = 0, int N = 0);
    void placeSR2(const char* name, RegisterA64 dst, RegisterA64 src, uint8_t op, uint8_t op2 = 0);
    void placeR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, uint8_t op2);
    void placeR1(const char* name, RegisterA64 dst, RegisterA64 src, uint32_t op);
    void placeI12(const char* name, RegisterA64 dst, RegisterA64 src1, int src2, uint8_t op);
    void placeI16(const char* name, RegisterA64 dst, int src, uint8_t op, int shift = 0);
    void placeA(const char* name, RegisterA64 dst, AddressA64 src, uint16_t opsize, int sizelog);
    void placeB(const char* name, Label& label, uint8_t op);
    void placeBC(const char* name, Label& label, uint8_t op, uint8_t cond);
    void placeBCR(const char* name, Label& label, uint8_t op, RegisterA64 cond);
    void placeBR(const char* name, RegisterA64 src, uint32_t op);
    void placeBTR(const char* name, Label& label, uint8_t op, RegisterA64 cond, uint8_t bit);
    void placeADR(const char* name, RegisterA64 src, uint8_t op);
    void placeADR(const char* name, RegisterA64 src, uint8_t op, Label& label);
    void placeP(const char* name, RegisterA64 dst1, RegisterA64 dst2, AddressA64 src, uint8_t op, uint8_t opc, int sizelog);
    void placeCS(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond, uint8_t op, uint8_t opc, int invert = 0);
    void placeFCMP(const char* name, RegisterA64 src1, RegisterA64 src2, uint8_t op, uint8_t opc);
    void placeFMOV(const char* name, RegisterA64 dst, double src, uint32_t op);
    void placeBM(const char* name, RegisterA64 dst, RegisterA64 src1, uint32_t src2, uint8_t op);
    void placeBFM(const char* name, RegisterA64 dst, RegisterA64 src1, int src2, uint8_t op, int immr, int imms);

    void place(uint32_t word);

    struct Patch
    {
        enum Kind
        {
            Imm26,
            Imm19,
            Imm14,
        };

        Kind kind : 2;
        uint32_t label : 30;
        uint32_t location;
    };

    void patchLabel(Label& label, Patch::Kind kind);
    void patchOffset(uint32_t location, int value, Patch::Kind kind);

    void commit();
    LUAU_NOINLINE void extend();

    // Data
    size_t allocateData(size_t size, size_t align);

    // Logging of assembly in text form
    LUAU_NOINLINE void log(const char* opcode);
    LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
    LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src1, int src2);
    LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src);
    LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, int src, int shift = 0);
    LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, double src);
    LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, AddressA64 src);
    LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst1, RegisterA64 dst2, AddressA64 src);
    LUAU_NOINLINE void log(const char* opcode, RegisterA64 src, Label label, int imm = -1);
    LUAU_NOINLINE void log(const char* opcode, RegisterA64 src);
    LUAU_NOINLINE void log(const char* opcode, Label label);
    LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond);
    LUAU_NOINLINE void log(Label label);
    LUAU_NOINLINE void log(RegisterA64 reg);
    LUAU_NOINLINE void log(AddressA64 addr);

    uint32_t nextLabel = 1;
    std::vector<Patch> pendingLabels;
    std::vector<uint32_t> labelLocations;

    bool finalized = false;
    bool overflowed = false;

    size_t dataPos = 0;

    uint32_t* codePos = nullptr;
    uint32_t* codeEnd = nullptr;
};

} // namespace A64
} // namespace CodeGen
} // namespace Luau
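
Usage sketch (illustrative, not part of this commit): driving the builder end to end; w0/w1 are assumed to be 32-bit register constants from RegisterA64.h, and the argument/return registers follow the standard AArch64 calling convention.

AssemblyBuilderA64 build(/* logText= */ false);

// Equivalent of: int add2(int a, int b) { return a + b; }
build.add(w0, w0, w1);
build.ret();

if (!build.finalize()) // resolves pending label patches
    return; // handle failure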

@@ -0,0 +1,266 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include "Luau/Common.h"
#include "Luau/DenseHash.h"
#include "Luau/Label.h"
#include "Luau/ConditionX64.h"
#include "Luau/OperandX64.h"
#include "Luau/RegisterX64.h"

#include <string>
#include <vector>

namespace Luau
{
namespace CodeGen
{
namespace X64
{

enum class RoundingModeX64
{
    RoundToNearestEven = 0b00,
    RoundToNegativeInfinity = 0b01,
    RoundToPositiveInfinity = 0b10,
    RoundToZero = 0b11,
};

enum class AlignmentDataX64
{
    Nop,
    Int3,
    Ud2, // int3 will be used as a fall-back if it doesn't fit
};

enum class ABIX64
{
    Windows,
    SystemV,
};

class AssemblyBuilderX64
{
public:
    explicit AssemblyBuilderX64(bool logText, ABIX64 abi);
    explicit AssemblyBuilderX64(bool logText);
    ~AssemblyBuilderX64();

    // Base two-operand instructions with 9-way opcode selection
    void add(OperandX64 lhs, OperandX64 rhs);
    void sub(OperandX64 lhs, OperandX64 rhs);
    void cmp(OperandX64 lhs, OperandX64 rhs);
    void and_(OperandX64 lhs, OperandX64 rhs);
    void or_(OperandX64 lhs, OperandX64 rhs);
    void xor_(OperandX64 lhs, OperandX64 rhs);

    // Binary shift instructions with special rhs handling
    void sal(OperandX64 lhs, OperandX64 rhs);
    void sar(OperandX64 lhs, OperandX64 rhs);
    void shl(OperandX64 lhs, OperandX64 rhs);
    void shr(OperandX64 lhs, OperandX64 rhs);
    void rol(OperandX64 lhs, OperandX64 rhs);
    void ror(OperandX64 lhs, OperandX64 rhs);

    // The two-operand mov instruction has additional specialized encodings
    void mov(OperandX64 lhs, OperandX64 rhs);
    void mov64(RegisterX64 lhs, int64_t imm);
    void movsx(RegisterX64 lhs, OperandX64 rhs);
    void movzx(RegisterX64 lhs, OperandX64 rhs);

    // Base one-operand instructions with 2-way opcode selection
    void div(OperandX64 op);
    void idiv(OperandX64 op);
    void mul(OperandX64 op);
    void imul(OperandX64 op);
    void neg(OperandX64 op);
    void not_(OperandX64 op);
    void dec(OperandX64 op);
    void inc(OperandX64 op);

    // Additional forms of imul
    void imul(OperandX64 lhs, OperandX64 rhs);
    void imul(OperandX64 dst, OperandX64 lhs, int32_t rhs);

    void test(OperandX64 lhs, OperandX64 rhs);
    void lea(OperandX64 lhs, OperandX64 rhs);
    void setcc(ConditionX64 cond, OperandX64 op);

    void push(OperandX64 op);
    void pop(OperandX64 op);
    void ret();

    // Control flow
    void jcc(ConditionX64 cond, Label& label);
    void jmp(Label& label);
    void jmp(OperandX64 op);

    void call(Label& label);
    void call(OperandX64 op);

    void int3();

    void bsr(RegisterX64 dst, OperandX64 src);
    void bsf(RegisterX64 dst, OperandX64 src);

    // Code alignment
    void nop(uint32_t length = 1);
    void align(uint32_t alignment, AlignmentDataX64 data = AlignmentDataX64::Nop);

    // AVX
    void vaddpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
    void vaddps(OperandX64 dst, OperandX64 src1, OperandX64 src2);
    void vaddsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
    void vaddss(OperandX64 dst, OperandX64 src1, OperandX64 src2);

    void vsubsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
    void vmulsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
    void vdivsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);

    void vandpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
    void vandnpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);

    void vxorpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
    void vorpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);

    void vucomisd(OperandX64 src1, OperandX64 src2);

    void vcvttsd2si(OperandX64 dst, OperandX64 src);
    void vcvtsi2sd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
    void vcvtsd2ss(OperandX64 dst, OperandX64 src1, OperandX64 src2);

    void vroundsd(OperandX64 dst, OperandX64 src1, OperandX64 src2, RoundingModeX64 roundingMode); // inexact

    void vsqrtpd(OperandX64 dst, OperandX64 src);
    void vsqrtps(OperandX64 dst, OperandX64 src);
    void vsqrtsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
    void vsqrtss(OperandX64 dst, OperandX64 src1, OperandX64 src2);

    void vmovsd(OperandX64 dst, OperandX64 src);
    void vmovsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
    void vmovss(OperandX64 dst, OperandX64 src);
    void vmovss(OperandX64 dst, OperandX64 src1, OperandX64 src2);
    void vmovapd(OperandX64 dst, OperandX64 src);
    void vmovaps(OperandX64 dst, OperandX64 src);
    void vmovupd(OperandX64 dst, OperandX64 src);
    void vmovups(OperandX64 dst, OperandX64 src);
    void vmovq(OperandX64 lhs, OperandX64 rhs);

    void vmaxsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
    void vminsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);

    void vcmpltsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);

    void vblendvpd(RegisterX64 dst, RegisterX64 src1, OperandX64 mask, RegisterX64 src3);

    // Run final checks
    bool finalize();

    // Places a label at the current location and returns it
    Label setLabel();

    // Assigns label position to the current location
    void setLabel(Label& label);

    // Extracts code offset (in bytes) from label
    uint32_t getLabelOffset(const Label& label)
    {
        LUAU_ASSERT(label.location != ~0u);
        return label.location;
    }

    // Constant allocation (uses rip-relative addressing)
    OperandX64 i64(int64_t value);
    OperandX64 f32(float value);
    OperandX64 f64(double value);
    OperandX64 f32x4(float x, float y, float z, float w);
    OperandX64 f64x2(double x, double y);
    OperandX64 bytes(const void* ptr, size_t size, size_t align = 8);

    void logAppend(const char* fmt, ...) LUAU_PRINTF_ATTR(2, 3);

    uint32_t getCodeSize() const;

    // Resulting data and code that need to be copied over one after the other
    // The *end* of 'data' has to be aligned to 16 bytes; this will also align 'code'
    std::vector<uint8_t> data;
    std::vector<uint8_t> code;

    std::string text;

    const bool logText = false;

    const ABIX64 abi;

private:
    // Instruction archetypes
    void placeBinary(const char* name, OperandX64 lhs, OperandX64 rhs, uint8_t codeimm8, uint8_t codeimm, uint8_t codeimmImm8, uint8_t code8rev,
        uint8_t coderev, uint8_t code8, uint8_t code, uint8_t opreg);
    void placeBinaryRegMemAndImm(OperandX64 lhs, OperandX64 rhs, uint8_t code8, uint8_t code, uint8_t codeImm8, uint8_t opreg);
    void placeBinaryRegAndRegMem(OperandX64 lhs, OperandX64 rhs, uint8_t code8, uint8_t code);
    void placeBinaryRegMemAndReg(OperandX64 lhs, OperandX64 rhs, uint8_t code8, uint8_t code);

    void placeUnaryModRegMem(const char* name, OperandX64 op, uint8_t code8, uint8_t code, uint8_t opreg);

    void placeShift(const char* name, OperandX64 lhs, OperandX64 rhs, uint8_t opreg);

    void placeJcc(const char* name, Label& label, uint8_t cc);

    void placeAvx(const char* name, OperandX64 dst, OperandX64 src, uint8_t code, bool setW, uint8_t mode, uint8_t prefix);
    void placeAvx(const char* name, OperandX64 dst, OperandX64 src, uint8_t code, uint8_t coderev, bool setW, uint8_t mode, uint8_t prefix);
    void placeAvx(const char* name, OperandX64 dst, OperandX64 src1, OperandX64 src2, uint8_t code, bool setW, uint8_t mode, uint8_t prefix);
    void placeAvx(
        const char* name, OperandX64 dst, OperandX64 src1, OperandX64 src2, uint8_t imm8, uint8_t code, bool setW, uint8_t mode, uint8_t prefix);

    // Instruction components
    void placeRegAndModRegMem(OperandX64 lhs, OperandX64 rhs, int32_t extraCodeBytes = 0);
    void placeModRegMem(OperandX64 rhs, uint8_t regop, int32_t extraCodeBytes = 0);
    void placeRex(RegisterX64 op);
    void placeRex(OperandX64 op);
    void placeRexNoW(OperandX64 op);
    void placeRex(RegisterX64 lhs, OperandX64 rhs);
    void placeVex(OperandX64 dst, OperandX64 src1, OperandX64 src2, bool setW, uint8_t mode, uint8_t prefix);
    void placeImm8Or32(int32_t imm);
    void placeImm8(int32_t imm);
    void placeImm32(int32_t imm);
    void placeImm64(int64_t imm);
    void placeLabel(Label& label);
    void place(uint8_t byte);

    void commit();
    LUAU_NOINLINE void extend();

    // Data
    size_t allocateData(size_t size, size_t align);

    // Logging of assembly in text form (Intel asm with VS disassembly formatting)
    LUAU_NOINLINE void log(const char* opcode);
    LUAU_NOINLINE void log(const char* opcode, OperandX64 op);
    LUAU_NOINLINE void log(const char* opcode, OperandX64 op1, OperandX64 op2);
    LUAU_NOINLINE void log(const char* opcode, OperandX64 op1, OperandX64 op2, OperandX64 op3);
    LUAU_NOINLINE void log(const char* opcode, OperandX64 op1, OperandX64 op2, OperandX64 op3, OperandX64 op4);
    LUAU_NOINLINE void log(Label label);
    LUAU_NOINLINE void log(const char* opcode, Label label);
    void log(OperandX64 op);

    const char* getSizeName(SizeX64 size) const;
    const char* getRegisterName(RegisterX64 reg) const;

    uint32_t nextLabel = 1;
    std::vector<Label> pendingLabels;
    std::vector<uint32_t> labelLocations;

    DenseHashMap<uint64_t, int32_t> constCache64;

    bool finalized = false;

    size_t dataPos = 0;

    uint8_t* codePos = nullptr;
    uint8_t* codeEnd = nullptr;
};

} // namespace X64
} // namespace CodeGen
} // namespace Luau
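
Usage sketch (illustrative, not part of this commit): the x64 counterpart, assuming rax/rdi/rsi register constants from RegisterX64.h and the SystemV argument order.

AssemblyBuilderX64 build(/* logText= */ false, ABIX64::SystemV);

// Equivalent of: int64_t add2(int64_t a, int64_t b) { return a + b; }
build.mov(rax, rdi);
build.add(rax, rsi);
build.ret();

if (!build.finalize())
    return; // handle failure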

@@ -0,0 +1,56 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include <vector>

#include <stddef.h>
#include <stdint.h>

namespace Luau
{
namespace CodeGen
{

constexpr uint32_t kCodeAlignment = 32;

struct CodeAllocator
{
    CodeAllocator(size_t blockSize, size_t maxTotalSize);
    ~CodeAllocator();

    // Places data and code into the executable page area
    // To allow allocation while previously allocated code is already running, allocation has page granularity
    // It's important to group functions together so that page alignment won't result in a lot of wasted space
    bool allocate(
        const uint8_t* data, size_t dataSize, const uint8_t* code, size_t codeSize, uint8_t*& result, size_t& resultSize, uint8_t*& resultCodeStart);

    // Provided to callbacks
    void* context = nullptr;

    // Called when a new block is created, to create and set up the unwinding information for all the code in the block
    // 'startOffset' reserves space for data at the beginning of the page
    void* (*createBlockUnwindInfo)(void* context, uint8_t* block, size_t blockSize, size_t& startOffset) = nullptr;

    // Called to destroy unwinding information returned by 'createBlockUnwindInfo'
    void (*destroyBlockUnwindInfo)(void* context, void* unwindData) = nullptr;

    // Unwind information can be placed inside the block with some implementation-specific reservations at the beginning
    // But to simplify block space checks, we limit the max size of all that data
    static const size_t kMaxReservedDataSize = 256;

    bool allocateNewBlock(size_t& unwindInfoSize);

    // Current block we use for allocations
    uint8_t* blockPos = nullptr;
    uint8_t* blockEnd = nullptr;

    // All allocated blocks
    std::vector<uint8_t*> blocks;
    std::vector<void*> unwindInfos;

    size_t blockSize = 0;
    size_t maxTotalSize = 0;
};

} // namespace CodeGen
} // namespace Luau
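
Usage sketch (illustrative, not part of this commit): handing a finished build to the allocator; the block and total sizes are made-up tuning values, and 'build' is assumed to be an AssemblyBuilderX64 whose finalize() succeeded.

CodeAllocator allocator(/* blockSize= */ 1024 * 1024, /* maxTotalSize= */ 256 * 1024 * 1024);

uint8_t* nativeData = nullptr;
size_t sizeNativeData = 0;
uint8_t* codeStart = nullptr;

if (!allocator.allocate(build.data.data(), build.data.size(), build.code.data(), build.code.size(),
        nativeData, sizeNativeData, codeStart))
    return; // allocation failed: size limit reached or pages could not be mapped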

@@ -0,0 +1,19 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include <stddef.h>
#include <stdint.h>

namespace Luau
{
namespace CodeGen
{

// context must be an UnwindBuilder
void* createBlockUnwindInfo(void* context, uint8_t* block, size_t blockSize, size_t& startOffset);
void destroyBlockUnwindInfo(void* context, void* unwindData);

bool isUnwindSupported();

} // namespace CodeGen
} // namespace Luau
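
Usage sketch (illustrative, not part of this commit): these callbacks slot into the CodeAllocator fields above, assuming the host owns an UnwindBuilder instance named 'unwindBuilder'.

allocator.context = unwindBuilder; // must point to an UnwindBuilder
allocator.createBlockUnwindInfo = Luau::CodeGen::createBlockUnwindInfo;
allocator.destroyBlockUnwindInfo = Luau::CodeGen::destroyBlockUnwindInfo;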

@@ -0,0 +1,45 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include <string>

#include <stdint.h>

struct lua_State;

namespace Luau
{
namespace CodeGen
{

bool isSupported();

void create(lua_State* L);

// Builds the target function and all inner functions
void compile(lua_State* L, int idx);

using AnnotatorFn = void (*)(void* context, std::string& result, int fid, int instpos);

struct AssemblyOptions
{
    bool outputBinary = false;

    bool includeAssembly = false;
    bool includeIr = false;
    bool includeOutlinedCode = false;

    // An optional annotator function can be provided to describe each instruction; it takes a function id and a sequential instruction id
    AnnotatorFn annotator = nullptr;
    void* annotatorContext = nullptr;
};

// Generates assembly for the target function and all inner functions
std::string getAssembly(lua_State* L, int idx, AssemblyOptions options = {});

using PerfLogFn = void (*)(void* context, uintptr_t addr, unsigned size, const char* symbol);

void setPerfLog(void* context, PerfLogFn logFn);

} // namespace CodeGen
} // namespace Luau
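
Usage sketch (illustrative, not part of this commit): host-side wiring of the entry points; the function at 'idx' is compiled together with its inner functions.

void enableNativeExecution(lua_State* L, int idx)
{
    if (!Luau::CodeGen::isSupported())
        return; // unsupported platform: the interpreter keeps running the code

    Luau::CodeGen::create(L);       // once per VM
    Luau::CodeGen::compile(L, idx); // per closure
}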

@@ -0,0 +1,57 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

namespace Luau
{
namespace CodeGen
{
namespace A64
{

// See Table C1-1 on page C1-229 of Arm ARM for A-profile architecture
enum class ConditionA64
{
    // EQ: integer (equal), floating-point (equal)
    Equal,
    // NE: integer (not equal), floating-point (not equal or unordered)
    NotEqual,

    // CS: integer (carry set), unsigned integer (greater than, equal), floating-point (greater than, equal or unordered)
    CarrySet,
    // CC: integer (carry clear), unsigned integer (less than), floating-point (less than)
    CarryClear,

    // MI: integer (negative), floating-point (less than)
    Minus,
    // PL: integer (positive or zero), floating-point (greater than, equal or unordered)
    Plus,

    // VS: integer (overflow), floating-point (unordered)
    Overflow,
    // VC: integer (no overflow), floating-point (ordered)
    NoOverflow,

    // HI: integer (unsigned higher), floating-point (greater than, or unordered)
    UnsignedGreater,
    // LS: integer (unsigned lower or same), floating-point (less than or equal)
    UnsignedLessEqual,

    // GE: integer (signed greater than or equal), floating-point (greater than or equal)
    GreaterEqual,
    // LT: integer (signed less than), floating-point (less than, or unordered)
    Less,

    // GT: integer (signed greater than), floating-point (greater than)
    Greater,
    // LE: integer (signed less than or equal), floating-point (less than, equal or unordered)
    LessEqual,

    // AL: always
    Always,

    Count
};

} // namespace A64
} // namespace CodeGen
} // namespace Luau
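
Usage sketch (illustrative, not part of this commit): a condition pairs with a preceding flag-setting instruction, assuming an AssemblyBuilderA64 'build' and x0/x1 register constants.

build.cmp(x0, x1);
build.csel(x0, x0, x1, ConditionA64::Less); // x0 = min(x0, x1), signed compare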

@@ -0,0 +1,47 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include <stdint.h>

namespace Luau
{
namespace CodeGen
{

enum class ConditionX64 : uint8_t
{
    Overflow,
    NoOverflow,

    Carry,
    NoCarry,

    Below,
    BelowEqual,
    Above,
    AboveEqual,
    Equal,
    Less,
    LessEqual,
    Greater,
    GreaterEqual,

    NotBelow,
    NotBelowEqual,
    NotAbove,
    NotAboveEqual,
    NotEqual,
    NotLess,
    NotLessEqual,
    NotGreater,
    NotGreaterEqual,

    Zero,
    NotZero,

    Parity,
    NotParity,

    Count
};

} // namespace CodeGen
} // namespace Luau

@@ -0,0 +1,99 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include <bitset>
#include <utility>
#include <vector>

#include <stdint.h>

namespace Luau
{
namespace CodeGen
{

struct IrBlock;
struct IrFunction;

void updateUseCounts(IrFunction& function);

void updateLastUseLocations(IrFunction& function);

uint32_t getNextInstUse(IrFunction& function, uint32_t targetInstIdx, uint32_t startInstIdx);

// Returns how many values are coming into the block (live in) and how many are coming out of the block (live out)
std::pair<uint32_t, uint32_t> getLiveInOutValueCount(IrFunction& function, IrBlock& block);
uint32_t getLiveInValueCount(IrFunction& function, IrBlock& block);
uint32_t getLiveOutValueCount(IrFunction& function, IrBlock& block);

struct RegisterSet
{
    std::bitset<256> regs;

    // If a variadic sequence is active, we track the register at which it starts
    bool varargSeq = false;
    uint8_t varargStart = 0;
};

void requireVariadicSequence(RegisterSet& sourceRs, const RegisterSet& defRs, uint8_t varargStart);

struct CfgInfo
{
    std::vector<uint32_t> predecessors;
    std::vector<uint32_t> predecessorsOffsets;

    std::vector<uint32_t> successors;
    std::vector<uint32_t> successorsOffsets;

    // VM registers that are live when the block is entered
    // Additionally, an active variadic sequence can exist at the entry of the block
    std::vector<RegisterSet> in;

    // VM registers that are defined inside the block
    // It can also contain a variadic sequence definition if that hasn't been consumed inside the block
    // Note that this means that checking the 'def' set might not be enough to say that a register has not been written to
    std::vector<RegisterSet> def;

    // VM registers that are coming out from the block
    // These might be registers that are defined inside the block or have been defined at the entry of the block
    // Additionally, an active variadic sequence can exist at the exit of the block
    std::vector<RegisterSet> out;

    // VM registers captured by nested closures
    // This set can never have an active variadic sequence
    RegisterSet captured;
};

void computeCfgInfo(IrFunction& function);

struct BlockIteratorWrapper
{
    const uint32_t* itBegin = nullptr;
    const uint32_t* itEnd = nullptr;

    bool empty() const
    {
        return itBegin == itEnd;
    }

    size_t size() const
    {
        return size_t(itEnd - itBegin);
    }

    const uint32_t* begin() const
    {
        return itBegin;
    }

    const uint32_t* end() const
    {
        return itEnd;
    }
};

BlockIteratorWrapper predecessors(const CfgInfo& cfg, uint32_t blockIdx);
BlockIteratorWrapper successors(const CfgInfo& cfg, uint32_t blockIdx);

} // namespace CodeGen
} // namespace Luau
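
Usage sketch (illustrative, not part of this commit): walking the computed CFG; 'cfg' is assumed to be the CfgInfo filled in by computeCfgInfo, and 'visit' stands in for caller logic.

for (uint32_t predIdx : predecessors(cfg, blockIdx))
    visit(predIdx); // each predecessor block index

for (uint32_t succIdx : successors(cfg, blockIdx))
    visit(succIdx);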

@@ -0,0 +1,117 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include "Luau/Bytecode.h"
#include "Luau/Common.h"
#include "Luau/DenseHash.h"
#include "Luau/IrData.h"

#include <vector>

struct Proto;
typedef uint32_t Instruction;

namespace Luau
{
namespace CodeGen
{

struct AssemblyOptions;

struct IrBuilder
{
    IrBuilder();

    void buildFunctionIr(Proto* proto);

    void rebuildBytecodeBasicBlocks(Proto* proto);
    void translateInst(LuauOpcode op, const Instruction* pc, int i);

    bool isInternalBlock(IrOp block);
    void beginBlock(IrOp block);

    void loadAndCheckTag(IrOp loc, uint8_t tag, IrOp fallback);

    // Clones all instructions into the current block
    // The source block that is cloned cannot use values coming in from a predecessor
    void clone(const IrBlock& source, bool removeCurrentTerminator);

    IrOp undef();

    IrOp constBool(bool value);
    IrOp constInt(int value);
    IrOp constUint(unsigned value);
    IrOp constDouble(double value);
    IrOp constTag(uint8_t value);
    IrOp constAny(IrConst constant, uint64_t asCommonKey);

    IrOp cond(IrCondition cond);

    IrOp inst(IrCmd cmd);
    IrOp inst(IrCmd cmd, IrOp a);
    IrOp inst(IrCmd cmd, IrOp a, IrOp b);
    IrOp inst(IrCmd cmd, IrOp a, IrOp b, IrOp c);
    IrOp inst(IrCmd cmd, IrOp a, IrOp b, IrOp c, IrOp d);
    IrOp inst(IrCmd cmd, IrOp a, IrOp b, IrOp c, IrOp d, IrOp e);
    IrOp inst(IrCmd cmd, IrOp a, IrOp b, IrOp c, IrOp d, IrOp e, IrOp f);

    IrOp block(IrBlockKind kind); // Requested kind can be ignored if we are in an outlined sequence
    IrOp blockAtInst(uint32_t index);

    IrOp vmReg(uint8_t index);
    IrOp vmConst(uint32_t index);
    IrOp vmUpvalue(uint8_t index);

    bool inTerminatedBlock = false;

    bool activeFastcallFallback = false;
    IrOp fastcallFallbackReturn;

    IrFunction function;

    uint32_t activeBlockIdx = ~0u;

    std::vector<uint32_t> instIndexToBlock; // Block index at the bytecode instruction

    // Similar to BytecodeBuilder, duplicate constants are removed using the same method
    struct ConstantKey
    {
        IrConstKind kind;
        // Note: this stores value* from IrConst; when kind is Double, this stores the same bits as the double does, but in a uint64_t
        uint64_t value;

        bool operator==(const ConstantKey& key) const
        {
            return kind == key.kind && value == key.value;
        }
    };

    struct ConstantKeyHash
    {
        size_t operator()(const ConstantKey& key) const
        {
            // finalizer from MurmurHash64B
            const uint32_t m = 0x5bd1e995;

            uint32_t h1 = uint32_t(key.value);
            uint32_t h2 = uint32_t(key.value >> 32) ^ (int(key.kind) * m);

            h1 ^= h2 >> 18;
            h1 *= m;
            h2 ^= h1 >> 22;
            h2 *= m;
            h1 ^= h2 >> 17;
            h1 *= m;
            h2 ^= h1 >> 19;
            h2 *= m;

            // ... truncated to 32-bit output (normally the hash is equal to (uint64_t(h1) << 32) | h2, but we only really need the lower 32-bit half)
            return size_t(h2);
        }
    };

    DenseHashMap<ConstantKey, uint32_t, ConstantKeyHash> constantMap;
};

} // namespace CodeGen
} // namespace Luau
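
Usage sketch (illustrative, not part of this commit): direct IR construction. IrCmd::ADD_NUM is referenced in IrUtils.h below, but IrOp/IrCmd themselves live in the suppressed IrData.h, so treat the exact shapes as assumptions.

IrBuilder build;

IrOp lhs = build.constDouble(1.0);
IrOp rhs = build.constDouble(2.5);               // a duplicate constant would be deduplicated via constantMap
IrOp sum = build.inst(IrCmd::ADD_NUM, lhs, rhs); // the result is usable as an operand of later instructions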

@@ -0,0 +1,84 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include "Luau/AssemblyBuilderX64.h"
#include "Luau/IrData.h"
#include "Luau/OperandX64.h"
#include "Luau/RegisterX64.h"

#include <array>

// TODO: the call wrapper can be used to suggest target registers for ScopedRegX64 to compute data into argument registers directly

namespace Luau
{
namespace CodeGen
{
namespace X64
{

struct IrRegAllocX64;
struct ScopedRegX64;

struct CallArgument
{
    SizeX64 targetSize = SizeX64::none;

    OperandX64 source = noreg;
    IrOp sourceOp;

    OperandX64 target = noreg;
    bool candidate = true;
};

class IrCallWrapperX64
{
public:
    IrCallWrapperX64(IrRegAllocX64& regs, AssemblyBuilderX64& build, uint32_t instIdx = kInvalidInstIdx);

    void addArgument(SizeX64 targetSize, OperandX64 source, IrOp sourceOp = {});
    void addArgument(SizeX64 targetSize, ScopedRegX64& scopedReg);

    void call(const OperandX64& func);

    RegisterX64 suggestNextArgumentRegister(SizeX64 size) const;

    IrRegAllocX64& regs;
    AssemblyBuilderX64& build;
    uint32_t instIdx = ~0u;

private:
    OperandX64 getNextArgumentTarget(SizeX64 size) const;
    void countRegisterUses();
    CallArgument* findNonInterferingArgument();
    bool interferesWithOperand(const OperandX64& op, RegisterX64 reg) const;
    bool interferesWithActiveSources(const CallArgument& targetArg, int targetArgIndex) const;
    bool interferesWithActiveTarget(RegisterX64 sourceReg) const;
    void moveToTarget(CallArgument& arg);
    void freeSourceRegisters(CallArgument& arg);
    void renameRegister(RegisterX64& target, RegisterX64 reg, RegisterX64 replacement);
    void renameSourceRegisters(RegisterX64 reg, RegisterX64 replacement);
    RegisterX64 findConflictingTarget() const;
    void renameConflictingRegister(RegisterX64 conflict);

    int getRegisterUses(RegisterX64 reg) const;
    void addRegisterUse(RegisterX64 reg);
    void removeRegisterUse(RegisterX64 reg);

    static const int kMaxCallArguments = 6;
    std::array<CallArgument, kMaxCallArguments> args;
    int argCount = 0;

    int gprPos = 0;
    int xmmPos = 0;

    OperandX64 funcOp;

    // Internal counters for remaining register use counts
    std::array<uint8_t, 16> gprUses;
    std::array<uint8_t, 16> xmmUses;
};

} // namespace X64
} // namespace CodeGen
} // namespace Luau
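
Usage sketch (illustrative, not part of this commit): marshalling a two-argument C call during lowering; 'regs', 'build', 'rState', and 'helperAddress' are assumptions standing in for lowering-pass state.

IrCallWrapperX64 callWrap(regs, build, instIdx);
callWrap.addArgument(SizeX64::qword, rState); // e.g. a register holding lua_State*
callWrap.addArgument(SizeX64::dword, 42);     // an immediate argument
callWrap.call(helperAddress);                 // shuffles sources into ABI registers, then emits the call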

(File diff suppressed because it is too large)

@@ -0,0 +1,45 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include "Luau/IrData.h"

#include <string>
#include <vector>

namespace Luau
{
namespace CodeGen
{

struct CfgInfo;

const char* getCmdName(IrCmd cmd);
const char* getBlockKindName(IrBlockKind kind);

struct IrToStringContext
{
    std::string& result;
    const std::vector<IrBlock>& blocks;
    const std::vector<IrConst>& constants;
    const CfgInfo& cfg;
};

void toString(IrToStringContext& ctx, const IrInst& inst, uint32_t index);
void toString(IrToStringContext& ctx, const IrBlock& block, uint32_t index); // Block title
void toString(IrToStringContext& ctx, IrOp op);

void toString(std::string& result, IrConst constant);

void toStringDetailed(IrToStringContext& ctx, const IrBlock& block, uint32_t blockIdx, const IrInst& inst, uint32_t instIdx, bool includeUseInfo);
void toStringDetailed(IrToStringContext& ctx, const IrBlock& block, uint32_t index, bool includeUseInfo); // Block title

std::string toString(const IrFunction& function, bool includeUseInfo);

std::string dump(const IrFunction& function);

std::string toDot(const IrFunction& function, bool includeInst);

std::string dumpDot(const IrFunction& function, bool includeInst);

} // namespace CodeGen
} // namespace Luau
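
Usage sketch (illustrative, not part of this commit): the debugging entry points, assuming 'function' is a populated IrFunction.

std::string listing = toString(function, /* includeUseInfo= */ true); // full IR listing
std::string graph = toDot(function, /* includeInst= */ false);        // CFG in Graphviz form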

@@ -0,0 +1,121 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include "Luau/AssemblyBuilderX64.h"
#include "Luau/IrData.h"
#include "Luau/RegisterX64.h"

#include <array>
#include <bitset>
#include <initializer_list>
#include <vector>

namespace Luau
{
namespace CodeGen
{
namespace X64
{

constexpr uint8_t kNoStackSlot = 0xff;

struct IrSpillX64
{
    uint32_t instIdx = 0;
    IrValueKind valueKind = IrValueKind::Unknown;

    unsigned spillId = 0;

    // Spill location can be a stack location or be empty
    // When it's empty, it means that the instruction value can be rematerialized
    uint8_t stackSlot = kNoStackSlot;

    RegisterX64 originalLoc = noreg;
};

struct IrRegAllocX64
{
    IrRegAllocX64(AssemblyBuilderX64& build, IrFunction& function);

    RegisterX64 allocReg(SizeX64 size, uint32_t instIdx);
    RegisterX64 allocRegOrReuse(SizeX64 size, uint32_t instIdx, std::initializer_list<IrOp> oprefs);
    RegisterX64 takeReg(RegisterX64 reg, uint32_t instIdx);

    void freeReg(RegisterX64 reg);
    void freeLastUseReg(IrInst& target, uint32_t instIdx);
    void freeLastUseRegs(const IrInst& inst, uint32_t instIdx);

    bool isLastUseReg(const IrInst& target, uint32_t instIdx) const;

    bool shouldFreeGpr(RegisterX64 reg) const;

    unsigned findSpillStackSlot(IrValueKind valueKind);

    IrOp getRestoreOp(const IrInst& inst) const;
    bool hasRestoreOp(const IrInst& inst) const;
    OperandX64 getRestoreAddress(const IrInst& inst, IrOp restoreOp);

    // The register used by an instruction is about to be freed; we have to find a way to restore the value later
    void preserve(IrInst& inst);

    void restore(IrInst& inst, bool intoOriginalLocation);

    void preserveAndFreeInstValues();

    uint32_t findInstructionWithFurthestNextUse(const std::array<uint32_t, 16>& regInstUsers) const;

    void assertFree(RegisterX64 reg) const;
    void assertAllFree() const;
    void assertNoSpills() const;

    AssemblyBuilderX64& build;
    IrFunction& function;

    uint32_t currInstIdx = ~0u;

    std::array<bool, 16> freeGprMap;
    std::array<uint32_t, 16> gprInstUsers;
    std::array<bool, 16> freeXmmMap;
    std::array<uint32_t, 16> xmmInstUsers;

    std::bitset<256> usedSpillSlots;
    unsigned maxUsedSlot = 0;
    unsigned nextSpillId = 1;
    std::vector<IrSpillX64> spills;
};

struct ScopedRegX64
{
    explicit ScopedRegX64(IrRegAllocX64& owner);
    ScopedRegX64(IrRegAllocX64& owner, SizeX64 size);
    ScopedRegX64(IrRegAllocX64& owner, RegisterX64 reg);
    ~ScopedRegX64();

    ScopedRegX64(const ScopedRegX64&) = delete;
    ScopedRegX64& operator=(const ScopedRegX64&) = delete;

    void alloc(SizeX64 size);
    void free();

    RegisterX64 release();

    IrRegAllocX64& owner;
    RegisterX64 reg;
};

// When an IR instruction makes a call under a condition that's not reflected as a real branch in IR,
// spilled values have to be restored to their exact original locations, so that both after a call
// and after the skip, values are found in the same place
struct ScopedSpills
{
    explicit ScopedSpills(IrRegAllocX64& owner);
    ~ScopedSpills();

    ScopedSpills(const ScopedSpills&) = delete;
    ScopedSpills& operator=(const ScopedSpills&) = delete;

    IrRegAllocX64& owner;
    unsigned startSpillId = 0;
};

} // namespace X64
} // namespace CodeGen
} // namespace Luau
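
Usage sketch (illustrative, not part of this commit): the scoped-register idiom; 'regs' and 'build' are the allocator and builder owned by the lowering pass, rdi is assumed from RegisterX64.h, and the memory operand is a placeholder.

{
    ScopedRegX64 tmp{regs, SizeX64::qword};
    build.mov(tmp.reg, qword[rdi + 8]); // compute into the scratch register
    build.add(tmp.reg, 1);
} // the destructor returns tmp.reg to the free pool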

@@ -0,0 +1,258 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include "Luau/Bytecode.h"
#include "Luau/Common.h"
#include "Luau/IrData.h"

namespace Luau
{
namespace CodeGen
{

struct IrBuilder;

inline bool isJumpD(LuauOpcode op)
{
    switch (op)
    {
    case LOP_JUMP:
    case LOP_JUMPIF:
    case LOP_JUMPIFNOT:
    case LOP_JUMPIFEQ:
    case LOP_JUMPIFLE:
    case LOP_JUMPIFLT:
    case LOP_JUMPIFNOTEQ:
    case LOP_JUMPIFNOTLE:
    case LOP_JUMPIFNOTLT:
    case LOP_FORNPREP:
    case LOP_FORNLOOP:
    case LOP_FORGPREP:
    case LOP_FORGLOOP:
    case LOP_FORGPREP_INEXT:
    case LOP_FORGPREP_NEXT:
    case LOP_JUMPBACK:
    case LOP_JUMPXEQKNIL:
    case LOP_JUMPXEQKB:
    case LOP_JUMPXEQKN:
    case LOP_JUMPXEQKS:
        return true;

    default:
        return false;
    }
}

inline bool isSkipC(LuauOpcode op)
{
    switch (op)
    {
    case LOP_LOADB:
        return true;

    default:
        return false;
    }
}

inline bool isFastCall(LuauOpcode op)
{
    switch (op)
    {
    case LOP_FASTCALL:
    case LOP_FASTCALL1:
    case LOP_FASTCALL2:
    case LOP_FASTCALL2K:
        return true;

    default:
        return false;
    }
}

inline int getJumpTarget(uint32_t insn, uint32_t pc)
{
    LuauOpcode op = LuauOpcode(LUAU_INSN_OP(insn));

    if (isJumpD(op))
        return int(pc + LUAU_INSN_D(insn) + 1);
    else if (isFastCall(op))
        return int(pc + LUAU_INSN_C(insn) + 2);
    else if (isSkipC(op) && LUAU_INSN_C(insn))
        return int(pc + LUAU_INSN_C(insn) + 1);
    else if (op == LOP_JUMPX)
        return int(pc + LUAU_INSN_E(insn) + 1);
    else
        return -1;
}

inline bool isBlockTerminator(IrCmd cmd)
{
    switch (cmd)
    {
    case IrCmd::JUMP:
    case IrCmd::JUMP_IF_TRUTHY:
    case IrCmd::JUMP_IF_FALSY:
    case IrCmd::JUMP_EQ_TAG:
    case IrCmd::JUMP_EQ_INT:
    case IrCmd::JUMP_LT_INT:
    case IrCmd::JUMP_GE_UINT:
    case IrCmd::JUMP_EQ_POINTER:
    case IrCmd::JUMP_CMP_NUM:
    case IrCmd::JUMP_CMP_ANY:
    case IrCmd::JUMP_SLOT_MATCH:
    case IrCmd::RETURN:
    case IrCmd::FORGLOOP:
    case IrCmd::FORGLOOP_FALLBACK:
    case IrCmd::FORGPREP_XNEXT_FALLBACK:
    case IrCmd::FALLBACK_FORGPREP:
        return true;
    default:
        break;
    }

    return false;
}

inline bool isNonTerminatingJump(IrCmd cmd)
{
    switch (cmd)
    {
    case IrCmd::TRY_NUM_TO_INDEX:
    case IrCmd::TRY_CALL_FASTGETTM:
    case IrCmd::CHECK_FASTCALL_RES:
    case IrCmd::CHECK_TAG:
    case IrCmd::CHECK_READONLY:
    case IrCmd::CHECK_NO_METATABLE:
    case IrCmd::CHECK_SAFE_ENV:
    case IrCmd::CHECK_ARRAY_SIZE:
    case IrCmd::CHECK_SLOT_MATCH:
    case IrCmd::CHECK_NODE_NO_NEXT:
        return true;
    default:
        break;
    }

    return false;
}

inline bool hasResult(IrCmd cmd)
{
    switch (cmd)
    {
    case IrCmd::LOAD_TAG:
    case IrCmd::LOAD_POINTER:
    case IrCmd::LOAD_DOUBLE:
    case IrCmd::LOAD_INT:
    case IrCmd::LOAD_TVALUE:
    case IrCmd::LOAD_NODE_VALUE_TV:
    case IrCmd::LOAD_ENV:
    case IrCmd::GET_ARR_ADDR:
    case IrCmd::GET_SLOT_NODE_ADDR:
    case IrCmd::GET_HASH_NODE_ADDR:
    case IrCmd::ADD_INT:
    case IrCmd::SUB_INT:
    case IrCmd::ADD_NUM:
    case IrCmd::SUB_NUM:
    case IrCmd::MUL_NUM:
    case IrCmd::DIV_NUM:
    case IrCmd::MOD_NUM:
    case IrCmd::MIN_NUM:
    case IrCmd::MAX_NUM:
    case IrCmd::UNM_NUM:
    case IrCmd::FLOOR_NUM:
    case IrCmd::CEIL_NUM:
    case IrCmd::ROUND_NUM:
    case IrCmd::SQRT_NUM:
    case IrCmd::ABS_NUM:
    case IrCmd::NOT_ANY:
    case IrCmd::TABLE_LEN:
    case IrCmd::NEW_TABLE:
    case IrCmd::DUP_TABLE:
    case IrCmd::TRY_NUM_TO_INDEX:
    case IrCmd::TRY_CALL_FASTGETTM:
    case IrCmd::INT_TO_NUM:
    case IrCmd::UINT_TO_NUM:
    case IrCmd::NUM_TO_INT:
    case IrCmd::NUM_TO_UINT:
    case IrCmd::SUBSTITUTE:
    case IrCmd::INVOKE_FASTCALL:
    case IrCmd::BITAND_UINT:
    case IrCmd::BITXOR_UINT:
    case IrCmd::BITOR_UINT:
    case IrCmd::BITNOT_UINT:
    case IrCmd::BITLSHIFT_UINT:
    case IrCmd::BITRSHIFT_UINT:
    case IrCmd::BITARSHIFT_UINT:
    case IrCmd::BITLROTATE_UINT:
    case IrCmd::BITRROTATE_UINT:
    case IrCmd::BITCOUNTLZ_UINT:
    case IrCmd::BITCOUNTRZ_UINT:
    case IrCmd::INVOKE_LIBM:
        return true;
    default:
        break;
    }

    return false;
}

inline bool hasSideEffects(IrCmd cmd)
{
    if (cmd == IrCmd::INVOKE_FASTCALL)
        return true;

    // Instructions that don't produce a result most likely have other side effects to make them useful
    // Right now, a full switch would mirror the 'hasResult' function, so we use this simple condition
    return !hasResult(cmd);
}

inline bool isPseudo(IrCmd cmd)
{
    // Instructions that are used for internal needs and are not a part of final lowering
    return cmd == IrCmd::NOP || cmd == IrCmd::SUBSTITUTE;
}

IrValueKind getCmdValueKind(IrCmd cmd);

bool isGCO(uint8_t tag);

// Manually add or remove use of an operand
void addUse(IrFunction& function, IrOp op);
void removeUse(IrFunction& function, IrOp op);

// Remove a single instruction
void kill(IrFunction& function, IrInst& inst);

// Remove a range of instructions
void kill(IrFunction& function, uint32_t start, uint32_t end);

// Remove a block, including all instructions inside
void kill(IrFunction& function, IrBlock& block);

// Replace a single operand and update use counts (can cause chain removal of dead code)
void replace(IrFunction& function, IrOp& original, IrOp replacement);

// Replace a single instruction
// A target instruction index is used instead of a reference to handle the introduction of a new block terminator
void replace(IrFunction& function, IrBlock& block, uint32_t instIdx, IrInst replacement);

// Replace an instruction with a different value (using IrCmd::SUBSTITUTE)
void substitute(IrFunction& function, IrInst& inst, IrOp replacement);

// Replace instruction arguments that point to substitutions with target values
void applySubstitutions(IrFunction& function, IrOp& op);
void applySubstitutions(IrFunction& function, IrInst& inst);

// Compare numbers using an IR condition value
bool compare(double a, double b, IrCondition cond);

// Perform constant folding on the instruction at the given index
// For most instructions, successful folding results in an IrCmd::SUBSTITUTE
// But it can also succeed on conditional control flow, replacing it with an unconditional IrCmd::JUMP
void foldConstants(IrBuilder& build, IrFunction& function, IrBlock& block, uint32_t instIdx);

uint32_t getNativeContextOffset(int bfid);

} // namespace CodeGen
} // namespace Luau
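
Usage sketch (illustrative, not part of this commit): decoding a branch target during basic-block discovery; 'insns' is a bytecode instruction array, 'pc' the current index, and 'markBasicBlockStart' a placeholder for caller logic.

if (int target = getJumpTarget(insns[pc], uint32_t(pc)); target >= 0)
    markBasicBlockStart(target); // instruction at 'pc' may transfer control to 'target'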

@@ -0,0 +1,18 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include <stdint.h>

namespace Luau
{
namespace CodeGen
{

struct Label
{
    uint32_t id = 0;
    uint32_t location = ~0u;
};

} // namespace CodeGen
} // namespace Luau

@@ -0,0 +1,145 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include "Luau/Common.h"
#include "Luau/RegisterX64.h"

#include <stdint.h>

namespace Luau
{
namespace CodeGen
{
namespace X64
{

enum class CategoryX64 : uint8_t
{
    reg,
    mem,
    imm,
};

struct OperandX64
{
    constexpr OperandX64(RegisterX64 reg)
        : cat(CategoryX64::reg)
        , index(noreg)
        , base(reg)
        , memSize(SizeX64::none)
        , scale(1)
        , imm(0)
    {
    }

    constexpr OperandX64(int32_t imm)
        : cat(CategoryX64::imm)
        , index(noreg)
        , base(noreg)
        , memSize(SizeX64::none)
        , scale(1)
        , imm(imm)
    {
    }

    constexpr explicit OperandX64(SizeX64 size, RegisterX64 index, uint8_t scale, RegisterX64 base, int32_t disp)
        : cat(CategoryX64::mem)
        , index(index)
        , base(base)
        , memSize(size)
        , scale(scale)
        , imm(disp)
    {
    }

    // Fields are carefully placed to make this struct fit into an 8-byte register
    CategoryX64 cat;
    RegisterX64 index;
    RegisterX64 base;
    SizeX64 memSize : 4;
    uint8_t scale : 4;
    int32_t imm;

    constexpr OperandX64 operator[](OperandX64&& addr) const
    {
        LUAU_ASSERT(cat == CategoryX64::mem);
        LUAU_ASSERT(index == noreg && scale == 1 && base == noreg && imm == 0);
        LUAU_ASSERT(addr.memSize == SizeX64::none);

        addr.cat = CategoryX64::mem;
        addr.memSize = memSize;
        return addr;
    }
};

constexpr OperandX64 addr{SizeX64::none, noreg, 1, noreg, 0};
constexpr OperandX64 byte{SizeX64::byte, noreg, 1, noreg, 0};
constexpr OperandX64 word{SizeX64::word, noreg, 1, noreg, 0};
constexpr OperandX64 dword{SizeX64::dword, noreg, 1, noreg, 0};
constexpr OperandX64 qword{SizeX64::qword, noreg, 1, noreg, 0};
constexpr OperandX64 xmmword{SizeX64::xmmword, noreg, 1, noreg, 0};
constexpr OperandX64 ymmword{SizeX64::ymmword, noreg, 1, noreg, 0};

constexpr OperandX64 operator*(RegisterX64 reg, uint8_t scale)
{
    if (scale == 1)
        return OperandX64(reg);

    LUAU_ASSERT(scale == 1 || scale == 2 || scale == 4 || scale == 8);
    LUAU_ASSERT(reg.index != 0b100 && "can't scale SP");

    return OperandX64(SizeX64::none, reg, scale, noreg, 0);
}

constexpr OperandX64 operator+(RegisterX64 reg, int32_t disp)
{
    return OperandX64(SizeX64::none, noreg, 1, reg, disp);
}

constexpr OperandX64 operator-(RegisterX64 reg, int32_t disp)
{
    return OperandX64(SizeX64::none, noreg, 1, reg, -disp);
}

constexpr OperandX64 operator+(RegisterX64 base, RegisterX64 index)
{
    LUAU_ASSERT(index.index != 4 && "sp cannot be used as index");
    LUAU_ASSERT(base.size == index.size);

    return OperandX64(SizeX64::none, index, 1, base, 0);
}

constexpr OperandX64 operator+(OperandX64 op, int32_t disp)
{
    LUAU_ASSERT(op.cat == CategoryX64::mem);
    LUAU_ASSERT(op.memSize == SizeX64::none);

    op.imm += disp;
    return op;
}

constexpr OperandX64 operator+(OperandX64 op, RegisterX64 base)
{
    LUAU_ASSERT(op.cat == CategoryX64::mem);
    LUAU_ASSERT(op.memSize == SizeX64::none);
    LUAU_ASSERT(op.base == noreg);
    LUAU_ASSERT(op.index == noreg || op.index.size == base.size);

    op.base = base;
    return op;
}

constexpr OperandX64 operator+(RegisterX64 base, OperandX64 op)
{
    LUAU_ASSERT(op.cat == CategoryX64::mem);
    LUAU_ASSERT(op.memSize == SizeX64::none);
    LUAU_ASSERT(op.base == noreg);
    LUAU_ASSERT(op.index == noreg || op.index.size == base.size);

    op.base = base;
    return op;
}

} // namespace X64
} // namespace CodeGen
} // namespace Luau
|
|
@ -0,0 +1,17 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#pragma once
|
||||
|
||||
#include "Luau/IrData.h"
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace CodeGen
|
||||
{
|
||||
|
||||
struct IrBuilder;
|
||||
|
||||
void constPropInBlockChains(IrBuilder& build, bool useValueNumbering);
|
||||
void createLinearBlocks(IrBuilder& build, bool useValueNumbering);
|
||||
|
||||
} // namespace CodeGen
|
||||
} // namespace Luau
|
|
@ -0,0 +1,14 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#pragma once
|
||||
|
||||
#include "Luau/IrData.h"
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace CodeGen
|
||||
{
|
||||
|
||||
void optimizeMemoryOperandsX64(IrFunction& function);
|
||||
|
||||
} // namespace CodeGen
|
||||
} // namespace Luau
|
|
@ -0,0 +1,233 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#pragma once
|
||||
|
||||
#include "Luau/Common.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace CodeGen
|
||||
{
|
||||
namespace A64
|
||||
{
|
||||
|
||||
enum class KindA64 : uint8_t
|
||||
{
|
||||
none,
|
||||
w, // 32-bit GPR
|
||||
x, // 64-bit GPR
|
||||
s, // 32-bit SIMD&FP scalar
|
||||
d, // 64-bit SIMD&FP scalar
|
||||
q, // 128-bit SIMD&FP vector
|
||||
};
|
||||
|
||||
struct RegisterA64
|
||||
{
|
||||
KindA64 kind : 3;
|
||||
uint8_t index : 5;
|
||||
|
||||
constexpr bool operator==(RegisterA64 rhs) const
|
||||
{
|
||||
return kind == rhs.kind && index == rhs.index;
|
||||
}
|
||||
|
||||
constexpr bool operator!=(RegisterA64 rhs) const
|
||||
{
|
||||
return !(*this == rhs);
|
||||
}
|
||||
};
|
||||
|
||||
constexpr RegisterA64 castReg(KindA64 kind, RegisterA64 reg)
|
||||
{
|
||||
LUAU_ASSERT(kind != reg.kind);
|
||||
LUAU_ASSERT(kind != KindA64::none && reg.kind != KindA64::none);
|
||||
LUAU_ASSERT((kind == KindA64::w || kind == KindA64::x) == (reg.kind == KindA64::w || reg.kind == KindA64::x));
|
||||
|
||||
return RegisterA64{kind, reg.index};
|
||||
}
|
||||
|
||||
// This is equivalent to castReg(KindA64::x), but is separate because it implies different semantics
|
||||
// Specifically, there are cases when it's useful to treat a wN register as an xN register *after* it has been assigned a value
|
||||
// Since all A64 instructions that write to wN implicitly zero the top half, this works when we need zero extension semantics
|
||||
// Crucially, this is *not* safe on an ABI boundary - an int parameter in wN register may have anything in its top half in certain cases
|
||||
// However, as long as our codegen doesn't use 32-bit truncation by using castReg x=>w, we can safely rely on this.
|
||||
constexpr RegisterA64 zextReg(RegisterA64 reg)
|
||||
{
|
||||
LUAU_ASSERT(reg.kind == KindA64::w);
|
||||
|
||||
return RegisterA64{KindA64::x, reg.index};
|
||||
}
|
||||
|
||||
constexpr RegisterA64 noreg{KindA64::none, 0};
|
||||
|
||||
constexpr RegisterA64 w0{KindA64::w, 0};
|
||||
constexpr RegisterA64 w1{KindA64::w, 1};
|
||||
constexpr RegisterA64 w2{KindA64::w, 2};
|
||||
constexpr RegisterA64 w3{KindA64::w, 3};
|
||||
constexpr RegisterA64 w4{KindA64::w, 4};
|
||||
constexpr RegisterA64 w5{KindA64::w, 5};
|
||||
constexpr RegisterA64 w6{KindA64::w, 6};
|
||||
constexpr RegisterA64 w7{KindA64::w, 7};
|
||||
constexpr RegisterA64 w8{KindA64::w, 8};
|
||||
constexpr RegisterA64 w9{KindA64::w, 9};
|
||||
constexpr RegisterA64 w10{KindA64::w, 10};
|
||||
constexpr RegisterA64 w11{KindA64::w, 11};
|
||||
constexpr RegisterA64 w12{KindA64::w, 12};
|
||||
constexpr RegisterA64 w13{KindA64::w, 13};
|
||||
constexpr RegisterA64 w14{KindA64::w, 14};
|
||||
constexpr RegisterA64 w15{KindA64::w, 15};
|
||||
constexpr RegisterA64 w16{KindA64::w, 16};
|
||||
constexpr RegisterA64 w17{KindA64::w, 17};
|
||||
constexpr RegisterA64 w18{KindA64::w, 18};
|
||||
constexpr RegisterA64 w19{KindA64::w, 19};
|
||||
constexpr RegisterA64 w20{KindA64::w, 20};
|
||||
constexpr RegisterA64 w21{KindA64::w, 21};
|
||||
constexpr RegisterA64 w22{KindA64::w, 22};
|
||||
constexpr RegisterA64 w23{KindA64::w, 23};
|
||||
constexpr RegisterA64 w24{KindA64::w, 24};
|
||||
constexpr RegisterA64 w25{KindA64::w, 25};
|
||||
constexpr RegisterA64 w26{KindA64::w, 26};
|
||||
constexpr RegisterA64 w27{KindA64::w, 27};
|
||||
constexpr RegisterA64 w28{KindA64::w, 28};
|
||||
constexpr RegisterA64 w29{KindA64::w, 29};
|
||||
constexpr RegisterA64 w30{KindA64::w, 30};
|
||||
constexpr RegisterA64 wzr{KindA64::w, 31};
|
||||
|
||||
constexpr RegisterA64 x0{KindA64::x, 0};
|
||||
constexpr RegisterA64 x1{KindA64::x, 1};
|
||||
constexpr RegisterA64 x2{KindA64::x, 2};
|
||||
constexpr RegisterA64 x3{KindA64::x, 3};
|
||||
constexpr RegisterA64 x4{KindA64::x, 4};
|
||||
constexpr RegisterA64 x5{KindA64::x, 5};
|
||||
constexpr RegisterA64 x6{KindA64::x, 6};
|
||||
constexpr RegisterA64 x7{KindA64::x, 7};
|
||||
constexpr RegisterA64 x8{KindA64::x, 8};
|
||||
constexpr RegisterA64 x9{KindA64::x, 9};
|
||||
constexpr RegisterA64 x10{KindA64::x, 10};
|
||||
constexpr RegisterA64 x11{KindA64::x, 11};
|
||||
constexpr RegisterA64 x12{KindA64::x, 12};
|
||||
constexpr RegisterA64 x13{KindA64::x, 13};
|
||||
constexpr RegisterA64 x14{KindA64::x, 14};
|
||||
constexpr RegisterA64 x15{KindA64::x, 15};
|
||||
constexpr RegisterA64 x16{KindA64::x, 16};
|
||||
constexpr RegisterA64 x17{KindA64::x, 17};
|
||||
constexpr RegisterA64 x18{KindA64::x, 18};
|
||||
constexpr RegisterA64 x19{KindA64::x, 19};
|
||||
constexpr RegisterA64 x20{KindA64::x, 20};
|
||||
constexpr RegisterA64 x21{KindA64::x, 21};
|
||||
constexpr RegisterA64 x22{KindA64::x, 22};
|
||||
constexpr RegisterA64 x23{KindA64::x, 23};
|
||||
constexpr RegisterA64 x24{KindA64::x, 24};
|
||||
constexpr RegisterA64 x25{KindA64::x, 25};
|
||||
constexpr RegisterA64 x26{KindA64::x, 26};
|
||||
constexpr RegisterA64 x27{KindA64::x, 27};
|
||||
constexpr RegisterA64 x28{KindA64::x, 28};
|
||||
constexpr RegisterA64 x29{KindA64::x, 29};
|
||||
constexpr RegisterA64 x30{KindA64::x, 30};
|
||||
constexpr RegisterA64 xzr{KindA64::x, 31};
|
||||
|
||||
constexpr RegisterA64 sp{KindA64::none, 31};
|
||||
|
||||
constexpr RegisterA64 s0{KindA64::s, 0};
|
||||
constexpr RegisterA64 s1{KindA64::s, 1};
|
||||
constexpr RegisterA64 s2{KindA64::s, 2};
|
||||
constexpr RegisterA64 s3{KindA64::s, 3};
|
||||
constexpr RegisterA64 s4{KindA64::s, 4};
|
||||
constexpr RegisterA64 s5{KindA64::s, 5};
|
||||
constexpr RegisterA64 s6{KindA64::s, 6};
|
||||
constexpr RegisterA64 s7{KindA64::s, 7};
|
||||
constexpr RegisterA64 s8{KindA64::s, 8};
|
||||
constexpr RegisterA64 s9{KindA64::s, 9};
|
||||
constexpr RegisterA64 s10{KindA64::s, 10};
|
||||
constexpr RegisterA64 s11{KindA64::s, 11};
|
||||
constexpr RegisterA64 s12{KindA64::s, 12};
|
||||
constexpr RegisterA64 s13{KindA64::s, 13};
|
||||
constexpr RegisterA64 s14{KindA64::s, 14};
|
||||
constexpr RegisterA64 s15{KindA64::s, 15};
|
||||
constexpr RegisterA64 s16{KindA64::s, 16};
|
||||
constexpr RegisterA64 s17{KindA64::s, 17};
|
||||
constexpr RegisterA64 s18{KindA64::s, 18};
|
||||
constexpr RegisterA64 s19{KindA64::s, 19};
|
||||
constexpr RegisterA64 s20{KindA64::s, 20};
|
||||
constexpr RegisterA64 s21{KindA64::s, 21};
|
||||
constexpr RegisterA64 s22{KindA64::s, 22};
|
||||
constexpr RegisterA64 s23{KindA64::s, 23};
|
||||
constexpr RegisterA64 s24{KindA64::s, 24};
|
||||
constexpr RegisterA64 s25{KindA64::s, 25};
|
||||
constexpr RegisterA64 s26{KindA64::s, 26};
|
||||
constexpr RegisterA64 s27{KindA64::s, 27};
|
||||
constexpr RegisterA64 s28{KindA64::s, 28};
|
||||
constexpr RegisterA64 s29{KindA64::s, 29};
|
||||
constexpr RegisterA64 s30{KindA64::s, 30};
|
||||
constexpr RegisterA64 s31{KindA64::s, 31};
|
||||
|
||||
constexpr RegisterA64 d0{KindA64::d, 0};
|
||||
constexpr RegisterA64 d1{KindA64::d, 1};
|
||||
constexpr RegisterA64 d2{KindA64::d, 2};
|
||||
constexpr RegisterA64 d3{KindA64::d, 3};
|
||||
constexpr RegisterA64 d4{KindA64::d, 4};
|
||||
constexpr RegisterA64 d5{KindA64::d, 5};
|
||||
constexpr RegisterA64 d6{KindA64::d, 6};
|
||||
constexpr RegisterA64 d7{KindA64::d, 7};
|
||||
constexpr RegisterA64 d8{KindA64::d, 8};
|
||||
constexpr RegisterA64 d9{KindA64::d, 9};
|
||||
constexpr RegisterA64 d10{KindA64::d, 10};
|
||||
constexpr RegisterA64 d11{KindA64::d, 11};
|
||||
constexpr RegisterA64 d12{KindA64::d, 12};
|
||||
constexpr RegisterA64 d13{KindA64::d, 13};
|
||||
constexpr RegisterA64 d14{KindA64::d, 14};
|
||||
constexpr RegisterA64 d15{KindA64::d, 15};
|
||||
constexpr RegisterA64 d16{KindA64::d, 16};
|
||||
constexpr RegisterA64 d17{KindA64::d, 17};
|
||||
constexpr RegisterA64 d18{KindA64::d, 18};
|
||||
constexpr RegisterA64 d19{KindA64::d, 19};
|
||||
constexpr RegisterA64 d20{KindA64::d, 20};
|
||||
constexpr RegisterA64 d21{KindA64::d, 21};
|
||||
constexpr RegisterA64 d22{KindA64::d, 22};
|
||||
constexpr RegisterA64 d23{KindA64::d, 23};
|
||||
constexpr RegisterA64 d24{KindA64::d, 24};
|
||||
constexpr RegisterA64 d25{KindA64::d, 25};
|
||||
constexpr RegisterA64 d26{KindA64::d, 26};
|
||||
constexpr RegisterA64 d27{KindA64::d, 27};
|
||||
constexpr RegisterA64 d28{KindA64::d, 28};
|
||||
constexpr RegisterA64 d29{KindA64::d, 29};
|
||||
constexpr RegisterA64 d30{KindA64::d, 30};
|
||||
constexpr RegisterA64 d31{KindA64::d, 31};
|
||||
|
||||
constexpr RegisterA64 q0{KindA64::q, 0};
|
||||
constexpr RegisterA64 q1{KindA64::q, 1};
|
||||
constexpr RegisterA64 q2{KindA64::q, 2};
|
||||
constexpr RegisterA64 q3{KindA64::q, 3};
|
||||
constexpr RegisterA64 q4{KindA64::q, 4};
|
||||
constexpr RegisterA64 q5{KindA64::q, 5};
|
||||
constexpr RegisterA64 q6{KindA64::q, 6};
|
||||
constexpr RegisterA64 q7{KindA64::q, 7};
|
||||
constexpr RegisterA64 q8{KindA64::q, 8};
|
||||
constexpr RegisterA64 q9{KindA64::q, 9};
|
||||
constexpr RegisterA64 q10{KindA64::q, 10};
|
||||
constexpr RegisterA64 q11{KindA64::q, 11};
|
||||
constexpr RegisterA64 q12{KindA64::q, 12};
|
||||
constexpr RegisterA64 q13{KindA64::q, 13};
|
||||
constexpr RegisterA64 q14{KindA64::q, 14};
|
||||
constexpr RegisterA64 q15{KindA64::q, 15};
|
||||
constexpr RegisterA64 q16{KindA64::q, 16};
|
||||
constexpr RegisterA64 q17{KindA64::q, 17};
|
||||
constexpr RegisterA64 q18{KindA64::q, 18};
|
||||
constexpr RegisterA64 q19{KindA64::q, 19};
|
||||
constexpr RegisterA64 q20{KindA64::q, 20};
|
||||
constexpr RegisterA64 q21{KindA64::q, 21};
|
||||
constexpr RegisterA64 q22{KindA64::q, 22};
|
||||
constexpr RegisterA64 q23{KindA64::q, 23};
|
||||
constexpr RegisterA64 q24{KindA64::q, 24};
|
||||
constexpr RegisterA64 q25{KindA64::q, 25};
|
||||
constexpr RegisterA64 q26{KindA64::q, 26};
|
||||
constexpr RegisterA64 q27{KindA64::q, 27};
|
||||
constexpr RegisterA64 q28{KindA64::q, 28};
|
||||
constexpr RegisterA64 q29{KindA64::q, 29};
|
||||
constexpr RegisterA64 q30{KindA64::q, 30};
|
||||
constexpr RegisterA64 q31{KindA64::q, 31};
|
||||
|
||||
} // namespace A64
|
||||
} // namespace CodeGen
|
||||
} // namespace Luau
|
|
@ -0,0 +1,152 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#pragma once
|
||||
|
||||
#include "Luau/Common.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace CodeGen
|
||||
{
|
||||
namespace X64
|
||||
{
|
||||
|
||||
enum class SizeX64 : uint8_t
|
||||
{
|
||||
none,
|
||||
byte,
|
||||
word,
|
||||
dword,
|
||||
qword,
|
||||
xmmword,
|
||||
ymmword,
|
||||
};
|
||||
|
||||
struct RegisterX64
|
||||
{
|
||||
SizeX64 size : 3;
|
||||
uint8_t index : 5;
|
||||
|
||||
constexpr bool operator==(RegisterX64 rhs) const
|
||||
{
|
||||
return size == rhs.size && index == rhs.index;
|
||||
}
|
||||
|
||||
constexpr bool operator!=(RegisterX64 rhs) const
|
||||
{
|
||||
return !(*this == rhs);
|
||||
}
|
||||
};
|
||||
|
||||
constexpr RegisterX64 noreg{SizeX64::none, 16};
|
||||
constexpr RegisterX64 rip{SizeX64::none, 0};
|
||||
|
||||
constexpr RegisterX64 al{SizeX64::byte, 0};
|
||||
constexpr RegisterX64 cl{SizeX64::byte, 1};
|
||||
constexpr RegisterX64 dl{SizeX64::byte, 2};
|
||||
constexpr RegisterX64 bl{SizeX64::byte, 3};
|
||||
constexpr RegisterX64 spl{SizeX64::byte, 4};
|
||||
constexpr RegisterX64 bpl{SizeX64::byte, 5};
|
||||
constexpr RegisterX64 sil{SizeX64::byte, 6};
|
||||
constexpr RegisterX64 dil{SizeX64::byte, 7};
|
||||
constexpr RegisterX64 r8b{SizeX64::byte, 8};
|
||||
constexpr RegisterX64 r9b{SizeX64::byte, 9};
|
||||
constexpr RegisterX64 r10b{SizeX64::byte, 10};
|
||||
constexpr RegisterX64 r11b{SizeX64::byte, 11};
|
||||
constexpr RegisterX64 r12b{SizeX64::byte, 12};
|
||||
constexpr RegisterX64 r13b{SizeX64::byte, 13};
|
||||
constexpr RegisterX64 r14b{SizeX64::byte, 14};
|
||||
constexpr RegisterX64 r15b{SizeX64::byte, 15};
|
||||
|
||||
constexpr RegisterX64 eax{SizeX64::dword, 0};
|
||||
constexpr RegisterX64 ecx{SizeX64::dword, 1};
|
||||
constexpr RegisterX64 edx{SizeX64::dword, 2};
|
||||
constexpr RegisterX64 ebx{SizeX64::dword, 3};
|
||||
constexpr RegisterX64 esp{SizeX64::dword, 4};
|
||||
constexpr RegisterX64 ebp{SizeX64::dword, 5};
|
||||
constexpr RegisterX64 esi{SizeX64::dword, 6};
|
||||
constexpr RegisterX64 edi{SizeX64::dword, 7};
|
||||
constexpr RegisterX64 r8d{SizeX64::dword, 8};
|
||||
constexpr RegisterX64 r9d{SizeX64::dword, 9};
|
||||
constexpr RegisterX64 r10d{SizeX64::dword, 10};
|
||||
constexpr RegisterX64 r11d{SizeX64::dword, 11};
|
||||
constexpr RegisterX64 r12d{SizeX64::dword, 12};
|
||||
constexpr RegisterX64 r13d{SizeX64::dword, 13};
|
||||
constexpr RegisterX64 r14d{SizeX64::dword, 14};
|
||||
constexpr RegisterX64 r15d{SizeX64::dword, 15};
|
||||
|
||||
constexpr RegisterX64 rax{SizeX64::qword, 0};
|
||||
constexpr RegisterX64 rcx{SizeX64::qword, 1};
|
||||
constexpr RegisterX64 rdx{SizeX64::qword, 2};
|
||||
constexpr RegisterX64 rbx{SizeX64::qword, 3};
|
||||
constexpr RegisterX64 rsp{SizeX64::qword, 4};
|
||||
constexpr RegisterX64 rbp{SizeX64::qword, 5};
|
||||
constexpr RegisterX64 rsi{SizeX64::qword, 6};
|
||||
constexpr RegisterX64 rdi{SizeX64::qword, 7};
|
||||
constexpr RegisterX64 r8{SizeX64::qword, 8};
|
||||
constexpr RegisterX64 r9{SizeX64::qword, 9};
|
||||
constexpr RegisterX64 r10{SizeX64::qword, 10};
|
||||
constexpr RegisterX64 r11{SizeX64::qword, 11};
|
||||
constexpr RegisterX64 r12{SizeX64::qword, 12};
|
||||
constexpr RegisterX64 r13{SizeX64::qword, 13};
|
||||
constexpr RegisterX64 r14{SizeX64::qword, 14};
|
||||
constexpr RegisterX64 r15{SizeX64::qword, 15};
|
||||
|
||||
constexpr RegisterX64 xmm0{SizeX64::xmmword, 0};
|
||||
constexpr RegisterX64 xmm1{SizeX64::xmmword, 1};
|
||||
constexpr RegisterX64 xmm2{SizeX64::xmmword, 2};
|
||||
constexpr RegisterX64 xmm3{SizeX64::xmmword, 3};
|
||||
constexpr RegisterX64 xmm4{SizeX64::xmmword, 4};
|
||||
constexpr RegisterX64 xmm5{SizeX64::xmmword, 5};
|
||||
constexpr RegisterX64 xmm6{SizeX64::xmmword, 6};
|
||||
constexpr RegisterX64 xmm7{SizeX64::xmmword, 7};
|
||||
constexpr RegisterX64 xmm8{SizeX64::xmmword, 8};
|
||||
constexpr RegisterX64 xmm9{SizeX64::xmmword, 9};
|
||||
constexpr RegisterX64 xmm10{SizeX64::xmmword, 10};
|
||||
constexpr RegisterX64 xmm11{SizeX64::xmmword, 11};
|
||||
constexpr RegisterX64 xmm12{SizeX64::xmmword, 12};
|
||||
constexpr RegisterX64 xmm13{SizeX64::xmmword, 13};
|
||||
constexpr RegisterX64 xmm14{SizeX64::xmmword, 14};
|
||||
constexpr RegisterX64 xmm15{SizeX64::xmmword, 15};
|
||||
|
||||
constexpr RegisterX64 ymm0{SizeX64::ymmword, 0};
|
||||
constexpr RegisterX64 ymm1{SizeX64::ymmword, 1};
|
||||
constexpr RegisterX64 ymm2{SizeX64::ymmword, 2};
|
||||
constexpr RegisterX64 ymm3{SizeX64::ymmword, 3};
|
||||
constexpr RegisterX64 ymm4{SizeX64::ymmword, 4};
|
||||
constexpr RegisterX64 ymm5{SizeX64::ymmword, 5};
|
||||
constexpr RegisterX64 ymm6{SizeX64::ymmword, 6};
|
||||
constexpr RegisterX64 ymm7{SizeX64::ymmword, 7};
|
||||
constexpr RegisterX64 ymm8{SizeX64::ymmword, 8};
|
||||
constexpr RegisterX64 ymm9{SizeX64::ymmword, 9};
|
||||
constexpr RegisterX64 ymm10{SizeX64::ymmword, 10};
|
||||
constexpr RegisterX64 ymm11{SizeX64::ymmword, 11};
|
||||
constexpr RegisterX64 ymm12{SizeX64::ymmword, 12};
|
||||
constexpr RegisterX64 ymm13{SizeX64::ymmword, 13};
|
||||
constexpr RegisterX64 ymm14{SizeX64::ymmword, 14};
|
||||
constexpr RegisterX64 ymm15{SizeX64::ymmword, 15};
|
||||
|
||||
constexpr RegisterX64 byteReg(RegisterX64 reg)
|
||||
{
|
||||
return RegisterX64{SizeX64::byte, reg.index};
|
||||
}
|
||||
|
||||
constexpr RegisterX64 wordReg(RegisterX64 reg)
|
||||
{
|
||||
return RegisterX64{SizeX64::word, reg.index};
|
||||
}
|
||||
|
||||
constexpr RegisterX64 dwordReg(RegisterX64 reg)
|
||||
{
|
||||
return RegisterX64{SizeX64::dword, reg.index};
|
||||
}
|
||||
|
||||
constexpr RegisterX64 qwordReg(RegisterX64 reg)
|
||||
{
|
||||
return RegisterX64{SizeX64::qword, reg.index};
|
||||
}
|
||||
|
||||
} // namespace X64
|
||||
} // namespace CodeGen
|
||||
} // namespace Luau
|
|
@ -0,0 +1,61 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#pragma once
|
||||
|
||||
#include "Luau/RegisterA64.h"
|
||||
#include "Luau/RegisterX64.h"
|
||||
|
||||
#include <initializer_list>
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace CodeGen
|
||||
{
|
||||
|
||||
// This value is used in 'finishFunction' to mark the function that spans to the end of the whole code block
|
||||
static uint32_t kFullBlockFuncton = ~0u;
|
||||
|
||||
class UnwindBuilder
|
||||
{
|
||||
public:
|
||||
enum Arch
|
||||
{
|
||||
X64,
|
||||
A64
|
||||
};
|
||||
|
||||
virtual ~UnwindBuilder() = default;
|
||||
|
||||
virtual void setBeginOffset(size_t beginOffset) = 0;
|
||||
virtual size_t getBeginOffset() const = 0;
|
||||
|
||||
virtual void startInfo(Arch arch) = 0;
|
||||
virtual void startFunction() = 0;
|
||||
virtual void finishFunction(uint32_t beginOffset, uint32_t endOffset) = 0;
|
||||
virtual void finishInfo() = 0;
|
||||
|
||||
// A64-specific; prologue must look like this:
|
||||
// sub sp, sp, stackSize
|
||||
// store sequence that saves regs to [sp..sp+regs.size*8) in the order specified in regs; regs should start with x29, x30 (fp, lr)
|
||||
// mov x29, sp
|
||||
virtual void prologueA64(uint32_t prologueSize, uint32_t stackSize, std::initializer_list<A64::RegisterA64> regs) = 0;
|
||||
|
||||
// X64-specific; prologue must look like this:
|
||||
// optional, indicated by setupFrame:
|
||||
// push rbp
|
||||
// mov rbp, rsp
|
||||
// push reg in the order specified in regs
|
||||
// sub rsp, stackSize
|
||||
virtual void prologueX64(uint32_t prologueSize, uint32_t stackSize, bool setupFrame, std::initializer_list<X64::RegisterX64> regs) = 0;
|
||||
|
||||
virtual size_t getSize() const = 0;
|
||||
virtual size_t getFunctionCount() const = 0;
|
||||
|
||||
// This will place the unwinding data at the target address and might update values of some fields
|
||||
virtual void finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const = 0;
|
||||
};
|
||||
|
||||
} // namespace CodeGen
|
||||
} // namespace Luau
|
|
@ -0,0 +1,54 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#pragma once
|
||||
|
||||
#include "Luau/RegisterX64.h"
|
||||
#include "UnwindBuilder.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace CodeGen
|
||||
{
|
||||
|
||||
struct UnwindFunctionDwarf2
|
||||
{
|
||||
uint32_t beginOffset;
|
||||
uint32_t endOffset;
|
||||
uint32_t fdeEntryStartPos;
|
||||
};
|
||||
|
||||
class UnwindBuilderDwarf2 : public UnwindBuilder
|
||||
{
|
||||
public:
|
||||
void setBeginOffset(size_t beginOffset) override;
|
||||
size_t getBeginOffset() const override;
|
||||
|
||||
void startInfo(Arch arch) override;
|
||||
void startFunction() override;
|
||||
void finishFunction(uint32_t beginOffset, uint32_t endOffset) override;
|
||||
void finishInfo() override;
|
||||
|
||||
void prologueA64(uint32_t prologueSize, uint32_t stackSize, std::initializer_list<A64::RegisterA64> regs) override;
|
||||
void prologueX64(uint32_t prologueSize, uint32_t stackSize, bool setupFrame, std::initializer_list<X64::RegisterX64> regs) override;
|
||||
|
||||
size_t getSize() const override;
|
||||
size_t getFunctionCount() const override;
|
||||
|
||||
void finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const override;
|
||||
|
||||
private:
|
||||
size_t beginOffset = 0;
|
||||
|
||||
std::vector<UnwindFunctionDwarf2> unwindFunctions;
|
||||
|
||||
static const unsigned kRawDataLimit = 1024;
|
||||
uint8_t rawData[kRawDataLimit];
|
||||
uint8_t* pos = rawData;
|
||||
|
||||
// We will remember the FDE location to write some of the fields like entry length, function start and size later
|
||||
uint8_t* fdeEntryStart = nullptr;
|
||||
};
|
||||
|
||||
} // namespace CodeGen
|
||||
} // namespace Luau
|
|
@ -0,0 +1,78 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#pragma once
|
||||
|
||||
#include "Luau/RegisterX64.h"
|
||||
#include "UnwindBuilder.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace CodeGen
|
||||
{
|
||||
|
||||
// This struct matches the layout of x64 RUNTIME_FUNCTION from winnt.h
|
||||
struct UnwindFunctionWin
|
||||
{
|
||||
uint32_t beginOffset;
|
||||
uint32_t endOffset;
|
||||
uint32_t unwindInfoOffset;
|
||||
};
|
||||
|
||||
// This struct matches the layout of x64 UNWIND_INFO from ehdata.h
|
||||
struct UnwindInfoWin
|
||||
{
|
||||
uint8_t version : 3;
|
||||
uint8_t flags : 5;
|
||||
uint8_t prologsize;
|
||||
uint8_t unwindcodecount;
|
||||
uint8_t framereg : 4;
|
||||
uint8_t frameregoff : 4;
|
||||
};
|
||||
|
||||
// This struct matches the layout of UNWIND_CODE from ehdata.h
|
||||
struct UnwindCodeWin
|
||||
{
|
||||
uint8_t offset;
|
||||
uint8_t opcode : 4;
|
||||
uint8_t opinfo : 4;
|
||||
};
|
||||
|
||||
class UnwindBuilderWin : public UnwindBuilder
|
||||
{
|
||||
public:
|
||||
void setBeginOffset(size_t beginOffset) override;
|
||||
size_t getBeginOffset() const override;
|
||||
|
||||
void startInfo(Arch arch) override;
|
||||
void startFunction() override;
|
||||
void finishFunction(uint32_t beginOffset, uint32_t endOffset) override;
|
||||
void finishInfo() override;
|
||||
|
||||
void prologueA64(uint32_t prologueSize, uint32_t stackSize, std::initializer_list<A64::RegisterA64> regs) override;
|
||||
void prologueX64(uint32_t prologueSize, uint32_t stackSize, bool setupFrame, std::initializer_list<X64::RegisterX64> regs) override;
|
||||
|
||||
size_t getSize() const override;
|
||||
size_t getFunctionCount() const override;
|
||||
|
||||
void finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const override;
|
||||
|
||||
private:
|
||||
size_t beginOffset = 0;
|
||||
|
||||
static const unsigned kRawDataLimit = 1024;
|
||||
uint8_t rawData[kRawDataLimit];
|
||||
uint8_t* rawDataPos = rawData;
|
||||
|
||||
std::vector<UnwindFunctionWin> unwindFunctions;
|
||||
|
||||
// Windows unwind codes are written in reverse, so we have to collect them all first
|
||||
std::vector<UnwindCodeWin> unwindCodes;
|
||||
|
||||
uint8_t prologSize = 0;
|
||||
X64::RegisterX64 frameReg = X64::noreg;
|
||||
uint8_t frameRegOffset = 0;
|
||||
};
|
||||
|
||||
} // namespace CodeGen
|
||||
} // namespace Luau
|
|
@ -0,0 +1,18 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#pragma once
|
||||
|
||||
// Can be used to reconfigure visibility/exports for public APIs
|
||||
#ifndef LUACODEGEN_API
|
||||
#define LUACODEGEN_API extern
|
||||
#endif
|
||||
|
||||
struct lua_State;
|
||||
|
||||
// returns 1 if Luau code generator is supported, 0 otherwise
|
||||
LUACODEGEN_API int luau_codegen_supported(void);
|
||||
|
||||
// create an instance of Luau code generator. you must check that this feature is supported using luau_codegen_supported().
|
||||
LUACODEGEN_API void luau_codegen_create(lua_State* L);
|
||||
|
||||
// build target function and all inner functions
|
||||
LUACODEGEN_API void luau_codegen_compile(lua_State* L, int idx);
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,56 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace CodeGen
|
||||
{
|
||||
|
||||
inline int countlz(uint32_t n)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
unsigned long rl;
|
||||
return _BitScanReverse(&rl, n) ? 31 - int(rl) : 32;
|
||||
#else
|
||||
return n == 0 ? 32 : __builtin_clz(n);
|
||||
#endif
|
||||
}
|
||||
|
||||
inline int countrz(uint32_t n)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
unsigned long rl;
|
||||
return _BitScanForward(&rl, n) ? int(rl) : 32;
|
||||
#else
|
||||
return n == 0 ? 32 : __builtin_ctz(n);
|
||||
#endif
|
||||
}
|
||||
|
||||
inline int lrotate(uint32_t u, int s)
|
||||
{
|
||||
// MSVC doesn't recognize the rotate form that is UB-safe
|
||||
#ifdef _MSC_VER
|
||||
return _rotl(u, s);
|
||||
#else
|
||||
return (u << (s & 31)) | (u >> ((32 - s) & 31));
|
||||
#endif
|
||||
}
|
||||
|
||||
inline int rrotate(uint32_t u, int s)
|
||||
{
|
||||
// MSVC doesn't recognize the rotate form that is UB-safe
|
||||
#ifdef _MSC_VER
|
||||
return _rotr(u, s);
|
||||
#else
|
||||
return (u >> (s & 31)) | (u << ((32 - s) & 31));
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace CodeGen
|
||||
} // namespace Luau
|
|
@ -0,0 +1,80 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#pragma once
|
||||
|
||||
#include "Luau/Common.h"
|
||||
|
||||
#if defined(LUAU_BIG_ENDIAN)
|
||||
#include <endian.h>
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
|
||||
inline uint8_t* writeu8(uint8_t* target, uint8_t value)
|
||||
{
|
||||
*target = value;
|
||||
return target + sizeof(value);
|
||||
}
|
||||
|
||||
inline uint8_t* writeu32(uint8_t* target, uint32_t value)
|
||||
{
|
||||
#if defined(LUAU_BIG_ENDIAN)
|
||||
value = htole32(value);
|
||||
#endif
|
||||
|
||||
memcpy(target, &value, sizeof(value));
|
||||
return target + sizeof(value);
|
||||
}
|
||||
|
||||
inline uint8_t* writeu64(uint8_t* target, uint64_t value)
|
||||
{
|
||||
#if defined(LUAU_BIG_ENDIAN)
|
||||
value = htole64(value);
|
||||
#endif
|
||||
|
||||
memcpy(target, &value, sizeof(value));
|
||||
return target + sizeof(value);
|
||||
}
|
||||
|
||||
inline uint8_t* writeuleb128(uint8_t* target, uint64_t value)
|
||||
{
|
||||
do
|
||||
{
|
||||
uint8_t byte = value & 0x7f;
|
||||
value >>= 7;
|
||||
|
||||
if (value)
|
||||
byte |= 0x80;
|
||||
|
||||
*target++ = byte;
|
||||
} while (value);
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
inline uint8_t* writef32(uint8_t* target, float value)
|
||||
{
|
||||
#if defined(LUAU_BIG_ENDIAN)
|
||||
static_assert(sizeof(float) == sizeof(uint32_t), "type size must match to reinterpret data");
|
||||
uint32_t data;
|
||||
memcpy(&data, &value, sizeof(value));
|
||||
writeu32(target, data);
|
||||
#else
|
||||
memcpy(target, &value, sizeof(value));
|
||||
#endif
|
||||
|
||||
return target + sizeof(value);
|
||||
}
|
||||
|
||||
inline uint8_t* writef64(uint8_t* target, double value)
|
||||
{
|
||||
#if defined(LUAU_BIG_ENDIAN)
|
||||
static_assert(sizeof(double) == sizeof(uint64_t), "type size must match to reinterpret data");
|
||||
uint64_t data;
|
||||
memcpy(&data, &value, sizeof(value));
|
||||
writeu64(target, data);
|
||||
#else
|
||||
memcpy(target, &value, sizeof(value));
|
||||
#endif
|
||||
|
||||
return target + sizeof(value);
|
||||
}
|
|
@ -0,0 +1,209 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#include "Luau/CodeAllocator.h"
|
||||
|
||||
#include "Luau/Common.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#endif
|
||||
#ifndef NOMINMAX
|
||||
#define NOMINMAX
|
||||
#endif
|
||||
#include <Windows.h>
|
||||
|
||||
const size_t kPageSize = 4096;
|
||||
#else
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#if defined(__FreeBSD__) && !(_POSIX_C_SOURCE >= 200112L)
|
||||
const size_t kPageSize = getpagesize();
|
||||
#else
|
||||
const size_t kPageSize = sysconf(_SC_PAGESIZE);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
static size_t alignToPageSize(size_t size)
|
||||
{
|
||||
return (size + kPageSize - 1) & ~(kPageSize - 1);
|
||||
}
|
||||
|
||||
#if defined(_WIN32)
|
||||
static uint8_t* allocatePages(size_t size)
|
||||
{
|
||||
return (uint8_t*)VirtualAlloc(nullptr, alignToPageSize(size), MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
|
||||
}
|
||||
|
||||
static void freePages(uint8_t* mem, size_t size)
|
||||
{
|
||||
if (VirtualFree(mem, 0, MEM_RELEASE) == 0)
|
||||
LUAU_ASSERT(!"failed to deallocate block memory");
|
||||
}
|
||||
|
||||
static void makePagesExecutable(uint8_t* mem, size_t size)
|
||||
{
|
||||
LUAU_ASSERT((uintptr_t(mem) & (kPageSize - 1)) == 0);
|
||||
LUAU_ASSERT(size == alignToPageSize(size));
|
||||
|
||||
DWORD oldProtect;
|
||||
if (VirtualProtect(mem, size, PAGE_EXECUTE_READ, &oldProtect) == 0)
|
||||
LUAU_ASSERT(!"failed to change page protection");
|
||||
}
|
||||
|
||||
static void flushInstructionCache(uint8_t* mem, size_t size)
|
||||
{
|
||||
if (FlushInstructionCache(GetCurrentProcess(), mem, size) == 0)
|
||||
LUAU_ASSERT(!"failed to flush instruction cache");
|
||||
}
|
||||
#else
|
||||
static uint8_t* allocatePages(size_t size)
|
||||
{
|
||||
return (uint8_t*)mmap(nullptr, alignToPageSize(size), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
|
||||
}
|
||||
|
||||
static void freePages(uint8_t* mem, size_t size)
|
||||
{
|
||||
if (munmap(mem, alignToPageSize(size)) != 0)
|
||||
LUAU_ASSERT(!"failed to deallocate block memory");
|
||||
}
|
||||
|
||||
static void makePagesExecutable(uint8_t* mem, size_t size)
|
||||
{
|
||||
LUAU_ASSERT((uintptr_t(mem) & (kPageSize - 1)) == 0);
|
||||
LUAU_ASSERT(size == alignToPageSize(size));
|
||||
|
||||
if (mprotect(mem, size, PROT_READ | PROT_EXEC) != 0)
|
||||
LUAU_ASSERT(!"failed to change page protection");
|
||||
}
|
||||
|
||||
static void flushInstructionCache(uint8_t* mem, size_t size)
|
||||
{
|
||||
__builtin___clear_cache((char*)mem, (char*)mem + size);
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace CodeGen
|
||||
{
|
||||
|
||||
CodeAllocator::CodeAllocator(size_t blockSize, size_t maxTotalSize)
|
||||
: blockSize(blockSize)
|
||||
, maxTotalSize(maxTotalSize)
|
||||
{
|
||||
LUAU_ASSERT(blockSize > kMaxReservedDataSize);
|
||||
LUAU_ASSERT(maxTotalSize >= blockSize);
|
||||
}
|
||||
|
||||
CodeAllocator::~CodeAllocator()
|
||||
{
|
||||
if (destroyBlockUnwindInfo)
|
||||
{
|
||||
for (void* unwindInfo : unwindInfos)
|
||||
destroyBlockUnwindInfo(context, unwindInfo);
|
||||
}
|
||||
|
||||
for (uint8_t* block : blocks)
|
||||
freePages(block, blockSize);
|
||||
}
|
||||
|
||||
bool CodeAllocator::allocate(
|
||||
const uint8_t* data, size_t dataSize, const uint8_t* code, size_t codeSize, uint8_t*& result, size_t& resultSize, uint8_t*& resultCodeStart)
|
||||
{
|
||||
// 'Round up' to preserve code alignment
|
||||
size_t alignedDataSize = (dataSize + (kCodeAlignment - 1)) & ~(kCodeAlignment - 1);
|
||||
|
||||
size_t totalSize = alignedDataSize + codeSize;
|
||||
|
||||
// Function has to fit into a single block with unwinding information
|
||||
if (totalSize > blockSize - kMaxReservedDataSize)
|
||||
return false;
|
||||
|
||||
size_t startOffset = 0;
|
||||
|
||||
// We might need a new block
|
||||
if (totalSize > size_t(blockEnd - blockPos))
|
||||
{
|
||||
if (!allocateNewBlock(startOffset))
|
||||
return false;
|
||||
|
||||
LUAU_ASSERT(totalSize <= size_t(blockEnd - blockPos));
|
||||
}
|
||||
|
||||
LUAU_ASSERT((uintptr_t(blockPos) & (kPageSize - 1)) == 0); // Allocation starts on page boundary
|
||||
|
||||
size_t dataOffset = startOffset + alignedDataSize - dataSize;
|
||||
size_t codeOffset = startOffset + alignedDataSize;
|
||||
|
||||
if (dataSize)
|
||||
memcpy(blockPos + dataOffset, data, dataSize);
|
||||
if (codeSize)
|
||||
memcpy(blockPos + codeOffset, code, codeSize);
|
||||
|
||||
size_t pageAlignedSize = alignToPageSize(startOffset + totalSize);
|
||||
|
||||
makePagesExecutable(blockPos, pageAlignedSize);
|
||||
flushInstructionCache(blockPos + codeOffset, codeSize);
|
||||
|
||||
result = blockPos + startOffset;
|
||||
resultSize = totalSize;
|
||||
resultCodeStart = blockPos + codeOffset;
|
||||
|
||||
// Ensure that future allocations from the block start from a page boundary.
|
||||
// This is important since we use W^X, and writing to the previous page would require briefly removing
|
||||
// executable bit from it, which may result in access violations if that code is being executed concurrently.
|
||||
if (pageAlignedSize <= size_t(blockEnd - blockPos))
|
||||
{
|
||||
blockPos += pageAlignedSize;
|
||||
LUAU_ASSERT((uintptr_t(blockPos) & (kPageSize - 1)) == 0);
|
||||
LUAU_ASSERT(blockPos <= blockEnd);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Future allocations will need to allocate fresh blocks
|
||||
blockPos = blockEnd;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CodeAllocator::allocateNewBlock(size_t& unwindInfoSize)
|
||||
{
|
||||
// Stop allocating once we reach a global limit
|
||||
if ((blocks.size() + 1) * blockSize > maxTotalSize)
|
||||
return false;
|
||||
|
||||
uint8_t* block = allocatePages(blockSize);
|
||||
|
||||
if (!block)
|
||||
return false;
|
||||
|
||||
blockPos = block;
|
||||
blockEnd = block + blockSize;
|
||||
|
||||
blocks.push_back(block);
|
||||
|
||||
if (createBlockUnwindInfo)
|
||||
{
|
||||
void* unwindInfo = createBlockUnwindInfo(context, block, blockSize, unwindInfoSize);
|
||||
|
||||
// 'Round up' to preserve alignment of the following data and code
|
||||
unwindInfoSize = (unwindInfoSize + (kCodeAlignment - 1)) & ~(kCodeAlignment - 1);
|
||||
|
||||
LUAU_ASSERT(unwindInfoSize <= kMaxReservedDataSize);
|
||||
|
||||
if (!unwindInfo)
|
||||
return false;
|
||||
|
||||
unwindInfos.push_back(unwindInfo);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace CodeGen
|
||||
} // namespace Luau
|
|
@ -0,0 +1,121 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#include "Luau/CodeBlockUnwind.h"
|
||||
|
||||
#include "Luau/CodeAllocator.h"
|
||||
#include "Luau/UnwindBuilder.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if defined(_WIN32) && defined(_M_X64)
|
||||
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#endif
|
||||
#ifndef NOMINMAX
|
||||
#define NOMINMAX
|
||||
#endif
|
||||
#include <Windows.h>
|
||||
|
||||
#elif defined(__linux__) || defined(__APPLE__)
|
||||
|
||||
// Defined in unwind.h which may not be easily discoverable on various platforms
|
||||
extern "C" void __register_frame(const void*);
|
||||
extern "C" void __deregister_frame(const void*);
|
||||
|
||||
extern "C" void __unw_add_dynamic_fde() __attribute__((weak));
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
#include <sys/sysctl.h>
|
||||
#endif
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace CodeGen
|
||||
{
|
||||
|
||||
#if defined(__linux__) || defined(__APPLE__)
|
||||
static void visitFdeEntries(char* pos, void (*cb)(const void*))
|
||||
{
|
||||
// When using glibc++ unwinder, we need to call __register_frame/__deregister_frame on the entire .eh_frame data
|
||||
// When using libc++ unwinder (libunwind), each FDE has to be handled separately
|
||||
// libc++ unwinder is the macOS unwinder, but on Linux the unwinder depends on the library the executable is linked with
|
||||
// __unw_add_dynamic_fde is specific to libc++ unwinder, as such we determine the library based on its existence
|
||||
if (__unw_add_dynamic_fde == nullptr)
|
||||
return cb(pos);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
unsigned partLength;
|
||||
memcpy(&partLength, pos, sizeof(partLength));
|
||||
|
||||
if (partLength == 0) // Zero-length section signals completion
|
||||
break;
|
||||
|
||||
unsigned partId;
|
||||
memcpy(&partId, pos + 4, sizeof(partId));
|
||||
|
||||
if (partId != 0) // Skip CIE part
|
||||
cb(pos); // CIE is found using an offset in FDE
|
||||
|
||||
pos += partLength + 4;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void* createBlockUnwindInfo(void* context, uint8_t* block, size_t blockSize, size_t& beginOffset)
|
||||
{
|
||||
UnwindBuilder* unwind = (UnwindBuilder*)context;
|
||||
|
||||
// All unwinding related data is placed together at the start of the block
|
||||
size_t unwindSize = unwind->getSize();
|
||||
unwindSize = (unwindSize + (kCodeAlignment - 1)) & ~(kCodeAlignment - 1); // Match code allocator alignment
|
||||
LUAU_ASSERT(blockSize >= unwindSize);
|
||||
|
||||
char* unwindData = (char*)block;
|
||||
unwind->finalize(unwindData, unwindSize, block, blockSize);
|
||||
|
||||
#if defined(_WIN32) && defined(_M_X64)
|
||||
if (!RtlAddFunctionTable((RUNTIME_FUNCTION*)block, uint32_t(unwind->getFunctionCount()), uintptr_t(block)))
|
||||
{
|
||||
LUAU_ASSERT(!"failed to allocate function table");
|
||||
return nullptr;
|
||||
}
|
||||
#elif defined(__linux__) || defined(__APPLE__)
|
||||
visitFdeEntries(unwindData, __register_frame);
|
||||
#endif
|
||||
|
||||
beginOffset = unwindSize + unwind->getBeginOffset();
|
||||
return block;
|
||||
}
|
||||
|
||||
void destroyBlockUnwindInfo(void* context, void* unwindData)
|
||||
{
|
||||
#if defined(_WIN32) && defined(_M_X64)
|
||||
if (!RtlDeleteFunctionTable((RUNTIME_FUNCTION*)unwindData))
|
||||
LUAU_ASSERT(!"failed to deallocate function table");
|
||||
#elif defined(__linux__) || defined(__APPLE__)
|
||||
visitFdeEntries((char*)unwindData, __deregister_frame);
|
||||
#endif
|
||||
}
|
||||
|
||||
bool isUnwindSupported()
|
||||
{
|
||||
#if defined(_WIN32) && defined(_M_X64)
|
||||
return true;
|
||||
#elif defined(__APPLE__) && defined(__aarch64__)
|
||||
char ver[256];
|
||||
size_t verLength = sizeof(ver);
|
||||
// libunwind on macOS 12 and earlier (which maps to osrelease 21) assumes JIT frames use pointer authentication without a way to override that
|
||||
return sysctlbyname("kern.osrelease", ver, &verLength, NULL, 0) == 0 && atoi(ver) >= 22;
|
||||
#elif defined(__linux__) || defined(__APPLE__)
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace CodeGen
|
||||
} // namespace Luau
|
|
@ -0,0 +1,616 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#include "Luau/CodeGen.h"
|
||||
|
||||
#include "Luau/Common.h"
|
||||
#include "Luau/CodeAllocator.h"
|
||||
#include "Luau/CodeBlockUnwind.h"
|
||||
#include "Luau/IrAnalysis.h"
|
||||
#include "Luau/IrBuilder.h"
|
||||
#include "Luau/IrDump.h"
|
||||
#include "Luau/IrUtils.h"
|
||||
#include "Luau/OptimizeConstProp.h"
|
||||
#include "Luau/OptimizeFinalX64.h"
|
||||
|
||||
#include "Luau/UnwindBuilder.h"
|
||||
#include "Luau/UnwindBuilderDwarf2.h"
|
||||
#include "Luau/UnwindBuilderWin.h"
|
||||
|
||||
#include "Luau/AssemblyBuilderA64.h"
|
||||
#include "Luau/AssemblyBuilderX64.h"
|
||||
|
||||
#include "CustomExecUtils.h"
|
||||
#include "NativeState.h"
|
||||
|
||||
#include "CodeGenA64.h"
|
||||
#include "EmitCommonA64.h"
|
||||
#include "IrLoweringA64.h"
|
||||
|
||||
#include "CodeGenX64.h"
|
||||
#include "EmitCommonX64.h"
|
||||
#include "EmitInstructionX64.h"
|
||||
#include "IrLoweringX64.h"
|
||||
|
||||
#include "lapi.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
|
||||
#if defined(__x86_64__) || defined(_M_X64)
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h> // __cpuid
|
||||
#else
|
||||
#include <cpuid.h> // __cpuid
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__)
|
||||
#ifdef __APPLE__
|
||||
#include <sys/sysctl.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
LUAU_FASTFLAGVARIABLE(DebugCodegenNoOpt, false)
|
||||
LUAU_FASTFLAGVARIABLE(DebugCodegenOptSize, false)
|
||||
LUAU_FASTFLAGVARIABLE(DebugCodegenSkipNumbering, false)
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace CodeGen
|
||||
{
|
||||
|
||||
static void* gPerfLogContext = nullptr;
|
||||
static PerfLogFn gPerfLogFn = nullptr;
|
||||
|
||||
struct NativeProto
|
||||
{
|
||||
Proto* p;
|
||||
void* execdata;
|
||||
uintptr_t exectarget;
|
||||
};
|
||||
|
||||
static NativeProto createNativeProto(Proto* proto, const IrBuilder& ir)
|
||||
{
|
||||
int sizecode = proto->sizecode;
|
||||
|
||||
uint32_t* instOffsets = new uint32_t[sizecode];
|
||||
uint32_t instTarget = ir.function.bcMapping[0].asmLocation;
|
||||
|
||||
for (int i = 0; i < sizecode; i++)
|
||||
{
|
||||
LUAU_ASSERT(ir.function.bcMapping[i].asmLocation >= instTarget);
|
||||
|
||||
instOffsets[i] = ir.function.bcMapping[i].asmLocation - instTarget;
|
||||
}
|
||||
|
||||
// entry target will be relocated when assembly is finalized
|
||||
return {proto, instOffsets, instTarget};
|
||||
}
|
||||
|
||||
static void destroyExecData(void* execdata)
|
||||
{
|
||||
delete[] static_cast<uint32_t*>(execdata);
|
||||
}
|
||||
|
||||
static void logPerfFunction(Proto* p, uintptr_t addr, unsigned size)
|
||||
{
|
||||
LUAU_ASSERT(p->source);
|
||||
|
||||
const char* source = getstr(p->source);
|
||||
source = (source[0] == '=' || source[0] == '@') ? source + 1 : "[string]";
|
||||
|
||||
char name[256];
|
||||
snprintf(name, sizeof(name), "<luau> %s:%d %s", source, p->linedefined, p->debugname ? getstr(p->debugname) : "");
|
||||
|
||||
if (gPerfLogFn)
|
||||
gPerfLogFn(gPerfLogContext, addr, size, name);
|
||||
}
|
||||
|
||||
template<typename AssemblyBuilder, typename IrLowering>
|
||||
static bool lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction& function, int bytecodeid, AssemblyOptions options)
|
||||
{
|
||||
// While we will need a better block ordering in the future, right now we want to mostly preserve build order with fallbacks outlined
|
||||
std::vector<uint32_t> sortedBlocks;
|
||||
sortedBlocks.reserve(function.blocks.size());
|
||||
for (uint32_t i = 0; i < function.blocks.size(); i++)
|
||||
sortedBlocks.push_back(i);
|
||||
|
||||
std::sort(sortedBlocks.begin(), sortedBlocks.end(), [&](uint32_t idxA, uint32_t idxB) {
|
||||
const IrBlock& a = function.blocks[idxA];
|
||||
const IrBlock& b = function.blocks[idxB];
|
||||
|
||||
// Place fallback blocks at the end
|
||||
if ((a.kind == IrBlockKind::Fallback) != (b.kind == IrBlockKind::Fallback))
|
||||
return (a.kind == IrBlockKind::Fallback) < (b.kind == IrBlockKind::Fallback);
|
||||
|
||||
// Try to order by instruction order
|
||||
return a.start < b.start;
|
||||
});
|
||||
|
||||
// For each IR instruction that begins a bytecode instruction, which bytecode instruction is it?
|
||||
std::vector<uint32_t> bcLocations(function.instructions.size() + 1, ~0u);
|
||||
|
||||
for (size_t i = 0; i < function.bcMapping.size(); ++i)
|
||||
{
|
||||
uint32_t irLocation = function.bcMapping[i].irLocation;
|
||||
|
||||
if (irLocation != ~0u)
|
||||
bcLocations[irLocation] = uint32_t(i);
|
||||
}
|
||||
|
||||
bool outputEnabled = options.includeAssembly || options.includeIr;
|
||||
|
||||
IrToStringContext ctx{build.text, function.blocks, function.constants, function.cfg};
|
||||
|
||||
// We use this to skip outlined fallback blocks from IR/asm text output
|
||||
size_t textSize = build.text.length();
|
||||
uint32_t codeSize = build.getCodeSize();
|
||||
bool seenFallback = false;
|
||||
|
||||
IrBlock dummy;
|
||||
dummy.start = ~0u;
|
||||
|
||||
for (size_t i = 0; i < sortedBlocks.size(); ++i)
|
||||
{
|
||||
uint32_t blockIndex = sortedBlocks[i];
|
||||
IrBlock& block = function.blocks[blockIndex];
|
||||
|
||||
if (block.kind == IrBlockKind::Dead)
|
||||
continue;
|
||||
|
||||
LUAU_ASSERT(block.start != ~0u);
|
||||
LUAU_ASSERT(block.finish != ~0u);
|
||||
|
||||
// If we want to skip fallback code IR/asm, we'll record when those blocks start once we see them
|
||||
if (block.kind == IrBlockKind::Fallback && !seenFallback)
|
||||
{
|
||||
textSize = build.text.length();
|
||||
codeSize = build.getCodeSize();
|
||||
seenFallback = true;
|
||||
}
|
||||
|
||||
if (options.includeIr)
|
||||
{
|
||||
build.logAppend("# ");
|
||||
toStringDetailed(ctx, block, blockIndex, /* includeUseInfo */ true);
|
||||
}
|
||||
|
||||
// Values can only reference restore operands in the current block
|
||||
function.validRestoreOpBlockIdx = blockIndex;
|
||||
|
||||
build.setLabel(block.label);
|
||||
|
||||
for (uint32_t index = block.start; index <= block.finish; index++)
|
||||
{
|
||||
LUAU_ASSERT(index < function.instructions.size());
|
||||
|
||||
uint32_t bcLocation = bcLocations[index];
|
||||
|
||||
// If IR instruction is the first one for the original bytecode, we can annotate it with source code text
|
||||
if (outputEnabled && options.annotator && bcLocation != ~0u)
|
||||
{
|
||||
options.annotator(options.annotatorContext, build.text, bytecodeid, bcLocation);
|
||||
}
|
||||
|
||||
// If bytecode needs the location of this instruction for jumps, record it
|
||||
if (bcLocation != ~0u)
|
||||
{
|
||||
Label label = (index == block.start) ? block.label : build.setLabel();
|
||||
function.bcMapping[bcLocation].asmLocation = build.getLabelOffset(label);
|
||||
}
|
||||
|
||||
IrInst& inst = function.instructions[index];
|
||||
|
||||
// Skip pseudo instructions, but make sure they are not used at this stage
|
||||
// This also prevents them from getting into text output when that's enabled
|
||||
if (isPseudo(inst.cmd))
|
||||
{
|
||||
LUAU_ASSERT(inst.useCount == 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Either instruction result value is not referenced or the use count is not zero
|
||||
LUAU_ASSERT(inst.lastUse == 0 || inst.useCount != 0);
|
||||
|
||||
if (options.includeIr)
|
||||
{
|
||||
build.logAppend("# ");
|
||||
toStringDetailed(ctx, block, blockIndex, inst, index, /* includeUseInfo */ true);
|
||||
}
|
||||
|
||||
IrBlock& next = i + 1 < sortedBlocks.size() ? function.blocks[sortedBlocks[i + 1]] : dummy;
|
||||
|
||||
lowering.lowerInst(inst, index, next);
|
||||
|
||||
if (lowering.hasError())
|
||||
{
|
||||
// Place labels for all blocks that we're skipping
|
||||
// This is needed to avoid AssemblyBuilder assertions about jumps in earlier blocks with unplaced labels
|
||||
for (size_t j = i + 1; j < sortedBlocks.size(); ++j)
|
||||
{
|
||||
IrBlock& abandoned = function.blocks[sortedBlocks[j]];
|
||||
|
||||
build.setLabel(abandoned.label);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
lowering.finishBlock();
|
||||
|
||||
if (options.includeIr)
|
||||
build.logAppend("#\n");
|
||||
}
|
||||
|
||||
if (outputEnabled && !options.includeOutlinedCode && seenFallback)
|
||||
{
|
||||
build.text.resize(textSize);
|
||||
|
||||
if (options.includeAssembly)
|
||||
build.logAppend("; skipping %u bytes of outlined code\n", unsigned((build.getCodeSize() - codeSize) * sizeof(build.code[0])));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
[[maybe_unused]] static bool lowerIr(
|
||||
X64::AssemblyBuilderX64& build, IrBuilder& ir, NativeState& data, ModuleHelpers& helpers, Proto* proto, AssemblyOptions options)
|
||||
{
|
||||
optimizeMemoryOperandsX64(ir.function);
|
||||
|
||||
X64::IrLoweringX64 lowering(build, helpers, data, ir.function);
|
||||
|
||||
return lowerImpl(build, lowering, ir.function, proto->bytecodeid, options);
|
||||
}
|
||||
|
||||
[[maybe_unused]] static bool lowerIr(
|
||||
A64::AssemblyBuilderA64& build, IrBuilder& ir, NativeState& data, ModuleHelpers& helpers, Proto* proto, AssemblyOptions options)
|
||||
{
|
||||
A64::IrLoweringA64 lowering(build, helpers, data, proto, ir.function);
|
||||
|
||||
return lowerImpl(build, lowering, ir.function, proto->bytecodeid, options);
|
||||
}
|
||||
|
||||
template<typename AssemblyBuilder>
|
||||
static std::optional<NativeProto> assembleFunction(AssemblyBuilder& build, NativeState& data, ModuleHelpers& helpers, Proto* proto, AssemblyOptions options)
|
||||
{
|
||||
if (options.includeAssembly || options.includeIr)
|
||||
{
|
||||
if (proto->debugname)
|
||||
build.logAppend("; function %s(", getstr(proto->debugname));
|
||||
else
|
||||
build.logAppend("; function(");
|
||||
|
||||
for (int i = 0; i < proto->numparams; i++)
|
||||
{
|
||||
LocVar* var = proto->locvars ? &proto->locvars[proto->sizelocvars - proto->numparams + i] : nullptr;
|
||||
|
||||
if (var && var->varname)
|
||||
build.logAppend("%s%s", i == 0 ? "" : ", ", getstr(var->varname));
|
||||
else
|
||||
build.logAppend("%s$arg%d", i == 0 ? "" : ", ", i);
|
||||
}
|
||||
|
||||
if (proto->numparams != 0 && proto->is_vararg)
|
||||
build.logAppend(", ...)");
|
||||
else
|
||||
build.logAppend(")");
|
||||
|
||||
if (proto->linedefined >= 0)
|
||||
build.logAppend(" line %d\n", proto->linedefined);
|
||||
else
|
||||
build.logAppend("\n");
|
||||
}
|
||||
|
||||
IrBuilder ir;
|
||||
ir.buildFunctionIr(proto);
|
||||
|
||||
computeCfgInfo(ir.function);
|
||||
|
||||
if (!FFlag::DebugCodegenNoOpt)
|
||||
{
|
||||
bool useValueNumbering = !FFlag::DebugCodegenSkipNumbering;
|
||||
|
||||
constPropInBlockChains(ir, useValueNumbering);
|
||||
|
||||
if (!FFlag::DebugCodegenOptSize)
|
||||
createLinearBlocks(ir, useValueNumbering);
|
||||
}
|
||||
|
||||
if (!lowerIr(build, ir, data, helpers, proto, options))
|
||||
{
|
||||
if (build.logText)
|
||||
build.logAppend("; skipping (can't lower)\n\n");
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
if (build.logText)
|
||||
build.logAppend("\n");
|
||||
|
||||
return createNativeProto(proto, ir);
|
||||
}
|
||||
|
||||
static void onCloseState(lua_State* L)
|
||||
{
|
||||
destroyNativeState(L);
|
||||
}
|
||||
|
||||
static void onDestroyFunction(lua_State* L, Proto* proto)
|
||||
{
|
||||
destroyExecData(proto->execdata);
|
||||
proto->execdata = nullptr;
|
||||
proto->exectarget = 0;
|
||||
}
|
||||
|
||||
static int onEnter(lua_State* L, Proto* proto)
|
||||
{
|
||||
NativeState* data = getNativeState(L);
|
||||
|
||||
LUAU_ASSERT(proto->execdata);
|
||||
LUAU_ASSERT(L->ci->savedpc >= proto->code && L->ci->savedpc < proto->code + proto->sizecode);
|
||||
|
||||
uintptr_t target = proto->exectarget + static_cast<uint32_t*>(proto->execdata)[L->ci->savedpc - proto->code];
|
||||
|
||||
// Returns 1 to finish the function in the VM
|
||||
return GateFn(data->context.gateEntry)(L, proto, target, &data->context);
|
||||
}
|
||||
|
||||
static void onSetBreakpoint(lua_State* L, Proto* proto, int instruction)
|
||||
{
|
||||
if (!proto->execdata)
|
||||
return;
|
||||
|
||||
LUAU_ASSERT(!"native breakpoints are not implemented");
|
||||
}
|
||||
|
||||
#if defined(__aarch64__)
|
||||
static unsigned int getCpuFeaturesA64()
|
||||
{
|
||||
unsigned int result = 0;
|
||||
|
||||
#ifdef __APPLE__
|
||||
int jscvt = 0;
|
||||
size_t jscvtLen = sizeof(jscvt);
|
||||
if (sysctlbyname("hw.optional.arm.FEAT_JSCVT", &jscvt, &jscvtLen, nullptr, 0) == 0 && jscvt == 1)
|
||||
result |= A64::Feature_JSCVT;
|
||||
#endif
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
bool isSupported()
|
||||
{
|
||||
if (!LUA_CUSTOM_EXECUTION)
|
||||
return false;
|
||||
|
||||
if (LUA_EXTRA_SIZE != 1)
|
||||
return false;
|
||||
|
||||
if (sizeof(TValue) != 16)
|
||||
return false;
|
||||
|
||||
if (sizeof(LuaNode) != 32)
|
||||
return false;
|
||||
|
||||
// Windows CRT uses stack unwinding in longjmp so we have to use unwind data; on other platforms, it's only necessary for C++ EH.
|
||||
#if defined(_WIN32)
|
||||
if (!isUnwindSupported())
|
||||
return false;
|
||||
#else
|
||||
if (!LUA_USE_LONGJMP && !isUnwindSupported())
|
||||
return false;
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) || defined(_M_X64)
|
||||
int cpuinfo[4] = {};
|
||||
#ifdef _MSC_VER
|
||||
__cpuid(cpuinfo, 1);
|
||||
#else
|
||||
__cpuid(1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);
|
||||
#endif
|
||||
|
||||
// We require AVX1 support for VEX encoded XMM operations
|
||||
// We also requre SSE4.1 support for ROUNDSD but the AVX check below covers it
|
||||
// https://en.wikipedia.org/wiki/CPUID#EAX=1:_Processor_Info_and_Feature_Bits
|
||||
if ((cpuinfo[2] & (1 << 28)) == 0)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
#elif defined(__aarch64__)
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
void create(lua_State* L)
|
||||
{
|
||||
LUAU_ASSERT(isSupported());
|
||||
|
||||
NativeState& data = *createNativeState(L);
|
||||
|
||||
#if defined(_WIN32)
|
||||
data.unwindBuilder = std::make_unique<UnwindBuilderWin>();
|
||||
#else
|
||||
data.unwindBuilder = std::make_unique<UnwindBuilderDwarf2>();
|
||||
#endif
|
||||
|
||||
data.codeAllocator.context = data.unwindBuilder.get();
|
||||
data.codeAllocator.createBlockUnwindInfo = createBlockUnwindInfo;
|
||||
data.codeAllocator.destroyBlockUnwindInfo = destroyBlockUnwindInfo;
|
||||
|
||||
initFunctions(data);
|
||||
|
||||
#if defined(__x86_64__) || defined(_M_X64)
|
||||
if (!X64::initHeaderFunctions(data))
|
||||
{
|
||||
destroyNativeState(L);
|
||||
return;
|
||||
}
|
||||
#elif defined(__aarch64__)
|
||||
if (!A64::initHeaderFunctions(data))
|
||||
{
|
||||
destroyNativeState(L);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (gPerfLogFn)
|
||||
gPerfLogFn(gPerfLogContext, uintptr_t(data.context.gateEntry), 4096, "<luau gate>");
|
||||
|
||||
lua_ExecutionCallbacks* ecb = getExecutionCallbacks(L);
|
||||
|
||||
ecb->close = onCloseState;
|
||||
ecb->destroy = onDestroyFunction;
|
||||
ecb->enter = onEnter;
|
||||
ecb->setbreakpoint = onSetBreakpoint;
|
||||
}
|
||||
|
||||
static void gatherFunctions(std::vector<Proto*>& results, Proto* proto)
|
||||
{
|
||||
if (results.size() <= size_t(proto->bytecodeid))
|
||||
results.resize(proto->bytecodeid + 1);
|
||||
|
||||
// Skip protos that we've already compiled in this run: this happens because at -O2, inlined functions get their protos reused
|
||||
if (results[proto->bytecodeid])
|
||||
return;
|
||||
|
||||
results[proto->bytecodeid] = proto;
|
||||
|
||||
for (int i = 0; i < proto->sizep; i++)
|
||||
gatherFunctions(results, proto->p[i]);
|
||||
}
|
||||
|
||||
void compile(lua_State* L, int idx)
|
||||
{
|
||||
LUAU_ASSERT(lua_isLfunction(L, idx));
|
||||
const TValue* func = luaA_toobject(L, idx);
|
||||
|
||||
// If initialization has failed, do not compile any functions
|
||||
if (!getNativeState(L))
|
||||
return;
|
||||
|
||||
#if defined(__aarch64__)
|
||||
A64::AssemblyBuilderA64 build(/* logText= */ false, getCpuFeaturesA64());
|
||||
#else
|
||||
X64::AssemblyBuilderX64 build(/* logText= */ false);
|
||||
#endif
|
||||
|
||||
NativeState* data = getNativeState(L);
|
||||
|
||||
std::vector<Proto*> protos;
|
||||
gatherFunctions(protos, clvalue(func)->l.p);
|
||||
|
||||
ModuleHelpers helpers;
|
||||
#if defined(__aarch64__)
|
||||
A64::assembleHelpers(build, helpers);
|
||||
#else
|
||||
X64::assembleHelpers(build, helpers);
|
||||
#endif
|
||||
|
||||
std::vector<NativeProto> results;
|
||||
results.reserve(protos.size());
|
||||
|
||||
// Skip protos that have been compiled during previous invocations of CodeGen::compile
|
||||
for (Proto* p : protos)
|
||||
if (p && p->execdata == nullptr)
|
||||
if (std::optional<NativeProto> np = assembleFunction(build, *data, helpers, p, {}))
|
||||
results.push_back(*np);
|
||||
|
||||
// Very large modules might result in overflowing a jump offset; in this case we currently abandon the entire module
|
||||
if (!build.finalize())
|
||||
{
|
||||
for (NativeProto result : results)
|
||||
destroyExecData(result.execdata);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// If no functions were assembled, we don't need to allocate/copy executable pages for helpers
|
||||
if (results.empty())
|
||||
return;
|
||||
|
||||
uint8_t* nativeData = nullptr;
|
||||
size_t sizeNativeData = 0;
|
||||
uint8_t* codeStart = nullptr;
|
||||
if (!data->codeAllocator.allocate(build.data.data(), int(build.data.size()), reinterpret_cast<const uint8_t*>(build.code.data()),
|
||||
int(build.code.size() * sizeof(build.code[0])), nativeData, sizeNativeData, codeStart))
|
||||
{
|
||||
for (NativeProto result : results)
|
||||
destroyExecData(result.execdata);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (gPerfLogFn && results.size() > 0)
|
||||
{
|
||||
gPerfLogFn(gPerfLogContext, uintptr_t(codeStart), uint32_t(results[0].exectarget), "<luau helpers>");
|
||||
|
||||
for (size_t i = 0; i < results.size(); ++i)
|
||||
{
|
||||
uint32_t begin = uint32_t(results[i].exectarget);
|
||||
uint32_t end = i + 1 < results.size() ? uint32_t(results[i + 1].exectarget) : uint32_t(build.code.size() * sizeof(build.code[0]));
|
||||
LUAU_ASSERT(begin < end);
|
||||
|
||||
logPerfFunction(results[i].p, uintptr_t(codeStart) + begin, end - begin);
|
||||
}
|
||||
}
|
||||
|
||||
for (NativeProto result : results)
|
||||
{
|
||||
// the memory is now managed by the VM and will be freed via onDestroyFunction
|
||||
result.p->execdata = result.execdata;
|
||||
result.p->exectarget = uintptr_t(codeStart) + result.exectarget;
|
||||
}
|
||||
}
|
||||
|
||||
std::string getAssembly(lua_State* L, int idx, AssemblyOptions options)
|
||||
{
|
||||
LUAU_ASSERT(lua_isLfunction(L, idx));
|
||||
const TValue* func = luaA_toobject(L, idx);
|
||||
|
||||
#if defined(__aarch64__)
|
||||
A64::AssemblyBuilderA64 build(/* logText= */ options.includeAssembly, getCpuFeaturesA64());
|
||||
#else
|
||||
X64::AssemblyBuilderX64 build(/* logText= */ options.includeAssembly);
|
||||
#endif
|
||||
|
||||
NativeState data;
|
||||
initFunctions(data);
|
||||
|
||||
std::vector<Proto*> protos;
|
||||
gatherFunctions(protos, clvalue(func)->l.p);
|
||||
|
||||
ModuleHelpers helpers;
|
||||
#if defined(__aarch64__)
|
||||
A64::assembleHelpers(build, helpers);
|
||||
#else
|
||||
X64::assembleHelpers(build, helpers);
|
||||
#endif
|
||||
|
||||
for (Proto* p : protos)
|
||||
if (p)
|
||||
if (std::optional<NativeProto> np = assembleFunction(build, data, helpers, p, options))
|
||||
destroyExecData(np->execdata);
|
||||
|
||||
if (!build.finalize())
|
||||
return std::string();
|
||||
|
||||
if (options.outputBinary)
|
||||
return std::string(reinterpret_cast<const char*>(build.code.data()), reinterpret_cast<const char*>(build.code.data() + build.code.size())) +
|
||||
std::string(build.data.begin(), build.data.end());
|
||||
else
|
||||
return build.text;
|
||||
}
|
||||
|
||||
void setPerfLog(void* context, PerfLogFn logFn)
|
||||
{
|
||||
gPerfLogContext = context;
|
||||
gPerfLogFn = logFn;
|
||||
}
|
||||
|
||||
} // namespace CodeGen
|
||||
} // namespace Luau
|
|
@ -0,0 +1,236 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#include "CodeGenA64.h"
|
||||
|
||||
#include "Luau/AssemblyBuilderA64.h"
|
||||
#include "Luau/UnwindBuilder.h"
|
||||
|
||||
#include "BitUtils.h"
|
||||
#include "CustomExecUtils.h"
|
||||
#include "NativeState.h"
|
||||
#include "EmitCommonA64.h"
|
||||
|
||||
#include "lstate.h"
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace CodeGen
|
||||
{
|
||||
namespace A64
|
||||
{
|
||||
|
||||
struct EntryLocations
|
||||
{
|
||||
Label start;
|
||||
Label prologueEnd;
|
||||
Label epilogueStart;
|
||||
};
|
||||
|
||||
static void emitExit(AssemblyBuilderA64& build, bool continueInVm)
|
||||
{
|
||||
build.mov(x0, continueInVm);
|
||||
build.ldr(x1, mem(rNativeContext, offsetof(NativeContext, gateExit)));
|
||||
build.br(x1);
|
||||
}
|
||||
|
||||
static void emitInterrupt(AssemblyBuilderA64& build)
|
||||
{
|
||||
// x0 = pc offset
|
||||
// x1 = return address in native code
|
||||
|
||||
Label skip;
|
||||
|
||||
// Stash return address in rBase; we need to reload rBase anyway
|
||||
build.mov(rBase, x1);
|
||||
|
||||
// Load interrupt handler; it may be nullptr in case the update raced with the check before we got here
|
||||
build.ldr(x2, mem(rState, offsetof(lua_State, global)));
|
||||
build.ldr(x2, mem(x2, offsetof(global_State, cb.interrupt)));
|
||||
build.cbz(x2, skip);
|
||||
|
||||
// Update savedpc; required in case the interrupt handler errors
|
||||
build.add(x0, rCode, x0);
|
||||
build.ldr(x1, mem(rState, offsetof(lua_State, ci)));
|
||||
build.str(x0, mem(x1, offsetof(CallInfo, savedpc)));
|
||||
|
||||
// Call interrupt
|
||||
build.mov(x0, rState);
|
||||
build.mov(w1, -1);
|
||||
build.blr(x2);
|
||||
|
||||
// Check if we need to exit
|
||||
build.ldrb(w0, mem(rState, offsetof(lua_State, status)));
|
||||
build.cbz(w0, skip);
|
||||
|
||||
// L->ci->savedpc--
|
||||
// note: recomputing this avoids having to stash x0
|
||||
build.ldr(x1, mem(rState, offsetof(lua_State, ci)));
|
||||
build.ldr(x0, mem(x1, offsetof(CallInfo, savedpc)));
|
||||
build.sub(x0, x0, sizeof(Instruction));
|
||||
build.str(x0, mem(x1, offsetof(CallInfo, savedpc)));
|
||||
|
||||
emitExit(build, /* continueInVm */ false);
|
||||
|
||||
build.setLabel(skip);
|
||||
|
||||
// Return to the caller; rBase has the stashed return address
|
||||
build.mov(x0, rBase);
|
||||
|
||||
emitUpdateBase(build); // interrupt may have reallocated stack
|
||||
|
||||
build.br(x0);
|
||||
}
|
||||
|
||||
static void emitReentry(AssemblyBuilderA64& build, ModuleHelpers& helpers)
|
||||
{
|
||||
// x0 = closure object to re-enter (equal to clvalue(L->ci->func))
|
||||
|
||||
// If the fallback requested an exit, we need to do this right away
|
||||
build.cbz(x0, helpers.exitNoContinueVm);
|
||||
|
||||
emitUpdateBase(build);
|
||||
|
||||
// Need to update state of the current function before we jump away
|
||||
build.ldr(x1, mem(x0, offsetof(Closure, l.p))); // cl->l.p aka proto
|
||||
|
||||
build.ldr(x2, mem(rState, offsetof(lua_State, ci))); // L->ci
|
||||
|
||||
// We need to check if the new frame can be executed natively
|
||||
// TODO: the .flags and .savedpc loads below can be fused with ldp
|
||||
build.ldr(w3, mem(x2, offsetof(CallInfo, flags)));
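// tbz tests a single bit; countrz() turns the LUA_CALLINFO_CUSTOM mask into the bit index to test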
|
||||
build.tbz(x3, countrz(LUA_CALLINFO_CUSTOM), helpers.exitContinueVm);
|
||||
|
||||
build.mov(rClosure, x0);
|
||||
build.ldr(rConstants, mem(x1, offsetof(Proto, k))); // proto->k
|
||||
build.ldr(rCode, mem(x1, offsetof(Proto, code))); // proto->code
|
||||
|
||||
// Get instruction index from instruction pointer
|
||||
// To get instruction index from instruction pointer, we need to divide byte offset by 4
|
||||
// But we will actually need to scale instruction index by 4 back to byte offset later so it cancels out
|
||||
build.ldr(x2, mem(x2, offsetof(CallInfo, savedpc))); // L->ci->savedpc
|
||||
build.sub(x2, x2, rCode);
|
||||
|
||||
// Get new instruction location and jump to it
|
||||
LUAU_ASSERT(offsetof(Proto, exectarget) == offsetof(Proto, execdata) + 8);
|
||||
build.ldp(x3, x4, mem(x1, offsetof(Proto, execdata)));
|
||||
build.ldr(w2, mem(x3, x2));
|
||||
build.add(x4, x4, x2);
|
||||
build.br(x4);
|
||||
}
|
||||
|
||||
static EntryLocations buildEntryFunction(AssemblyBuilderA64& build, UnwindBuilder& unwind)
|
||||
{
|
||||
EntryLocations locations;
|
||||
|
||||
// Arguments: x0 = lua_State*, x1 = Proto*, x2 = native code pointer to jump to, x3 = NativeContext*
|
||||
|
||||
locations.start = build.setLabel();
|
||||
|
||||
// prologue
|
||||
build.sub(sp, sp, kStackSize);
|
||||
build.stp(x29, x30, mem(sp)); // fp, lr
|
||||
|
||||
// stash non-volatile registers used for execution environment
|
||||
build.stp(x19, x20, mem(sp, 16));
|
||||
build.stp(x21, x22, mem(sp, 32));
|
||||
build.stp(x23, x24, mem(sp, 48));
|
||||
|
||||
build.mov(x29, sp); // this is only necessary if we maintain frame pointers, which we do in the JIT for now
|
||||
|
||||
locations.prologueEnd = build.setLabel();
|
||||
|
||||
uint32_t prologueSize = build.getLabelOffset(locations.prologueEnd) - build.getLabelOffset(locations.start);
|
||||
|
||||
// Setup native execution environment
|
||||
build.mov(rState, x0);
|
||||
build.mov(rNativeContext, x3);
|
||||
|
||||
build.ldr(rBase, mem(x0, offsetof(lua_State, base))); // L->base
|
||||
build.ldr(rConstants, mem(x1, offsetof(Proto, k))); // proto->k
|
||||
build.ldr(rCode, mem(x1, offsetof(Proto, code))); // proto->code
|
||||
|
||||
build.ldr(x9, mem(x0, offsetof(lua_State, ci))); // L->ci
|
||||
build.ldr(x9, mem(x9, offsetof(CallInfo, func))); // L->ci->func
|
||||
build.ldr(rClosure, mem(x9, offsetof(TValue, value.gc))); // L->ci->func->value.gc aka cl
|
||||
|
||||
// Jump to the specified instruction; further control flow will be handled with a custom ABI, with register setup from EmitCommonA64.h
|
||||
build.br(x2);
|
||||
|
||||
// Even though we jumped away, we will return here in the end
|
||||
locations.epilogueStart = build.setLabel();
|
||||
|
||||
// Cleanup and exit
|
||||
build.ldp(x23, x24, mem(sp, 48));
|
||||
build.ldp(x21, x22, mem(sp, 32));
|
||||
build.ldp(x19, x20, mem(sp, 16));
|
||||
build.ldp(x29, x30, mem(sp)); // fp, lr
|
||||
build.add(sp, sp, kStackSize);
|
||||
|
||||
build.ret();
|
||||
|
||||
// Our entry function is special: it spans the whole remaining code area
|
||||
unwind.startFunction();
|
||||
unwind.prologueA64(prologueSize, kStackSize, {x29, x30, x19, x20, x21, x22, x23, x24});
|
||||
unwind.finishFunction(build.getLabelOffset(locations.start), kFullBlockFuncton);
|
||||
|
||||
return locations;
|
||||
}
|
||||
|
||||
bool initHeaderFunctions(NativeState& data)
|
||||
{
|
||||
AssemblyBuilderA64 build(/* logText= */ false);
|
||||
UnwindBuilder& unwind = *data.unwindBuilder.get();
|
||||
|
||||
unwind.startInfo(UnwindBuilder::A64);
|
||||
|
||||
EntryLocations entryLocations = buildEntryFunction(build, unwind);
|
||||
|
||||
build.finalize();
|
||||
|
||||
unwind.finishInfo();
|
||||
|
||||
LUAU_ASSERT(build.data.empty());
|
||||
|
||||
uint8_t* codeStart = nullptr;
|
||||
if (!data.codeAllocator.allocate(build.data.data(), int(build.data.size()), reinterpret_cast<const uint8_t*>(build.code.data()),
|
||||
int(build.code.size() * sizeof(build.code[0])), data.gateData, data.gateDataSize, codeStart))
|
||||
{
|
||||
LUAU_ASSERT(!"failed to create entry function");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Set the offset at the beginning so that functions in new blocks will not overlay the locations
|
||||
// specified by the unwind information of the entry function
|
||||
unwind.setBeginOffset(build.getLabelOffset(entryLocations.prologueEnd));
|
||||
|
||||
data.context.gateEntry = codeStart + build.getLabelOffset(entryLocations.start);
|
||||
data.context.gateExit = codeStart + build.getLabelOffset(entryLocations.epilogueStart);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void assembleHelpers(AssemblyBuilderA64& build, ModuleHelpers& helpers)
|
||||
{
|
||||
if (build.logText)
|
||||
build.logAppend("; exitContinueVm\n");
|
||||
helpers.exitContinueVm = build.setLabel();
|
||||
emitExit(build, /* continueInVm */ true);
|
||||
|
||||
if (build.logText)
|
||||
build.logAppend("; exitNoContinueVm\n");
|
||||
helpers.exitNoContinueVm = build.setLabel();
|
||||
emitExit(build, /* continueInVm */ false);
|
||||
|
||||
if (build.logText)
|
||||
build.logAppend("; reentry\n");
|
||||
helpers.reentry = build.setLabel();
|
||||
emitReentry(build, helpers);
|
||||
|
||||
if (build.logText)
|
||||
build.logAppend("; interrupt\n");
|
||||
helpers.interrupt = build.setLabel();
|
||||
emitInterrupt(build);
|
||||
}
|
||||
|
||||
} // namespace A64
|
||||
} // namespace CodeGen
|
||||
} // namespace Luau
|
|
@ -0,0 +1,22 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#pragma once
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace CodeGen
|
||||
{
|
||||
|
||||
struct NativeState;
|
||||
struct ModuleHelpers;
|
||||
|
||||
namespace A64
|
||||
{
|
||||
|
||||
class AssemblyBuilderA64;
|
||||
|
||||
bool initHeaderFunctions(NativeState& data);
|
||||
void assembleHelpers(AssemblyBuilderA64& build, ModuleHelpers& helpers);
|
||||
|
||||
} // namespace A64
|
||||
} // namespace CodeGen
|
||||
} // namespace Luau
|
|
@ -0,0 +1,959 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#include "CodeGenUtils.h"
|
||||
|
||||
#include "CustomExecUtils.h"
|
||||
|
||||
#include "lvm.h"
|
||||
|
||||
#include "lbuiltins.h"
|
||||
#include "lbytecode.h"
|
||||
#include "ldebug.h"
|
||||
#include "ldo.h"
|
||||
#include "lfunc.h"
|
||||
#include "lgc.h"
|
||||
#include "lmem.h"
|
||||
#include "lnumutils.h"
|
||||
#include "lstate.h"
|
||||
#include "lstring.h"
|
||||
#include "ltable.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
LUAU_FASTFLAG(LuauUniformTopHandling)
|
||||
|
||||
// All external function calls that can cause stack realloc or Lua calls have to be wrapped in VM_PROTECT
|
||||
// This makes sure that we save the pc (in case the Lua call needs to generate a backtrace) before the call,
|
||||
// and reloads base afterwards in case the stack gets reallocated
|
||||
// Should only be used on the slow paths.
|
||||
#define VM_PROTECT(x) \
|
||||
{ \
|
||||
L->ci->savedpc = pc; \
|
||||
{ \
|
||||
x; \
|
||||
}; \
|
||||
base = L->base; \
|
||||
}
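// Illustrative usage (a sketch mirroring call sites later in this file):
//   VM_PROTECT(luaV_gettable(L, rb, kv, ra));
// after the macro, base has been reloaded, so any cached StkId like ra/rb must be recomputed before reuse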
|
||||
|
||||
// Some external functions can cause an error, but never reallocate the stack; for these, VM_PROTECT_PC() is
|
||||
// a cheaper version of VM_PROTECT that can be called before the external call.
|
||||
#define VM_PROTECT_PC() L->ci->savedpc = pc
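// Illustrative usage (a sketch; see the luaH_resizearray call later in this file): the callee
// may throw on OOM but never moves the stack, so saving pc alone is sufficient:
//   VM_PROTECT_PC();
//   luaH_resizearray(L, h, last);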
|
||||
|
||||
#define VM_REG(i) (LUAU_ASSERT(unsigned(i) < unsigned(L->top - base)), &base[i])
|
||||
#define VM_KV(i) (LUAU_ASSERT(unsigned(i) < unsigned(cl->l.p->sizek)), &k[i])
|
||||
#define VM_UV(i) (LUAU_ASSERT(unsigned(i) < unsigned(cl->nupvalues)), &cl->l.uprefs[i])
|
||||
|
||||
#define VM_PATCH_C(pc, slot) *const_cast<Instruction*>(pc) = ((uint8_t(slot) << 24) | (0x00ffffffu & *(pc)))
|
||||
#define VM_PATCH_E(pc, slot) *const_cast<Instruction*>(pc) = ((uint32_t(slot) << 8) | (0x000000ffu & *(pc)))
|
||||
|
||||
#define VM_INTERRUPT() \
|
||||
{ \
|
||||
void (*interrupt)(lua_State*, int) = L->global->cb.interrupt; \
|
||||
if (LUAU_UNLIKELY(!!interrupt)) \
|
||||
{ /* the interrupt hook is called right before we advance pc */ \
|
||||
VM_PROTECT(L->ci->savedpc++; interrupt(L, -1)); \
|
||||
if (L->status != 0) \
|
||||
{ \
|
||||
L->ci->savedpc--; \
|
||||
return NULL; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace CodeGen
|
||||
{
|
||||
|
||||
bool forgLoopTableIter(lua_State* L, Table* h, int index, TValue* ra)
|
||||
{
|
||||
int sizearray = h->sizearray;
|
||||
|
||||
// first we advance index through the array portion
|
||||
while (unsigned(index) < unsigned(sizearray))
|
||||
{
|
||||
TValue* e = &h->array[index];
|
||||
|
||||
if (!ttisnil(e))
|
||||
{
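// publish iteration state: ra+2 is the hidden control variable (the next index,
// pointer-encoded), ra+3/ra+4 are the user-visible key and value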
|
||||
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
|
||||
setnvalue(ra + 3, double(index + 1));
|
||||
setobj2s(L, ra + 4, e);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
index++;
|
||||
}
|
||||
|
||||
int sizenode = 1 << h->lsizenode;
|
||||
|
||||
// then we advance index through the hash portion
|
||||
while (unsigned(index - h->sizearray) < unsigned(sizenode))
|
||||
{
|
||||
LuaNode* n = &h->node[index - sizearray];
|
||||
|
||||
if (!ttisnil(gval(n)))
|
||||
{
|
||||
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
|
||||
getnodekey(L, ra + 3, n);
|
||||
setobj(L, ra + 4, gval(n));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
index++;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool forgLoopNodeIter(lua_State* L, Table* h, int index, TValue* ra)
|
||||
{
|
||||
int sizearray = h->sizearray;
|
||||
int sizenode = 1 << h->lsizenode;
|
||||
|
||||
// advance index through the hash portion (this helper only visits the node part)
|
||||
while (unsigned(index - sizearray) < unsigned(sizenode))
|
||||
{
|
||||
LuaNode* n = &h->node[index - sizearray];
|
||||
|
||||
if (!ttisnil(gval(n)))
|
||||
{
|
||||
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
|
||||
getnodekey(L, ra + 3, n);
|
||||
setobj(L, ra + 4, gval(n));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
index++;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool forgLoopNonTableFallback(lua_State* L, int insnA, int aux)
|
||||
{
|
||||
TValue* base = L->base;
|
||||
TValue* ra = VM_REG(insnA);
|
||||
|
||||
// note: it's safe to push arguments past top for complicated reasons (see lvmexecute.cpp)
|
||||
setobj2s(L, ra + 3 + 2, ra + 2);
|
||||
setobj2s(L, ra + 3 + 1, ra + 1);
|
||||
setobj2s(L, ra + 3, ra);
|
||||
|
||||
L->top = ra + 3 + 3; // func + 2 args (state and index)
|
||||
LUAU_ASSERT(L->top <= L->stack_last);
|
||||
|
||||
luaD_call(L, ra + 3, uint8_t(aux));
|
||||
L->top = L->ci->top;
|
||||
|
||||
// recompute ra since stack might have been reallocated
|
||||
base = L->base;
|
||||
ra = VM_REG(insnA);
|
||||
|
||||
// copy first variable back into the iteration index
|
||||
setobj2s(L, ra + 2, ra + 3);
|
||||
|
||||
return !ttisnil(ra + 3);
|
||||
}
|
||||
|
||||
void forgPrepXnextFallback(lua_State* L, TValue* ra, int pc)
|
||||
{
|
||||
if (!ttisfunction(ra))
|
||||
{
|
||||
Closure* cl = clvalue(L->ci->func);
|
||||
L->ci->savedpc = cl->l.p->code + pc;
|
||||
|
||||
luaG_typeerror(L, ra, "iterate over");
|
||||
}
|
||||
}
|
||||
|
||||
Closure* callProlog(lua_State* L, TValue* ra, StkId argtop, int nresults)
|
||||
{
|
||||
// slow-path: not a function call
|
||||
if (LUAU_UNLIKELY(!ttisfunction(ra)))
|
||||
{
|
||||
luaV_tryfuncTM(L, ra);
|
||||
argtop++; // __call adds an extra self
|
||||
}
|
||||
|
||||
Closure* ccl = clvalue(ra);
|
||||
|
||||
CallInfo* ci = incr_ci(L);
|
||||
ci->func = ra;
|
||||
ci->base = ra + 1;
|
||||
ci->top = argtop + ccl->stacksize; // note: technically UB since we haven't reallocated the stack yet
|
||||
ci->savedpc = NULL;
|
||||
ci->flags = 0;
|
||||
ci->nresults = nresults;
|
||||
|
||||
L->base = ci->base;
|
||||
L->top = argtop;
|
||||
|
||||
// note: this reallocs stack, but we don't need to VM_PROTECT this
|
||||
// this is because we're going to modify base/savedpc manually anyhow
|
||||
// crucially, we can't use ra/argtop after this line
|
||||
luaD_checkstack(L, ccl->stacksize);
|
||||
|
||||
return ccl;
|
||||
}
|
||||
|
||||
void callEpilogC(lua_State* L, int nresults, int n)
|
||||
{
|
||||
// ci is our callinfo, cip is our parent
|
||||
CallInfo* ci = L->ci;
|
||||
CallInfo* cip = ci - 1;
|
||||
|
||||
// copy return values into parent stack (but only up to nresults!), fill the rest with nil
|
||||
// note: in MULTRET context nresults starts as -1, so the i != 0 condition intentionally never fires
|
||||
StkId res = ci->func;
|
||||
StkId vali = L->top - n;
|
||||
StkId valend = L->top;
|
||||
|
||||
int i;
|
||||
for (i = nresults; i != 0 && vali < valend; i--)
|
||||
setobj2s(L, res++, vali++);
|
||||
while (i-- > 0)
|
||||
setnilvalue(res++);
|
||||
|
||||
// pop the stack frame
|
||||
L->ci = cip;
|
||||
L->base = cip->base;
|
||||
L->top = (nresults == LUA_MULTRET) ? res : cip->top;
|
||||
}
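// Worked example (illustrative): with n == 2 values returned and nresults == 3, the for loop
// copies 2 values and the while loop fills 1 nil; with nresults == LUA_MULTRET (-1), all n
// values are copied, no nils are appended, and L->top is left at res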
|
||||
|
||||
// Extracted as-is from lvmexecute.cpp, except for control flow (reentry) and the removed interrupt/savedpc handling
|
||||
Closure* callFallback(lua_State* L, StkId ra, StkId argtop, int nresults)
|
||||
{
|
||||
// slow-path: not a function call
|
||||
if (LUAU_UNLIKELY(!ttisfunction(ra)))
|
||||
{
|
||||
luaV_tryfuncTM(L, ra);
|
||||
argtop++; // __call adds an extra self
|
||||
}
|
||||
|
||||
Closure* ccl = clvalue(ra);
|
||||
|
||||
CallInfo* ci = incr_ci(L);
|
||||
ci->func = ra;
|
||||
ci->base = ra + 1;
|
||||
ci->top = argtop + ccl->stacksize; // note: technically UB since we haven't reallocated the stack yet
|
||||
ci->savedpc = NULL;
|
||||
ci->flags = 0;
|
||||
ci->nresults = nresults;
|
||||
|
||||
L->base = ci->base;
|
||||
L->top = argtop;
|
||||
|
||||
// note: this reallocs stack, but we don't need to VM_PROTECT this
|
||||
// this is because we're going to modify base/savedpc manually anyhow
|
||||
// crucially, we can't use ra/argtop after this line
|
||||
luaD_checkstack(L, ccl->stacksize);
|
||||
|
||||
LUAU_ASSERT(ci->top <= L->stack_last);
|
||||
|
||||
if (!ccl->isC)
|
||||
{
|
||||
Proto* p = ccl->l.p;
|
||||
|
||||
// fill unused parameters with nil
|
||||
StkId argi = L->top;
|
||||
StkId argend = L->base + p->numparams;
|
||||
while (argi < argend)
|
||||
setnilvalue(argi++); // complete missing arguments
|
||||
L->top = p->is_vararg ? argi : ci->top;
|
||||
|
||||
// keep executing new function
|
||||
ci->savedpc = p->code;
|
||||
|
||||
if (LUAU_LIKELY(p->execdata != NULL))
|
||||
ci->flags = LUA_CALLINFO_CUSTOM;
|
||||
|
||||
return ccl;
|
||||
}
|
||||
else
|
||||
{
|
||||
lua_CFunction func = ccl->c.f;
|
||||
int n = func(L);
|
||||
|
||||
// yield
|
||||
if (n < 0)
|
||||
return NULL;
|
||||
|
||||
// ci is our callinfo, cip is our parent
|
||||
CallInfo* ci = L->ci;
|
||||
CallInfo* cip = ci - 1;
|
||||
|
||||
// copy return values into parent stack (but only up to nresults!), fill the rest with nil
|
||||
// note: in MULTRET context nresults starts as -1, so the i != 0 condition intentionally never fires
|
||||
StkId res = ci->func;
|
||||
StkId vali = L->top - n;
|
||||
StkId valend = L->top;
|
||||
|
||||
int i;
|
||||
for (i = nresults; i != 0 && vali < valend; i--)
|
||||
setobj2s(L, res++, vali++);
|
||||
while (i-- > 0)
|
||||
setnilvalue(res++);
|
||||
|
||||
// pop the stack frame
|
||||
L->ci = cip;
|
||||
L->base = cip->base;
|
||||
L->top = (nresults == LUA_MULTRET) ? res : cip->top;
|
||||
|
||||
// keep executing current function
|
||||
LUAU_ASSERT(isLua(cip));
|
||||
return clvalue(cip->func);
|
||||
}
|
||||
}
|
||||
|
||||
// Extracted as-is from lvmexecute.cpp, except for control flow (reentry) and the removed interrupt handling
|
||||
Closure* returnFallback(lua_State* L, StkId ra, StkId valend)
|
||||
{
|
||||
// ci is our callinfo, cip is our parent
|
||||
CallInfo* ci = L->ci;
|
||||
CallInfo* cip = ci - 1;
|
||||
|
||||
StkId res = ci->func; // note: we assume CALL always puts func+args and expects results to start at func
|
||||
StkId vali = ra;
|
||||
|
||||
int nresults = ci->nresults;
|
||||
|
||||
// copy return values into parent stack (but only up to nresults!), fill the rest with nil
|
||||
// note: in MULTRET context nresults starts as -1, so the i != 0 condition intentionally never fires
|
||||
int i;
|
||||
for (i = nresults; i != 0 && vali < valend; i--)
|
||||
setobj2s(L, res++, vali++);
|
||||
while (i-- > 0)
|
||||
setnilvalue(res++);
|
||||
|
||||
// pop the stack frame
|
||||
L->ci = cip;
|
||||
L->base = cip->base;
|
||||
L->top = (nresults == LUA_MULTRET) ? res : cip->top;
|
||||
|
||||
// we're done!
|
||||
if (LUAU_UNLIKELY(ci->flags & LUA_CALLINFO_RETURN))
|
||||
{
|
||||
if (!FFlag::LuauUniformTopHandling)
|
||||
L->top = res;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// keep executing new function
|
||||
LUAU_ASSERT(isLua(cip));
|
||||
return clvalue(cip->func);
|
||||
}
|
||||
|
||||
const Instruction* executeGETGLOBAL(lua_State* L, const Instruction* pc, StkId base, TValue* k)
|
||||
{
|
||||
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
|
||||
Instruction insn = *pc++;
|
||||
StkId ra = VM_REG(LUAU_INSN_A(insn));
|
||||
uint32_t aux = *pc++;
|
||||
TValue* kv = VM_KV(aux);
|
||||
LUAU_ASSERT(ttisstring(kv));
|
||||
|
||||
// fast-path should already have been checked, so we skip checking for it here
|
||||
Table* h = cl->env;
|
||||
int slot = LUAU_INSN_C(insn) & h->nodemask8;
|
||||
|
||||
// slow-path, may invoke Lua calls via __index metamethod
|
||||
TValue g;
|
||||
sethvalue(L, &g, h);
|
||||
L->cachedslot = slot;
|
||||
VM_PROTECT(luaV_gettable(L, &g, kv, ra));
|
||||
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
|
||||
VM_PATCH_C(pc - 2, L->cachedslot);
|
||||
return pc;
|
||||
}
|
||||
|
||||
const Instruction* executeSETGLOBAL(lua_State* L, const Instruction* pc, StkId base, TValue* k)
|
||||
{
|
||||
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
|
||||
Instruction insn = *pc++;
|
||||
StkId ra = VM_REG(LUAU_INSN_A(insn));
|
||||
uint32_t aux = *pc++;
|
||||
TValue* kv = VM_KV(aux);
|
||||
LUAU_ASSERT(ttisstring(kv));
|
||||
|
||||
// fast-path should already have been checked, so we skip checking for it here
|
||||
Table* h = cl->env;
|
||||
int slot = LUAU_INSN_C(insn) & h->nodemask8;
|
||||
|
||||
// slow-path, may invoke Lua calls via __newindex metamethod
|
||||
TValue g;
|
||||
sethvalue(L, &g, h);
|
||||
L->cachedslot = slot;
|
||||
VM_PROTECT(luaV_settable(L, &g, kv, ra));
|
||||
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
|
||||
VM_PATCH_C(pc - 2, L->cachedslot);
|
||||
return pc;
|
||||
}
|
||||
|
||||
const Instruction* executeGETTABLEKS(lua_State* L, const Instruction* pc, StkId base, TValue* k)
|
||||
{
|
||||
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
|
||||
Instruction insn = *pc++;
|
||||
StkId ra = VM_REG(LUAU_INSN_A(insn));
|
||||
StkId rb = VM_REG(LUAU_INSN_B(insn));
|
||||
uint32_t aux = *pc++;
|
||||
TValue* kv = VM_KV(aux);
|
||||
LUAU_ASSERT(ttisstring(kv));
|
||||
|
||||
// fast-path: built-in table
|
||||
if (ttistable(rb))
|
||||
{
|
||||
Table* h = hvalue(rb);
|
||||
|
||||
int slot = LUAU_INSN_C(insn) & h->nodemask8;
|
||||
LuaNode* n = &h->node[slot];
|
||||
|
||||
// fast-path: value is in expected slot
|
||||
if (LUAU_LIKELY(ttisstring(gkey(n)) && tsvalue(gkey(n)) == tsvalue(kv) && !ttisnil(gval(n))))
|
||||
{
|
||||
setobj2s(L, ra, gval(n));
|
||||
return pc;
|
||||
}
|
||||
else if (!h->metatable)
|
||||
{
|
||||
// fast-path: value is not in expected slot, but the table lookup doesn't involve metatable
|
||||
const TValue* res = luaH_getstr(h, tsvalue(kv));
|
||||
|
||||
if (res != luaO_nilobject)
|
||||
{
|
||||
int cachedslot = gval2slot(h, res);
|
||||
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
|
||||
VM_PATCH_C(pc - 2, cachedslot);
|
||||
}
|
||||
|
||||
setobj2s(L, ra, res);
|
||||
return pc;
|
||||
}
|
||||
else
|
||||
{
|
||||
// slow-path, may invoke Lua calls via __index metamethod
|
||||
L->cachedslot = slot;
|
||||
VM_PROTECT(luaV_gettable(L, rb, kv, ra));
|
||||
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
|
||||
VM_PATCH_C(pc - 2, L->cachedslot);
|
||||
return pc;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// fast-path: user data with C __index TM
|
||||
const TValue* fn = 0;
|
||||
if (ttisuserdata(rb) && (fn = fasttm(L, uvalue(rb)->metatable, TM_INDEX)) && ttisfunction(fn) && clvalue(fn)->isC)
|
||||
{
|
||||
// note: it's safe to push arguments past top for complicated reasons (see top of the file)
|
||||
LUAU_ASSERT(L->top + 3 < L->stack + L->stacksize);
|
||||
StkId top = L->top;
|
||||
setobj2s(L, top + 0, fn);
|
||||
setobj2s(L, top + 1, rb);
|
||||
setobj2s(L, top + 2, kv);
|
||||
L->top = top + 3;
|
||||
|
||||
L->cachedslot = LUAU_INSN_C(insn);
|
||||
VM_PROTECT(luaV_callTM(L, 2, LUAU_INSN_A(insn)));
|
||||
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
|
||||
VM_PATCH_C(pc - 2, L->cachedslot);
|
||||
return pc;
|
||||
}
|
||||
else if (ttisvector(rb))
|
||||
{
|
||||
// fast-path: quick case-insensitive comparison with "X"/"Y"/"Z"
|
||||
const char* name = getstr(tsvalue(kv));
|
||||
int ic = (name[0] | ' ') - 'x';
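// e.g. 'X' | ' ' == 'x', so ic becomes 0/1/2 for x/X, y/Y, z/Z without a separate case check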
|
||||
|
||||
#if LUA_VECTOR_SIZE == 4
|
||||
// 'w' is before 'x' in ASCII, so ic is -1 when indexing with 'w'
|
||||
if (ic == -1)
|
||||
ic = 3;
|
||||
#endif
|
||||
|
||||
if (unsigned(ic) < LUA_VECTOR_SIZE && name[1] == '\0')
|
||||
{
|
||||
const float* v = rb->value.v; // silences ubsan when indexing v[]
|
||||
setnvalue(ra, v[ic]);
|
||||
return pc;
|
||||
}
|
||||
|
||||
fn = fasttm(L, L->global->mt[LUA_TVECTOR], TM_INDEX);
|
||||
|
||||
if (fn && ttisfunction(fn) && clvalue(fn)->isC)
|
||||
{
|
||||
// note: it's safe to push arguments past top for complicated reasons (see top of the file)
|
||||
LUAU_ASSERT(L->top + 3 < L->stack + L->stacksize);
|
||||
StkId top = L->top;
|
||||
setobj2s(L, top + 0, fn);
|
||||
setobj2s(L, top + 1, rb);
|
||||
setobj2s(L, top + 2, kv);
|
||||
L->top = top + 3;
|
||||
|
||||
L->cachedslot = LUAU_INSN_C(insn);
|
||||
VM_PROTECT(luaV_callTM(L, 2, LUAU_INSN_A(insn)));
|
||||
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
|
||||
VM_PATCH_C(pc - 2, L->cachedslot);
|
||||
return pc;
|
||||
}
|
||||
|
||||
// fall through to slow path
|
||||
}
|
||||
|
||||
// fall through to slow path
|
||||
}
|
||||
|
||||
// slow-path, may invoke Lua calls via __index metamethod
|
||||
VM_PROTECT(luaV_gettable(L, rb, kv, ra));
|
||||
return pc;
|
||||
}
|
||||
|
||||
const Instruction* executeSETTABLEKS(lua_State* L, const Instruction* pc, StkId base, TValue* k)
|
||||
{
|
||||
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
|
||||
Instruction insn = *pc++;
|
||||
StkId ra = VM_REG(LUAU_INSN_A(insn));
|
||||
StkId rb = VM_REG(LUAU_INSN_B(insn));
|
||||
uint32_t aux = *pc++;
|
||||
TValue* kv = VM_KV(aux);
|
||||
LUAU_ASSERT(ttisstring(kv));
|
||||
|
||||
// fast-path: built-in table
|
||||
if (ttistable(rb))
|
||||
{
|
||||
Table* h = hvalue(rb);
|
||||
|
||||
int slot = LUAU_INSN_C(insn) & h->nodemask8;
|
||||
LuaNode* n = &h->node[slot];
|
||||
|
||||
// fast-path: value is in expected slot
|
||||
if (LUAU_LIKELY(ttisstring(gkey(n)) && tsvalue(gkey(n)) == tsvalue(kv) && !ttisnil(gval(n)) && !h->readonly))
|
||||
{
|
||||
setobj2t(L, gval(n), ra);
|
||||
luaC_barriert(L, h, ra);
|
||||
return pc;
|
||||
}
|
||||
else if (fastnotm(h->metatable, TM_NEWINDEX) && !h->readonly)
|
||||
{
|
||||
VM_PROTECT_PC(); // set may fail
|
||||
|
||||
TValue* res = luaH_setstr(L, h, tsvalue(kv));
|
||||
int cachedslot = gval2slot(h, res);
|
||||
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
|
||||
VM_PATCH_C(pc - 2, cachedslot);
|
||||
setobj2t(L, res, ra);
|
||||
luaC_barriert(L, h, ra);
|
||||
return pc;
|
||||
}
|
||||
else
|
||||
{
|
||||
// slow-path, may invoke Lua calls via __newindex metamethod
|
||||
L->cachedslot = slot;
|
||||
VM_PROTECT(luaV_settable(L, rb, kv, ra));
|
||||
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
|
||||
VM_PATCH_C(pc - 2, L->cachedslot);
|
||||
return pc;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// fast-path: user data with C __newindex TM
|
||||
const TValue* fn = 0;
|
||||
if (ttisuserdata(rb) && (fn = fasttm(L, uvalue(rb)->metatable, TM_NEWINDEX)) && ttisfunction(fn) && clvalue(fn)->isC)
|
||||
{
|
||||
// note: it's safe to push arguments past top for complicated reasons (see top of the file)
|
||||
LUAU_ASSERT(L->top + 4 < L->stack + L->stacksize);
|
||||
StkId top = L->top;
|
||||
setobj2s(L, top + 0, fn);
|
||||
setobj2s(L, top + 1, rb);
|
||||
setobj2s(L, top + 2, kv);
|
||||
setobj2s(L, top + 3, ra);
|
||||
L->top = top + 4;
|
||||
|
||||
L->cachedslot = LUAU_INSN_C(insn);
|
||||
VM_PROTECT(luaV_callTM(L, 3, -1));
|
||||
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
|
||||
VM_PATCH_C(pc - 2, L->cachedslot);
|
||||
return pc;
|
||||
}
|
||||
else
|
||||
{
|
||||
// slow-path, may invoke Lua calls via __newindex metamethod
|
||||
VM_PROTECT(luaV_settable(L, rb, kv, ra));
|
||||
return pc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const Instruction* executeNEWCLOSURE(lua_State* L, const Instruction* pc, StkId base, TValue* k)
|
||||
{
|
||||
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
|
||||
Instruction insn = *pc++;
|
||||
StkId ra = VM_REG(LUAU_INSN_A(insn));
|
||||
|
||||
Proto* pv = cl->l.p->p[LUAU_INSN_D(insn)];
|
||||
LUAU_ASSERT(unsigned(LUAU_INSN_D(insn)) < unsigned(cl->l.p->sizep));
|
||||
|
||||
VM_PROTECT_PC(); // luaF_newLclosure may fail due to OOM
|
||||
|
||||
// note: we save closure to stack early in case the code below wants to capture it by value
|
||||
Closure* ncl = luaF_newLclosure(L, pv->nups, cl->env, pv);
|
||||
setclvalue(L, ra, ncl);
|
||||
|
||||
for (int ui = 0; ui < pv->nups; ++ui)
|
||||
{
|
||||
Instruction uinsn = *pc++;
|
||||
LUAU_ASSERT(LUAU_INSN_OP(uinsn) == LOP_CAPTURE);
|
||||
|
||||
switch (LUAU_INSN_A(uinsn))
|
||||
{
|
||||
case LCT_VAL:
|
||||
setobj(L, &ncl->l.uprefs[ui], VM_REG(LUAU_INSN_B(uinsn)));
|
||||
break;
|
||||
|
||||
case LCT_REF:
|
||||
setupvalue(L, &ncl->l.uprefs[ui], luaF_findupval(L, VM_REG(LUAU_INSN_B(uinsn))));
|
||||
break;
|
||||
|
||||
case LCT_UPVAL:
|
||||
setobj(L, &ncl->l.uprefs[ui], VM_UV(LUAU_INSN_B(uinsn)));
|
||||
break;
|
||||
|
||||
default:
|
||||
LUAU_ASSERT(!"Unknown upvalue capture type");
|
||||
LUAU_UNREACHABLE(); // improves switch() codegen by eliding opcode bounds checks
|
||||
}
|
||||
}
|
||||
|
||||
VM_PROTECT(luaC_checkGC(L));
|
||||
return pc;
|
||||
}
|
||||
|
||||
const Instruction* executeNAMECALL(lua_State* L, const Instruction* pc, StkId base, TValue* k)
|
||||
{
|
||||
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
|
||||
Instruction insn = *pc++;
|
||||
StkId ra = VM_REG(LUAU_INSN_A(insn));
|
||||
StkId rb = VM_REG(LUAU_INSN_B(insn));
|
||||
uint32_t aux = *pc++;
|
||||
TValue* kv = VM_KV(aux);
|
||||
LUAU_ASSERT(ttisstring(kv));
|
||||
|
||||
if (ttistable(rb))
|
||||
{
|
||||
Table* h = hvalue(rb);
|
||||
// note: we can't use nodemask8 here because we need to query the main position of the table, and 8-bit nodemask8 only works
|
||||
// for predictive lookups
|
||||
LuaNode* n = &h->node[tsvalue(kv)->hash & (sizenode(h) - 1)];
|
||||
|
||||
const TValue* mt = 0;
|
||||
const LuaNode* mtn = 0;
|
||||
|
||||
// fast-path: key is in the table in expected slot
|
||||
if (ttisstring(gkey(n)) && tsvalue(gkey(n)) == tsvalue(kv) && !ttisnil(gval(n)))
|
||||
{
|
||||
// note: order of copies allows rb to alias ra+1 or ra
|
||||
setobj2s(L, ra + 1, rb);
|
||||
setobj2s(L, ra, gval(n));
|
||||
}
|
||||
// fast-path: key is absent from the base, table has an __index table, and it has the result in the expected slot
|
||||
else if (gnext(n) == 0 && (mt = fasttm(L, hvalue(rb)->metatable, TM_INDEX)) && ttistable(mt) &&
|
||||
(mtn = &hvalue(mt)->node[LUAU_INSN_C(insn) & hvalue(mt)->nodemask8]) && ttisstring(gkey(mtn)) && tsvalue(gkey(mtn)) == tsvalue(kv) &&
|
||||
!ttisnil(gval(mtn)))
|
||||
{
|
||||
// note: order of copies allows rb to alias ra+1 or ra
|
||||
setobj2s(L, ra + 1, rb);
|
||||
setobj2s(L, ra, gval(mtn));
|
||||
}
|
||||
else
|
||||
{
|
||||
// slow-path: handles full table lookup
|
||||
setobj2s(L, ra + 1, rb);
|
||||
L->cachedslot = LUAU_INSN_C(insn);
|
||||
VM_PROTECT(luaV_gettable(L, rb, kv, ra));
|
||||
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
|
||||
VM_PATCH_C(pc - 2, L->cachedslot);
|
||||
// recompute ra since stack might have been reallocated
|
||||
ra = VM_REG(LUAU_INSN_A(insn));
|
||||
if (ttisnil(ra))
|
||||
luaG_methoderror(L, ra + 1, tsvalue(kv));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Table* mt = ttisuserdata(rb) ? uvalue(rb)->metatable : L->global->mt[ttype(rb)];
|
||||
const TValue* tmi = 0;
|
||||
|
||||
// fast-path: metatable with __namecall
|
||||
if (const TValue* fn = fasttm(L, mt, TM_NAMECALL))
|
||||
{
|
||||
// note: order of copies allows rb to alias ra+1 or ra
|
||||
setobj2s(L, ra + 1, rb);
|
||||
setobj2s(L, ra, fn);
|
||||
|
||||
L->namecall = tsvalue(kv);
|
||||
}
|
||||
else if ((tmi = fasttm(L, mt, TM_INDEX)) && ttistable(tmi))
|
||||
{
|
||||
Table* h = hvalue(tmi);
|
||||
int slot = LUAU_INSN_C(insn) & h->nodemask8;
|
||||
LuaNode* n = &h->node[slot];
|
||||
|
||||
// fast-path: metatable with __index that has method in expected slot
|
||||
if (LUAU_LIKELY(ttisstring(gkey(n)) && tsvalue(gkey(n)) == tsvalue(kv) && !ttisnil(gval(n))))
|
||||
{
|
||||
// note: order of copies allows rb to alias ra+1 or ra
|
||||
setobj2s(L, ra + 1, rb);
|
||||
setobj2s(L, ra, gval(n));
|
||||
}
|
||||
else
|
||||
{
|
||||
// slow-path: handles slot mismatch
|
||||
setobj2s(L, ra + 1, rb);
|
||||
L->cachedslot = slot;
|
||||
VM_PROTECT(luaV_gettable(L, rb, kv, ra));
|
||||
// save cachedslot to accelerate future lookups; patches currently executing instruction since pc-2 rolls back two pc++
|
||||
VM_PATCH_C(pc - 2, L->cachedslot);
|
||||
// recompute ra since stack might have been reallocated
|
||||
ra = VM_REG(LUAU_INSN_A(insn));
|
||||
if (ttisnil(ra))
|
||||
luaG_methoderror(L, ra + 1, tsvalue(kv));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// slow-path: handles non-table __index
|
||||
setobj2s(L, ra + 1, rb);
|
||||
VM_PROTECT(luaV_gettable(L, rb, kv, ra));
|
||||
// recompute ra since stack might have been reallocated
|
||||
ra = VM_REG(LUAU_INSN_A(insn));
|
||||
if (ttisnil(ra))
|
||||
luaG_methoderror(L, ra + 1, tsvalue(kv));
|
||||
}
|
||||
}
|
||||
|
||||
// intentional fallthrough to CALL
|
||||
LUAU_ASSERT(LUAU_INSN_OP(*pc) == LOP_CALL);
|
||||
return pc;
|
||||
}
|
||||
|
||||
const Instruction* executeSETLIST(lua_State* L, const Instruction* pc, StkId base, TValue* k)
|
||||
{
|
||||
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
|
||||
Instruction insn = *pc++;
|
||||
StkId ra = VM_REG(LUAU_INSN_A(insn));
|
||||
StkId rb = &base[LUAU_INSN_B(insn)]; // note: this can point to L->top if c == LUA_MULTRET making VM_REG unsafe to use
|
||||
int c = LUAU_INSN_C(insn) - 1;
|
||||
uint32_t index = *pc++;
|
||||
|
||||
if (c == LUA_MULTRET)
|
||||
{
|
||||
c = int(L->top - rb);
|
||||
L->top = L->ci->top;
|
||||
}
|
||||
|
||||
// TODO: we really don't need this anymore
// note: the check must precede hvalue(ra), which asserts ttistable in debug builds
if (!ttistable(ra))
    return NULL; // temporary workaround to weaken a rather powerful exploitation primitive in case of a MITM attack on bytecode

Table* h = hvalue(ra);
|
||||
|
||||
int last = index + c - 1;
|
||||
if (last > h->sizearray)
|
||||
{
|
||||
VM_PROTECT_PC(); // luaH_resizearray may fail due to OOM
|
||||
|
||||
luaH_resizearray(L, h, last);
|
||||
}
|
||||
|
||||
TValue* array = h->array;
|
||||
|
||||
for (int i = 0; i < c; ++i)
|
||||
setobj2t(L, &array[index + i - 1], rb + i);
|
||||
|
||||
luaC_barrierfast(L, h);
|
||||
return pc;
|
||||
}
|
||||
|
||||
const Instruction* executeFORGPREP(lua_State* L, const Instruction* pc, StkId base, TValue* k)
|
||||
{
|
||||
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
|
||||
Instruction insn = *pc++;
|
||||
StkId ra = VM_REG(LUAU_INSN_A(insn));
|
||||
|
||||
if (ttisfunction(ra))
|
||||
{
|
||||
// will be called during FORGLOOP
|
||||
}
|
||||
else
|
||||
{
|
||||
Table* mt = ttistable(ra) ? hvalue(ra)->metatable : ttisuserdata(ra) ? uvalue(ra)->metatable : cast_to(Table*, NULL);
|
||||
|
||||
if (const TValue* fn = fasttm(L, mt, TM_ITER))
|
||||
{
|
||||
setobj2s(L, ra + 1, ra);
|
||||
setobj2s(L, ra, fn);
|
||||
|
||||
L->top = ra + 2; // func + self arg
|
||||
LUAU_ASSERT(L->top <= L->stack_last);
|
||||
|
||||
VM_PROTECT(luaD_call(L, ra, 3));
|
||||
L->top = L->ci->top;
|
||||
|
||||
// recompute ra since stack might have been reallocated
|
||||
ra = VM_REG(LUAU_INSN_A(insn));
|
||||
|
||||
// protect against __iter returning nil, since nil is used as a marker for builtin iteration in FORGLOOP
|
||||
if (ttisnil(ra))
|
||||
{
|
||||
VM_PROTECT_PC(); // next call always errors
|
||||
luaG_typeerror(L, ra, "call");
|
||||
}
|
||||
}
|
||||
else if (fasttm(L, mt, TM_CALL))
|
||||
{
|
||||
// table or userdata with __call, will be called during FORGLOOP
|
||||
// TODO: we might be able to stop supporting this depending on whether it's used in practice
|
||||
}
|
||||
else if (ttistable(ra))
|
||||
{
|
||||
// set up registers for builtin iteration
|
||||
setobj2s(L, ra + 1, ra);
|
||||
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(0)));
|
||||
setnilvalue(ra);
|
||||
}
|
||||
else
|
||||
{
|
||||
VM_PROTECT_PC(); // next call always errors
|
||||
luaG_typeerror(L, ra, "iterate over");
|
||||
}
|
||||
}
|
||||
|
||||
pc += LUAU_INSN_D(insn);
|
||||
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
|
||||
return pc;
|
||||
}
|
||||
|
||||
const Instruction* executeGETVARARGS(lua_State* L, const Instruction* pc, StkId base, TValue* k)
|
||||
{
|
||||
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
|
||||
Instruction insn = *pc++;
|
||||
int b = LUAU_INSN_B(insn) - 1;
|
||||
int n = cast_int(base - L->ci->func) - cl->l.p->numparams - 1;
|
||||
|
||||
if (b == LUA_MULTRET)
|
||||
{
|
||||
VM_PROTECT(luaD_checkstack(L, n));
|
||||
StkId ra = VM_REG(LUAU_INSN_A(insn)); // previous call may change the stack
|
||||
|
||||
for (int j = 0; j < n; j++)
|
||||
setobj2s(L, ra + j, base - n + j);
|
||||
|
||||
L->top = ra + n;
|
||||
return pc;
|
||||
}
|
||||
else
|
||||
{
|
||||
StkId ra = VM_REG(LUAU_INSN_A(insn));
|
||||
|
||||
for (int j = 0; j < b && j < n; j++)
|
||||
setobj2s(L, ra + j, base - n + j);
|
||||
for (int j = n; j < b; j++)
|
||||
setnilvalue(ra + j);
|
||||
return pc;
|
||||
}
|
||||
}
|
||||
|
||||
const Instruction* executeDUPCLOSURE(lua_State* L, const Instruction* pc, StkId base, TValue* k)
|
||||
{
|
||||
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
|
||||
Instruction insn = *pc++;
|
||||
StkId ra = VM_REG(LUAU_INSN_A(insn));
|
||||
TValue* kv = VM_KV(LUAU_INSN_D(insn));
|
||||
|
||||
Closure* kcl = clvalue(kv);
|
||||
|
||||
VM_PROTECT_PC(); // luaF_newLclosure may fail due to OOM
|
||||
|
||||
// clone closure if the environment is not shared
|
||||
// note: we save closure to stack early in case the code below wants to capture it by value
|
||||
Closure* ncl = (kcl->env == cl->env) ? kcl : luaF_newLclosure(L, kcl->nupvalues, cl->env, kcl->l.p);
|
||||
setclvalue(L, ra, ncl);
|
||||
|
||||
// this loop does three things:
|
||||
// - if the closure was created anew, it just fills it with upvalues
|
||||
// - if the closure from the constant table is used, it fills it with upvalues so that it can be shared in the future
|
||||
// - if the closure is reused, it checks if the reuse is safe via rawequal, and falls back to duplicating the closure
|
||||
// normally this would use two separate loops, for reuse check and upvalue setup, but MSVC codegen goes crazy if you do that
|
||||
for (int ui = 0; ui < kcl->nupvalues; ++ui)
|
||||
{
|
||||
Instruction uinsn = pc[ui];
|
||||
LUAU_ASSERT(LUAU_INSN_OP(uinsn) == LOP_CAPTURE);
|
||||
LUAU_ASSERT(LUAU_INSN_A(uinsn) == LCT_VAL || LUAU_INSN_A(uinsn) == LCT_UPVAL);
|
||||
|
||||
TValue* uv = (LUAU_INSN_A(uinsn) == LCT_VAL) ? VM_REG(LUAU_INSN_B(uinsn)) : VM_UV(LUAU_INSN_B(uinsn));
|
||||
|
||||
// check if the existing closure is safe to reuse
|
||||
if (ncl == kcl && luaO_rawequalObj(&ncl->l.uprefs[ui], uv))
|
||||
continue;
|
||||
|
||||
// lazily clone the closure and update the upvalues
|
||||
if (ncl == kcl && kcl->preload == 0)
|
||||
{
|
||||
ncl = luaF_newLclosure(L, kcl->nupvalues, cl->env, kcl->l.p);
|
||||
setclvalue(L, ra, ncl);
|
||||
|
||||
ui = -1; // restart the loop to fill all upvalues
|
||||
continue;
|
||||
}
|
||||
|
||||
// this updates a newly created closure, or an existing closure created during preload, in which case we need a barrier
|
||||
setobj(L, &ncl->l.uprefs[ui], uv);
|
||||
luaC_barrier(L, ncl, uv);
|
||||
}
|
||||
|
||||
// this is a noop if ncl is newly created or shared successfully, but it has to run after the closure is preloaded for the first time
|
||||
ncl->preload = 0;
|
||||
|
||||
if (kcl != ncl)
|
||||
VM_PROTECT(luaC_checkGC(L));
|
||||
|
||||
pc += kcl->nupvalues;
|
||||
return pc;
|
||||
}
|
||||
|
||||
const Instruction* executePREPVARARGS(lua_State* L, const Instruction* pc, StkId base, TValue* k)
|
||||
{
|
||||
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);
|
||||
Instruction insn = *pc++;
|
||||
int numparams = LUAU_INSN_A(insn);
|
||||
|
||||
// all fixed parameters are copied after the top so we need more stack space
|
||||
VM_PROTECT(luaD_checkstack(L, cl->stacksize + numparams));
|
||||
|
||||
// the caller must have filled extra fixed arguments with nil
|
||||
LUAU_ASSERT(cast_int(L->top - base) >= numparams);
|
||||
|
||||
// move fixed parameters to final position
|
||||
StkId fixed = base; // first fixed argument
|
||||
base = L->top; // final position of first argument
|
||||
|
||||
for (int i = 0; i < numparams; ++i)
|
||||
{
|
||||
setobj2s(L, base + i, fixed + i);
|
||||
setnilvalue(fixed + i);
|
||||
}
|
||||
|
||||
// rewire our stack frame to point to the new base
|
||||
L->ci->base = base;
|
||||
L->ci->top = base + cl->stacksize;
|
||||
|
||||
L->base = base;
|
||||
L->top = L->ci->top;
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace CodeGen
|
||||
} // namespace Luau
|
|
@ -0,0 +1,36 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#pragma once
|
||||
|
||||
#include "lobject.h"
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace CodeGen
|
||||
{
|
||||
|
||||
bool forgLoopTableIter(lua_State* L, Table* h, int index, TValue* ra);
|
||||
bool forgLoopNodeIter(lua_State* L, Table* h, int index, TValue* ra);
|
||||
bool forgLoopNonTableFallback(lua_State* L, int insnA, int aux);
|
||||
|
||||
void forgPrepXnextFallback(lua_State* L, TValue* ra, int pc);
|
||||
|
||||
Closure* callProlog(lua_State* L, TValue* ra, StkId argtop, int nresults);
|
||||
void callEpilogC(lua_State* L, int nresults, int n);
|
||||
|
||||
Closure* callFallback(lua_State* L, StkId ra, StkId argtop, int nresults);
|
||||
Closure* returnFallback(lua_State* L, StkId ra, StkId valend);
|
||||
|
||||
const Instruction* executeGETGLOBAL(lua_State* L, const Instruction* pc, StkId base, TValue* k);
|
||||
const Instruction* executeSETGLOBAL(lua_State* L, const Instruction* pc, StkId base, TValue* k);
|
||||
const Instruction* executeGETTABLEKS(lua_State* L, const Instruction* pc, StkId base, TValue* k);
|
||||
const Instruction* executeSETTABLEKS(lua_State* L, const Instruction* pc, StkId base, TValue* k);
|
||||
const Instruction* executeNEWCLOSURE(lua_State* L, const Instruction* pc, StkId base, TValue* k);
|
||||
const Instruction* executeNAMECALL(lua_State* L, const Instruction* pc, StkId base, TValue* k);
|
||||
const Instruction* executeSETLIST(lua_State* L, const Instruction* pc, StkId base, TValue* k);
|
||||
const Instruction* executeFORGPREP(lua_State* L, const Instruction* pc, StkId base, TValue* k);
|
||||
const Instruction* executeGETVARARGS(lua_State* L, const Instruction* pc, StkId base, TValue* k);
|
||||
const Instruction* executeDUPCLOSURE(lua_State* L, const Instruction* pc, StkId base, TValue* k);
|
||||
const Instruction* executePREPVARARGS(lua_State* L, const Instruction* pc, StkId base, TValue* k);
|
||||
|
||||
} // namespace CodeGen
|
||||
} // namespace Luau
|
|
@ -0,0 +1,197 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#include "CodeGenX64.h"
|
||||
|
||||
#include "Luau/AssemblyBuilderX64.h"
|
||||
#include "Luau/UnwindBuilder.h"
|
||||
|
||||
#include "CustomExecUtils.h"
|
||||
#include "NativeState.h"
|
||||
#include "EmitCommonX64.h"
|
||||
|
||||
#include "lstate.h"
|
||||
|
||||
/* An overview of the native environment stack setup that the entry function performs:
|
||||
* Each line is 8 bytes, stack grows downwards.
|
||||
*
|
||||
* | ... previous frames ...
|
||||
* | rdx home space | (unused)
|
||||
* | rcx home space | (unused)
|
||||
* | return address |
|
||||
* | ... saved non-volatile registers ... <-- rsp + kStackSize + kLocalsSize
|
||||
* | unused | for 16 byte alignment of the stack
|
||||
* | sCode |
|
||||
* | sClosure | <-- rsp + kStackSize
|
||||
* | argument 6 | <-- rsp + 40
|
||||
* | argument 5 | <-- rsp + 32
|
||||
* | r9 home space |
|
||||
* | r8 home space |
|
||||
* | rdx home space |
|
||||
* | rcx home space | <-- rsp points here
|
||||
*
|
||||
* Arguments to our entry function are saved to home space only on Windows.
|
||||
 * Space for arguments to the functions we call is always reserved, but used only on Windows.
|
||||
*
|
||||
* Right now we use a frame pointer, but because of a fixed layout we can omit it in the future
|
||||
*/
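// A minimal sketch of how the diagram above maps to spill-slot operands (illustrative;
// the actual definitions are assumed to live in EmitCommonX64.h):
//   sClosure ~ qword[rsp + kStackSize + 0]
//   sCode    ~ qword[rsp + kStackSize + 8]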
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace CodeGen
|
||||
{
|
||||
namespace X64
|
||||
{
|
||||
|
||||
struct EntryLocations
|
||||
{
|
||||
Label start;
|
||||
Label prologueEnd;
|
||||
Label epilogueStart;
|
||||
};
|
||||
|
||||
static EntryLocations buildEntryFunction(AssemblyBuilderX64& build, UnwindBuilder& unwind)
|
||||
{
|
||||
EntryLocations locations;
|
||||
|
||||
build.align(kFunctionAlignment, X64::AlignmentDataX64::Ud2);
|
||||
|
||||
locations.start = build.setLabel();
|
||||
unwind.startFunction();
|
||||
|
||||
// Save common non-volatile registers
|
||||
if (build.abi == ABIX64::SystemV)
|
||||
{
|
||||
// We need to use a standard rbp-based frame setup for debuggers to work with JIT code
|
||||
build.push(rbp);
|
||||
build.mov(rbp, rsp);
|
||||
}
|
||||
|
||||
build.push(rbx);
|
||||
build.push(r12);
|
||||
build.push(r13);
|
||||
build.push(r14);
|
||||
build.push(r15);
|
||||
|
||||
if (build.abi == ABIX64::Windows)
|
||||
{
|
||||
// Save non-volatile registers that are specific to Windows x64 ABI
|
||||
build.push(rdi);
|
||||
build.push(rsi);
|
||||
|
||||
// On Windows, rbp is available as a general-purpose non-volatile register; we currently don't use it, but we need to push an even number
|
||||
// of registers for stack alignment...
|
||||
build.push(rbp);
|
||||
|
||||
// TODO: once we start using non-volatile SIMD registers on Windows, we will save those here
|
||||
}
|
||||
|
||||
// Allocate stack space (reg home area + local data)
|
||||
build.sub(rsp, kStackSize + kLocalsSize);
|
||||
|
||||
locations.prologueEnd = build.setLabel();
|
||||
|
||||
uint32_t prologueSize = build.getLabelOffset(locations.prologueEnd) - build.getLabelOffset(locations.start);
|
||||
|
||||
if (build.abi == ABIX64::SystemV)
|
||||
unwind.prologueX64(prologueSize, kStackSize + kLocalsSize, /* setupFrame= */ true, {rbx, r12, r13, r14, r15});
|
||||
else if (build.abi == ABIX64::Windows)
|
||||
unwind.prologueX64(prologueSize, kStackSize + kLocalsSize, /* setupFrame= */ false, {rbx, r12, r13, r14, r15, rdi, rsi, rbp});
|
||||
|
||||
// Setup native execution environment
|
||||
build.mov(rState, rArg1);
|
||||
build.mov(rNativeContext, rArg4);
|
||||
build.mov(rBase, qword[rState + offsetof(lua_State, base)]); // L->base
|
||||
build.mov(rax, qword[rState + offsetof(lua_State, ci)]); // L->ci
|
||||
build.mov(rax, qword[rax + offsetof(CallInfo, func)]); // L->ci->func
|
||||
build.mov(rax, qword[rax + offsetof(TValue, value.gc)]); // L->ci->func->value.gc aka cl
|
||||
build.mov(sClosure, rax);
|
||||
build.mov(rConstants, qword[rArg2 + offsetof(Proto, k)]); // proto->k
|
||||
build.mov(rax, qword[rArg2 + offsetof(Proto, code)]); // proto->code
|
||||
build.mov(sCode, rax);
|
||||
|
||||
// Jump to the specified instruction; further control flow will be handled with a custom ABI, with register setup from EmitCommonX64.h
|
||||
build.jmp(rArg3);
|
||||
|
||||
// Even though we jumped away, we will return here in the end
|
||||
locations.epilogueStart = build.setLabel();
|
||||
|
||||
// Cleanup and exit
|
||||
build.add(rsp, kStackSize + kLocalsSize);
|
||||
|
||||
if (build.abi == ABIX64::Windows)
|
||||
{
|
||||
build.pop(rbp);
|
||||
build.pop(rsi);
|
||||
build.pop(rdi);
|
||||
}
|
||||
|
||||
build.pop(r15);
|
||||
build.pop(r14);
|
||||
build.pop(r13);
|
||||
build.pop(r12);
|
||||
build.pop(rbx);
|
||||
|
||||
if (build.abi == ABIX64::SystemV)
|
||||
build.pop(rbp);
|
||||
|
||||
build.ret();
|
||||
|
||||
// Our entry function is special: it spans the whole remaining code area
|
||||
unwind.finishFunction(build.getLabelOffset(locations.start), kFullBlockFuncton);
|
||||
|
||||
return locations;
|
||||
}
|
||||
|
||||
bool initHeaderFunctions(NativeState& data)
|
||||
{
|
||||
AssemblyBuilderX64 build(/* logText= */ false);
|
||||
UnwindBuilder& unwind = *data.unwindBuilder.get();
|
||||
|
||||
unwind.startInfo(UnwindBuilder::X64);
|
||||
|
||||
EntryLocations entryLocations = buildEntryFunction(build, unwind);
|
||||
|
||||
build.finalize();
|
||||
|
||||
unwind.finishInfo();
|
||||
|
||||
LUAU_ASSERT(build.data.empty());
|
||||
|
||||
uint8_t* codeStart = nullptr;
|
||||
if (!data.codeAllocator.allocate(
|
||||
build.data.data(), int(build.data.size()), build.code.data(), int(build.code.size()), data.gateData, data.gateDataSize, codeStart))
|
||||
{
|
||||
LUAU_ASSERT(!"failed to create entry function");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Set the offset at the beginning so that functions in new blocks will not overlay the locations
|
||||
// specified by the unwind information of the entry function
|
||||
unwind.setBeginOffset(build.getLabelOffset(entryLocations.prologueEnd));
|
||||
|
||||
data.context.gateEntry = codeStart + build.getLabelOffset(entryLocations.start);
|
||||
data.context.gateExit = codeStart + build.getLabelOffset(entryLocations.epilogueStart);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void assembleHelpers(X64::AssemblyBuilderX64& build, ModuleHelpers& helpers)
|
||||
{
|
||||
if (build.logText)
|
||||
build.logAppend("; exitContinueVm\n");
|
||||
helpers.exitContinueVm = build.setLabel();
|
||||
emitExit(build, /* continueInVm */ true);
|
||||
|
||||
if (build.logText)
|
||||
build.logAppend("; exitNoContinueVm\n");
|
||||
helpers.exitNoContinueVm = build.setLabel();
|
||||
emitExit(build, /* continueInVm */ false);
|
||||
|
||||
if (build.logText)
|
||||
build.logAppend("; continueCallInVm\n");
|
||||
helpers.continueCallInVm = build.setLabel();
|
||||
emitContinueCallInVm(build);
|
||||
}
|
||||
|
||||
} // namespace X64
|
||||
} // namespace CodeGen
|
||||
} // namespace Luau
|
|
@ -0,0 +1,22 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

namespace Luau
{
namespace CodeGen
{

struct NativeState;
struct ModuleHelpers;

namespace X64
{

class AssemblyBuilderX64;

bool initHeaderFunctions(NativeState& data);
void assembleHelpers(AssemblyBuilderX64& build, ModuleHelpers& helpers);

} // namespace X64
} // namespace CodeGen
} // namespace Luau

@ -0,0 +1,106 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include "NativeState.h"

#include "lobject.h"
#include "lstate.h"

namespace Luau
{
namespace CodeGen
{

// Here we define helper functions to wrap interaction with Luau custom execution API so that it works with or without LUA_CUSTOM_EXECUTION

#if LUA_CUSTOM_EXECUTION

inline lua_ExecutionCallbacks* getExecutionCallbacks(lua_State* L)
{
    return &L->global->ecb;
}

inline NativeState* getNativeState(lua_State* L)
{
    lua_ExecutionCallbacks* ecb = getExecutionCallbacks(L);
    return (NativeState*)ecb->context;
}

inline void setNativeState(lua_State* L, NativeState* nativeState)
{
    lua_ExecutionCallbacks* ecb = getExecutionCallbacks(L);
    ecb->context = nativeState;
}

inline NativeState* createNativeState(lua_State* L)
{
    NativeState* state = new NativeState();
    setNativeState(L, state);
    return state;
}

inline void destroyNativeState(lua_State* L)
{
    NativeState* state = getNativeState(L);
    setNativeState(L, nullptr);
    delete state;
}

#else

inline lua_ExecutionCallbacks* getExecutionCallbacks(lua_State* L)
{
    return nullptr;
}

inline NativeState* getNativeState(lua_State* L)
{
    return nullptr;
}

inline void setNativeState(lua_State* L, NativeState* nativeState) {}

inline NativeState* createNativeState(lua_State* L)
{
    return nullptr;
}

inline void destroyNativeState(lua_State* L) {}

#endif

inline int getOpLength(LuauOpcode op)
{
    switch (op)
    {
    case LOP_GETGLOBAL:
    case LOP_SETGLOBAL:
    case LOP_GETIMPORT:
    case LOP_GETTABLEKS:
    case LOP_SETTABLEKS:
    case LOP_NAMECALL:
    case LOP_JUMPIFEQ:
    case LOP_JUMPIFLE:
    case LOP_JUMPIFLT:
    case LOP_JUMPIFNOTEQ:
    case LOP_JUMPIFNOTLE:
    case LOP_JUMPIFNOTLT:
    case LOP_NEWTABLE:
    case LOP_SETLIST:
    case LOP_FORGLOOP:
    case LOP_LOADKX:
    case LOP_FASTCALL2:
    case LOP_FASTCALL2K:
    case LOP_JUMPXEQKNIL:
    case LOP_JUMPXEQKB:
    case LOP_JUMPXEQKN:
    case LOP_JUMPXEQKS:
        return 2;

    default:
        return 1;
    }
}
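
// Illustration only (a sketch, not part of the change itself): how getOpLength
// drives bytecode iteration, assuming 'code'/'sizecode' mirror Proto::code and
// Proto::sizecode and that LUAU_INSN_OP from Luau/Bytecode.h is visible here.
// Dual-word opcodes carry their AUX data in code[i + 1].
inline void forEachOp(const Instruction* code, int sizecode)
{
    for (int i = 0; i < sizecode;)
    {
        LuauOpcode op = LuauOpcode(LUAU_INSN_OP(code[i]));

        // ... inspect code[i] (and code[i + 1] for dual-word opcodes) here ...

        i += getOpLength(op); // advances by 2 when the opcode has an AUX word
    }
}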

} // namespace CodeGen
} // namespace Luau

@ -0,0 +1,128 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "EmitBuiltinsX64.h"

#include "Luau/AssemblyBuilderX64.h"
#include "Luau/Bytecode.h"
#include "Luau/IrCallWrapperX64.h"
#include "Luau/IrRegAllocX64.h"

#include "EmitCommonX64.h"
#include "NativeState.h"

#include "lstate.h"

namespace Luau
{
namespace CodeGen
{
namespace X64
{

static void emitBuiltinMathFrexp(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int arg, int nresults)
{
    IrCallWrapperX64 callWrap(regs, build);
    callWrap.addArgument(SizeX64::xmmword, luauRegValue(arg));
    callWrap.addArgument(SizeX64::qword, sTemporarySlot);
    callWrap.call(qword[rNativeContext + offsetof(NativeContext, libm_frexp)]);

    build.vmovsd(luauRegValue(ra), xmm0);

    if (nresults > 1)
    {
        build.vcvtsi2sd(xmm0, xmm0, dword[sTemporarySlot + 0]);
        build.vmovsd(luauRegValue(ra + 1), xmm0);
    }
}

static void emitBuiltinMathModf(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int arg, int nresults)
{
    IrCallWrapperX64 callWrap(regs, build);
    callWrap.addArgument(SizeX64::xmmword, luauRegValue(arg));
    callWrap.addArgument(SizeX64::qword, sTemporarySlot);
    callWrap.call(qword[rNativeContext + offsetof(NativeContext, libm_modf)]);

    build.vmovsd(xmm1, qword[sTemporarySlot + 0]);
    build.vmovsd(luauRegValue(ra), xmm1);

    if (nresults > 1)
        build.vmovsd(luauRegValue(ra + 1), xmm0);
}

static void emitBuiltinMathSign(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int arg)
{
    ScopedRegX64 tmp0{regs, SizeX64::xmmword};
    ScopedRegX64 tmp1{regs, SizeX64::xmmword};
    ScopedRegX64 tmp2{regs, SizeX64::xmmword};
    ScopedRegX64 tmp3{regs, SizeX64::xmmword};

    build.vmovsd(tmp0.reg, luauRegValue(arg));
    build.vxorpd(tmp1.reg, tmp1.reg, tmp1.reg);

    // Set tmp2 to -1 if arg < 0, else 0
    build.vcmpltsd(tmp2.reg, tmp0.reg, tmp1.reg);
    build.vmovsd(tmp3.reg, build.f64(-1));
    build.vandpd(tmp2.reg, tmp2.reg, tmp3.reg);

    // Set mask bit to 1 if 0 < arg, else 0
    build.vcmpltsd(tmp0.reg, tmp1.reg, tmp0.reg);

    // Result = (mask-bit == 1) ? 1.0 : tmp2
    // If arg < 0 then tmp2 is -1 and mask-bit is 0, result is -1
    // If arg == 0 then tmp2 is 0 and mask-bit is 0, result is 0
    // If arg > 0 then tmp2 is 0 and mask-bit is 1, result is 1
    build.vblendvpd(tmp0.reg, tmp2.reg, build.f64x2(1, 1), tmp0.reg);

    build.vmovsd(luauRegValue(ra), tmp0.reg);
}
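
// Scalar reference model (illustration only, not part of the change) of the
// branchless sequence above; a NaN input fails both compares and yields 0,
// matching the emitted code.
static double mathSignModel(double v)
{
    double neg = (v < 0.0) ? -1.0 : 0.0; // vcmpltsd + vandpd against -1
    return (0.0 < v) ? 1.0 : neg;        // vcmpltsd + vblendvpd against 1
}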

static void emitBuiltinType(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int arg)
{
    ScopedRegX64 tmp0{regs, SizeX64::qword};
    ScopedRegX64 tag{regs, SizeX64::dword};

    build.mov(tag.reg, luauRegTag(arg));

    build.mov(tmp0.reg, qword[rState + offsetof(lua_State, global)]);
    build.mov(tmp0.reg, qword[tmp0.reg + qwordReg(tag.reg) * sizeof(TString*) + offsetof(global_State, ttname)]);

    build.mov(luauRegValue(ra), tmp0.reg);
}

static void emitBuiltinTypeof(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int arg)
{
    IrCallWrapperX64 callWrap(regs, build);
    callWrap.addArgument(SizeX64::qword, rState);
    callWrap.addArgument(SizeX64::qword, luauRegAddress(arg));
    callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaT_objtypenamestr)]);

    build.mov(luauRegValue(ra), rax);
}

void emitBuiltin(IrRegAllocX64& regs, AssemblyBuilderX64& build, int bfid, int ra, int arg, OperandX64 arg2, int nparams, int nresults)
{
    switch (bfid)
    {
    case LBF_MATH_FREXP:
        LUAU_ASSERT(nparams == 1 && (nresults == 1 || nresults == 2));
        return emitBuiltinMathFrexp(regs, build, ra, arg, nresults);
    case LBF_MATH_MODF:
        LUAU_ASSERT(nparams == 1 && (nresults == 1 || nresults == 2));
        return emitBuiltinMathModf(regs, build, ra, arg, nresults);
    case LBF_MATH_SIGN:
        LUAU_ASSERT(nparams == 1 && nresults == 1);
        return emitBuiltinMathSign(regs, build, ra, arg);
    case LBF_TYPE:
        LUAU_ASSERT(nparams == 1 && nresults == 1);
        return emitBuiltinType(regs, build, ra, arg);
    case LBF_TYPEOF:
        LUAU_ASSERT(nparams == 1 && nresults == 1);
        return emitBuiltinTypeof(regs, build, ra, arg);
    default:
        LUAU_ASSERT(!"Missing x64 lowering");
        break;
    }
}

} // namespace X64
} // namespace CodeGen
} // namespace Luau

@ -0,0 +1,23 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

namespace Luau
{
namespace CodeGen
{

struct Label;
struct IrOp;

namespace X64
{

class AssemblyBuilderX64;
struct OperandX64;
struct IrRegAllocX64;

void emitBuiltin(IrRegAllocX64& regs, AssemblyBuilderX64& build, int bfid, int ra, int arg, OperandX64 arg2, int nparams, int nresults);

} // namespace X64
} // namespace CodeGen
} // namespace Luau

@ -0,0 +1,37 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include "Luau/Label.h"

namespace Luau
{
namespace CodeGen
{

constexpr unsigned kTValueSizeLog2 = 4;
constexpr unsigned kLuaNodeSizeLog2 = 5;

// TKey.tt and TKey.next are packed together in a bitfield
constexpr unsigned kOffsetOfTKeyTagNext = 12; // offsetof cannot be used on a bit field
constexpr unsigned kTKeyTagBits = 4;
constexpr unsigned kTKeyTagMask = (1 << kTKeyTagBits) - 1;
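
// Sketch (illustration only, assuming a little-endian target like the x64/A64
// backends): reading the tag out of a raw TKey, mirroring what emitted code
// computes with a dword load at kOffsetOfTKeyTagNext followed by an AND with
// kTKeyTagMask; the upper bits of that word hold TKey.next and must be masked off.
inline unsigned tkeyTagModel(const unsigned char* key)
{
    // a 4-bit tag fits entirely in the first byte of the packed tt/next word
    return key[kOffsetOfTKeyTagNext] & kTKeyTagMask;
}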

constexpr unsigned kOffsetOfInstructionC = 3;

// Leaf functions that are placed in every module to perform common instruction sequences
struct ModuleHelpers
{
    // A64/X64
    Label exitContinueVm;
    Label exitNoContinueVm;

    // X64
    Label continueCallInVm;

    // A64
    Label reentry;   // x0: closure
    Label interrupt; // x0: pc offset, x1: return address, x2: interrupt
};

} // namespace CodeGen
} // namespace Luau

@ -0,0 +1,59 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include "Luau/AssemblyBuilderA64.h"

#include "EmitCommon.h"

#include "lobject.h"
#include "ltm.h"
#include "lstate.h"

// AArch64 ABI reminder:
// Arguments: x0-x7, v0-v7
// Return: x0, v0 (or x8 that points to the address of the resulting structure)
// Volatile: x9-x15, v16-v31 ("caller-saved", any call may change them)
// Intra-procedure-call temporary: x16-x17 (any call or relocated jump may change them, as linker may point branches to veneers to perform long jumps)
// Non-volatile: x19-x28, v8-v15 ("callee-saved", preserved after calls, only bottom half of SIMD registers is preserved!)
// Reserved: x18: reserved for platform use; x29: frame pointer (unless omitted); x30: link register; x31: stack pointer

namespace Luau
{
namespace CodeGen
{

struct NativeState;

namespace A64
{

// Data that is very common to access is placed in non-volatile registers:
// 1. Constant registers (only loaded during codegen entry)
constexpr RegisterA64 rState = x19;         // lua_State* L
constexpr RegisterA64 rNativeContext = x20; // NativeContext* context

// 2. Frame registers (reloaded when call frame changes; rBase is also reloaded after all calls that may reallocate stack)
constexpr RegisterA64 rConstants = x21; // TValue* k
constexpr RegisterA64 rClosure = x22;   // Closure* cl
constexpr RegisterA64 rCode = x23;      // Instruction* code
constexpr RegisterA64 rBase = x24;      // StkId base

// Native code is as stackless as the interpreter, so we can place some data on the stack once and have it accessible at any point
// See CodeGenA64.cpp for layout
constexpr unsigned kStashSlots = 8;  // stashed non-volatile registers
constexpr unsigned kSpillSlots = 22; // slots for spilling temporary registers
constexpr unsigned kTempSlots = 2;   // 16 bytes of temporary space, such luxury!

constexpr unsigned kStackSize = (kStashSlots + kSpillSlots + kTempSlots) * 8;

constexpr AddressA64 sSpillArea = mem(sp, kStashSlots * 8);
constexpr AddressA64 sTemporary = mem(sp, (kStashSlots + kSpillSlots) * 8);
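
// Illustrative sanity checks (not part of the original change): AAPCS64 requires
// sp to stay 16-byte aligned, and the three slot groups are meant to tile the
// frame exactly, with the spill and temporary areas landing inside it.
static_assert(kStackSize % 16 == 0, "sp must remain 16-byte aligned while the frame is active");
static_assert((kStashSlots + kSpillSlots + kTempSlots) * 8 == kStackSize, "stash/spill/temp slots must cover the whole frame");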

inline void emitUpdateBase(AssemblyBuilderA64& build)
{
    build.ldr(rBase, mem(rState, offsetof(lua_State, base)));
}

} // namespace A64
} // namespace CodeGen
} // namespace Luau

@ -0,0 +1,358 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "EmitCommonX64.h"

#include "Luau/AssemblyBuilderX64.h"
#include "Luau/IrCallWrapperX64.h"
#include "Luau/IrData.h"
#include "Luau/IrRegAllocX64.h"

#include "CustomExecUtils.h"
#include "NativeState.h"

#include "lgc.h"
#include "lstate.h"

namespace Luau
{
namespace CodeGen
{
namespace X64
{

void jumpOnNumberCmp(AssemblyBuilderX64& build, RegisterX64 tmp, OperandX64 lhs, OperandX64 rhs, IrCondition cond, Label& label)
{
    // Refresher on comi/ucomi EFLAGS:
    // CF only: less
    // ZF only: equal
    // PF+CF+ZF: unordered (NaN)

    if (rhs.cat == CategoryX64::reg)
    {
        build.vucomisd(rhs, lhs);
    }
    else
    {
        build.vmovsd(tmp, rhs);
        build.vucomisd(tmp, lhs);
    }

    // Keep in mind that 'Not' conditions want 'true' for comparisons with NaN
    // And because of NaN, integer check interchangeability like 'not less or equal' <-> 'greater' does not hold
    switch (cond)
    {
    case IrCondition::NotLessEqual:
        // (b < a) is the same as !(a <= b). jnae checks CF=1 which means < or NaN
        build.jcc(ConditionX64::NotAboveEqual, label);
        break;
    case IrCondition::LessEqual:
        // (b >= a) is the same as (a <= b). jae checks CF=0 which means >= and not NaN
        build.jcc(ConditionX64::AboveEqual, label);
        break;
    case IrCondition::NotLess:
        // (b <= a) is the same as !(a < b). jna checks CF=1 or ZF=1 which means <= or NaN
        build.jcc(ConditionX64::NotAbove, label);
        break;
    case IrCondition::Less:
        // (b > a) is the same as (a < b). ja checks CF=0 and ZF=0 which means > and not NaN
        build.jcc(ConditionX64::Above, label);
        break;
    case IrCondition::NotEqual:
        // ZF=0 or PF=1 means != or NaN
        build.jcc(ConditionX64::NotZero, label);
        build.jcc(ConditionX64::Parity, label);
        break;
    default:
        LUAU_ASSERT(!"Unsupported condition");
    }
}
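
// Scalar reference model (illustration only, not part of the change) for the
// condition table above; the 'Not*' variants must come out true when either
// operand is NaN, which is why they map to NotAboveEqual/NotAbove/Parity rather
// than the plain integer conditions.
static bool numberCmpModel(double a, double b, IrCondition cond)
{
    switch (cond)
    {
    case IrCondition::Less:
        return a < b; // false for NaN
    case IrCondition::NotLess:
        return !(a < b); // true for NaN
    case IrCondition::LessEqual:
        return a <= b; // false for NaN
    case IrCondition::NotLessEqual:
        return !(a <= b); // true for NaN
    case IrCondition::NotEqual:
        return a != b; // true for NaN
    default:
        return false;
    }
}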

void jumpOnAnyCmpFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, IrCondition cond, Label& label)
{
    IrCallWrapperX64 callWrap(regs, build);
    callWrap.addArgument(SizeX64::qword, rState);
    callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
    callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));

    if (cond == IrCondition::NotLessEqual || cond == IrCondition::LessEqual)
        callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_lessequal)]);
    else if (cond == IrCondition::NotLess || cond == IrCondition::Less)
        callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_lessthan)]);
    else if (cond == IrCondition::NotEqual || cond == IrCondition::Equal)
        callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_equalval)]);
    else
        LUAU_ASSERT(!"Unsupported condition");

    emitUpdateBase(build);
    build.test(eax, eax);
    build.jcc(cond == IrCondition::NotLessEqual || cond == IrCondition::NotLess || cond == IrCondition::NotEqual ? ConditionX64::Zero
                                                                                                                 : ConditionX64::NotZero,
        label);
}

void getTableNodeAtCachedSlot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, RegisterX64 table, int pcpos)
{
    LUAU_ASSERT(tmp != node);
    LUAU_ASSERT(table != node);

    build.mov(node, qword[table + offsetof(Table, node)]);

    // compute cached slot
    build.mov(tmp, sCode);
    build.movzx(dwordReg(tmp), byte[tmp + pcpos * sizeof(Instruction) + kOffsetOfInstructionC]);
    build.and_(byteReg(tmp), byte[table + offsetof(Table, nodemask8)]);

    // LuaNode* n = &h->node[slot];
    build.shl(dwordReg(tmp), kLuaNodeSizeLog2);
    build.add(node, tmp);
}

void convertNumberToIndexOrJump(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 numd, RegisterX64 numi, Label& label)
{
    LUAU_ASSERT(numi.size == SizeX64::dword);

    // Convert to integer, NaN is converted into 0x80000000
    build.vcvttsd2si(numi, numd);

    // Convert that integer back to double
    build.vcvtsi2sd(tmp, numd, numi);

    build.vucomisd(tmp, numd); // Sets ZF=1 if equal or NaN
    // We don't need non-integer values
    // But to skip the PF=1 check, we proceed with NaN because 0x80000000 index is out of bounds
    build.jcc(ConditionX64::NotZero, label);
}
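
// Scalar shape (illustration only, not part of the change) of the round-trip
// check above. Note that a plain C++ cast is undefined for NaN/out-of-range
// inputs, so this only models the in-range behaviour; vcvttsd2si instead yields
// 0x80000000 for those, which the downstream out-of-bounds index check rejects.
static bool numberToIndexModel(double d, int& out)
{
    int i = int(d);     // stands in for vcvttsd2si
    if (double(i) != d) // stands in for vcvtsi2sd + vucomisd + jne
        return false;   // fractional input: take the fallback label
    out = i;
    return true;
}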

void callArithHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, OperandX64 c, TMS tm)
{
    IrCallWrapperX64 callWrap(regs, build);
    callWrap.addArgument(SizeX64::qword, rState);
    callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
    callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
    callWrap.addArgument(SizeX64::qword, c);
    callWrap.addArgument(SizeX64::dword, tm);
    callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_doarith)]);

    emitUpdateBase(build);
}

void callLengthHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb)
{
    IrCallWrapperX64 callWrap(regs, build);
    callWrap.addArgument(SizeX64::qword, rState);
    callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
    callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
    callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_dolen)]);

    emitUpdateBase(build);
}

void callPrepareForN(IrRegAllocX64& regs, AssemblyBuilderX64& build, int limit, int step, int init)
{
    IrCallWrapperX64 callWrap(regs, build);
    callWrap.addArgument(SizeX64::qword, rState);
    callWrap.addArgument(SizeX64::qword, luauRegAddress(limit));
    callWrap.addArgument(SizeX64::qword, luauRegAddress(step));
    callWrap.addArgument(SizeX64::qword, luauRegAddress(init));
    callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_prepareFORN)]);
}

void callGetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra)
{
    IrCallWrapperX64 callWrap(regs, build);
    callWrap.addArgument(SizeX64::qword, rState);
    callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
    callWrap.addArgument(SizeX64::qword, c);
    callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
    callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_gettable)]);

    emitUpdateBase(build);
}

void callSetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra)
{
    IrCallWrapperX64 callWrap(regs, build);
    callWrap.addArgument(SizeX64::qword, rState);
    callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
    callWrap.addArgument(SizeX64::qword, c);
    callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
    callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_settable)]);

    emitUpdateBase(build);
}

void checkObjectBarrierConditions(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 object, int ra, Label& skip)
{
    // iscollectable(ra)
    build.cmp(luauRegTag(ra), LUA_TSTRING);
    build.jcc(ConditionX64::Less, skip);

    // isblack(obj2gco(o))
    build.test(byte[object + offsetof(GCheader, marked)], bitmask(BLACKBIT));
    build.jcc(ConditionX64::Zero, skip);

    // iswhite(gcvalue(ra))
    build.mov(tmp, luauRegValue(ra));
    build.test(byte[tmp + offsetof(GCheader, marked)], bit2mask(WHITE0BIT, WHITE1BIT));
    build.jcc(ConditionX64::Zero, skip);
}
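
// Reference predicate (illustration only, not part of the change) for the three
// checks above, written with the lgc.h mark-bit macros: a write barrier is only
// needed when a black object stores a collectable value that is still white.
static bool needsObjectBarrierModel(int valueTag, uint8_t objectMarked, uint8_t valueMarked)
{
    bool collectable = valueTag >= LUA_TSTRING;                              // iscollectable(ra)
    bool objectIsBlack = (objectMarked & bitmask(BLACKBIT)) != 0;            // isblack(obj2gco(o))
    bool valueIsWhite = (valueMarked & bit2mask(WHITE0BIT, WHITE1BIT)) != 0; // iswhite(gcvalue(ra))
    return collectable && objectIsBlack && valueIsWhite;
}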

void callBarrierObject(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 object, IrOp objectOp, int ra)
{
    Label skip;

    ScopedRegX64 tmp{regs, SizeX64::qword};
    checkObjectBarrierConditions(build, tmp.reg, object, ra, skip);

    {
        ScopedSpills spillGuard(regs);

        IrCallWrapperX64 callWrap(regs, build);
        callWrap.addArgument(SizeX64::qword, rState);
        callWrap.addArgument(SizeX64::qword, object, objectOp);
        callWrap.addArgument(SizeX64::qword, tmp);
        callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_barrierf)]);
    }

    build.setLabel(skip);
}

void callBarrierTableFast(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 table, IrOp tableOp)
{
    Label skip;

    // isblack(obj2gco(t))
    build.test(byte[table + offsetof(GCheader, marked)], bitmask(BLACKBIT));
    build.jcc(ConditionX64::Zero, skip);

    {
        ScopedSpills spillGuard(regs);

        IrCallWrapperX64 callWrap(regs, build);
        callWrap.addArgument(SizeX64::qword, rState);
        callWrap.addArgument(SizeX64::qword, table, tableOp);
        callWrap.addArgument(SizeX64::qword, addr[table + offsetof(Table, gclist)]);
        callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_barrierback)]);
    }

    build.setLabel(skip);
}

void callStepGc(IrRegAllocX64& regs, AssemblyBuilderX64& build)
{
    Label skip;

    {
        ScopedRegX64 tmp1{regs, SizeX64::qword};
        ScopedRegX64 tmp2{regs, SizeX64::qword};

        build.mov(tmp1.reg, qword[rState + offsetof(lua_State, global)]);
        build.mov(tmp2.reg, qword[tmp1.reg + offsetof(global_State, totalbytes)]);
        build.cmp(tmp2.reg, qword[tmp1.reg + offsetof(global_State, GCthreshold)]);
        build.jcc(ConditionX64::Below, skip);
    }

    {
        ScopedSpills spillGuard(regs);

        IrCallWrapperX64 callWrap(regs, build);
        callWrap.addArgument(SizeX64::qword, rState);
        callWrap.addArgument(SizeX64::dword, 1);
        callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_step)]);
        emitUpdateBase(build);
    }

    build.setLabel(skip);
}

void emitExit(AssemblyBuilderX64& build, bool continueInVm)
{
    if (continueInVm)
        build.mov(eax, 1);
    else
        build.xor_(eax, eax);

    build.jmp(qword[rNativeContext + offsetof(NativeContext, gateExit)]);
}

void emitUpdateBase(AssemblyBuilderX64& build)
{
    build.mov(rBase, qword[rState + offsetof(lua_State, base)]);
}

static void emitSetSavedPc(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos)
{
    ScopedRegX64 tmp1{regs, SizeX64::qword};
    ScopedRegX64 tmp2{regs, SizeX64::qword};

    build.mov(tmp1.reg, sCode);
    build.add(tmp1.reg, pcpos * sizeof(Instruction));
    build.mov(tmp2.reg, qword[rState + offsetof(lua_State, ci)]);
    build.mov(qword[tmp2.reg + offsetof(CallInfo, savedpc)], tmp1.reg);
}

void emitInterrupt(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos)
{
    Label skip;

    ScopedRegX64 tmp{regs, SizeX64::qword};

    // Skip if there is no interrupt set
    build.mov(tmp.reg, qword[rState + offsetof(lua_State, global)]);
    build.mov(tmp.reg, qword[tmp.reg + offsetof(global_State, cb.interrupt)]);
    build.test(tmp.reg, tmp.reg);
    build.jcc(ConditionX64::Zero, skip);

    emitSetSavedPc(regs, build, pcpos + 1);

    // Call interrupt
    // TODO: This code should move to the end of the function, or even be outlined so that it can be shared by multiple interruptible instructions
    IrCallWrapperX64 callWrap(regs, build);
    callWrap.addArgument(SizeX64::qword, rState);
    callWrap.addArgument(SizeX64::dword, -1);
    callWrap.call(tmp.release());

    emitUpdateBase(build); // interrupt may have reallocated stack

    // Check if we need to exit
    build.mov(al, byte[rState + offsetof(lua_State, status)]);
    build.test(al, al);
    build.jcc(ConditionX64::Zero, skip);

    build.mov(rax, qword[rState + offsetof(lua_State, ci)]);
    build.sub(qword[rax + offsetof(CallInfo, savedpc)], sizeof(Instruction));
    emitExit(build, /* continueInVm */ false);

    build.setLabel(skip);
}

void emitFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int offset, int pcpos)
{
    // fallback(L, instruction, base, k)
    IrCallWrapperX64 callWrap(regs, build);
    callWrap.addArgument(SizeX64::qword, rState);

    RegisterX64 reg = callWrap.suggestNextArgumentRegister(SizeX64::qword);
    build.mov(reg, sCode);
    callWrap.addArgument(SizeX64::qword, addr[reg + pcpos * sizeof(Instruction)]);

    callWrap.addArgument(SizeX64::qword, rBase);
    callWrap.addArgument(SizeX64::qword, rConstants);
    callWrap.call(qword[rNativeContext + offset]);

    emitUpdateBase(build);
}

void emitContinueCallInVm(AssemblyBuilderX64& build)
{
    RegisterX64 proto = rcx; // Sync with emitInstCall

    build.mov(rdx, qword[proto + offsetof(Proto, code)]);
    build.mov(rax, qword[rState + offsetof(lua_State, ci)]);
    build.mov(qword[rax + offsetof(CallInfo, savedpc)], rdx);

    emitExit(build, /* continueInVm */ true);
}

} // namespace X64
} // namespace CodeGen
} // namespace Luau

@ -0,0 +1,239 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include "Luau/AssemblyBuilderX64.h"

#include "EmitCommon.h"

#include "lobject.h"
#include "ltm.h"

// MS x64 ABI reminder:
// Arguments: rcx, rdx, r8, r9 ('overlapped' with xmm0-xmm3)
// Return: rax, xmm0
// Nonvolatile: r12-r15, rdi, rsi, rbx, rbp
// SIMD: only xmm6-xmm15 are non-volatile, all ymm upper parts are volatile

// AMD64 ABI reminder:
// Arguments: rdi, rsi, rdx, rcx, r8, r9 (xmm0-xmm7)
// Return: rax, rdx, xmm0, xmm1
// Nonvolatile: r12-r15, rbx, rbp
// SIMD: all volatile

namespace Luau
{
namespace CodeGen
{

enum class IrCondition : uint8_t;
struct NativeState;
struct IrOp;

namespace X64
{

struct IrRegAllocX64;

constexpr uint32_t kFunctionAlignment = 32;

// Data that is very common to access is placed in non-volatile registers
constexpr RegisterX64 rState = r15;         // lua_State* L
constexpr RegisterX64 rBase = r14;          // StkId base
constexpr RegisterX64 rNativeContext = r13; // NativeContext* context
constexpr RegisterX64 rConstants = r12;     // TValue* k

// Native code is as stackless as the interpreter, so we can place some data on the stack once and have it accessible at any point
// See CodeGenX64.cpp for layout
constexpr unsigned kStackSize = 32 + 16;               // 4 home locations for registers, 16 bytes for additional function call arguments
constexpr unsigned kSpillSlots = 4;                    // locations for register allocator to spill data into
constexpr unsigned kLocalsSize = 24 + 8 * kSpillSlots; // 3 extra slots for our custom locals (also aligns the stack to 16 byte boundary)

constexpr OperandX64 sClosure = qword[rsp + kStackSize + 0]; // Closure* cl
constexpr OperandX64 sCode = qword[rsp + kStackSize + 8];    // Instruction* code
constexpr OperandX64 sTemporarySlot = addr[rsp + kStackSize + 16];
constexpr OperandX64 sSpillArea = addr[rsp + kStackSize + 24];
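
// Illustrative sanity check (not part of the original change): the entry
// function pushes an even number of registers on both ABIs, leaving rsp at
// 8 (mod 16) before the sub, so the combined allocation must be 8 (mod 16) to
// restore 16-byte alignment at call boundaries.
static_assert((kStackSize + kLocalsSize) % 16 == 8, "stack frame size must re-align rsp to 16 bytes");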

// TODO: These should be replaced with a portable call function that checks the ABI at runtime and reorders moves accordingly to avoid conflicts
#if defined(_WIN32)

constexpr RegisterX64 rArg1 = rcx;
constexpr RegisterX64 rArg2 = rdx;
constexpr RegisterX64 rArg3 = r8;
constexpr RegisterX64 rArg4 = r9;
constexpr RegisterX64 rArg5 = noreg;
constexpr RegisterX64 rArg6 = noreg;
constexpr OperandX64 sArg5 = qword[rsp + 32];
constexpr OperandX64 sArg6 = qword[rsp + 40];

#else

constexpr RegisterX64 rArg1 = rdi;
constexpr RegisterX64 rArg2 = rsi;
constexpr RegisterX64 rArg3 = rdx;
constexpr RegisterX64 rArg4 = rcx;
constexpr RegisterX64 rArg5 = r8;
constexpr RegisterX64 rArg6 = r9;
constexpr OperandX64 sArg5 = noreg;
constexpr OperandX64 sArg6 = noreg;

#endif
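
// Usage sketch (illustration only; 'someInt' and 'someHelper' are placeholders,
// not real names): the rArgN aliases keep call sites ABI-neutral, so a
// three-argument C call is emitted identically on Windows and SystemV:
//
//   build.mov(rArg1, rState);
//   build.lea(rArg2, luauRegAddress(ra));
//   build.mov(dwordReg(rArg3), someInt);
//   build.call(qword[rNativeContext + offsetof(NativeContext, someHelper)]);
//
// Only four register arguments exist on Windows, so a fifth has to be stored to
// sArg5 there; IrCallWrapperX64 plays a similar role for IR-driven calls.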

inline OperandX64 luauReg(int ri)
{
    return xmmword[rBase + ri * sizeof(TValue)];
}

inline OperandX64 luauRegAddress(int ri)
{
    return addr[rBase + ri * sizeof(TValue)];
}

inline OperandX64 luauRegValue(int ri)
{
    return qword[rBase + ri * sizeof(TValue) + offsetof(TValue, value)];
}

inline OperandX64 luauRegTag(int ri)
{
    return dword[rBase + ri * sizeof(TValue) + offsetof(TValue, tt)];
}

inline OperandX64 luauRegValueInt(int ri)
{
    return dword[rBase + ri * sizeof(TValue) + offsetof(TValue, value)];
}

inline OperandX64 luauRegValueVector(int ri, int index)
{
    return dword[rBase + ri * sizeof(TValue) + offsetof(TValue, value) + (sizeof(float) * index)];
}

inline OperandX64 luauConstant(int ki)
{
    return xmmword[rConstants + ki * sizeof(TValue)];
}

inline OperandX64 luauConstantAddress(int ki)
{
    return addr[rConstants + ki * sizeof(TValue)];
}

inline OperandX64 luauConstantTag(int ki)
{
    return dword[rConstants + ki * sizeof(TValue) + offsetof(TValue, tt)];
}

inline OperandX64 luauConstantValue(int ki)
{
    return qword[rConstants + ki * sizeof(TValue) + offsetof(TValue, value)];
}

inline OperandX64 luauNodeKeyValue(RegisterX64 node)
{
    return qword[node + offsetof(LuaNode, key) + offsetof(TKey, value)];
}

// Note: tag has dirty upper bits
inline OperandX64 luauNodeKeyTag(RegisterX64 node)
{
    return dword[node + offsetof(LuaNode, key) + kOffsetOfTKeyTagNext];
}

inline OperandX64 luauNodeValue(RegisterX64 node)
{
    return xmmword[node + offsetof(LuaNode, val)];
}

inline void setLuauReg(AssemblyBuilderX64& build, RegisterX64 tmp, int ri, OperandX64 op)
{
    LUAU_ASSERT(op.cat == CategoryX64::mem);

    build.vmovups(tmp, op);
    build.vmovups(luauReg(ri), tmp);
}

inline void jumpIfTagIs(AssemblyBuilderX64& build, int ri, lua_Type tag, Label& label)
{
    build.cmp(luauRegTag(ri), tag);
    build.jcc(ConditionX64::Equal, label);
}

inline void jumpIfTagIsNot(AssemblyBuilderX64& build, int ri, lua_Type tag, Label& label)
{
    build.cmp(luauRegTag(ri), tag);
    build.jcc(ConditionX64::NotEqual, label);
}

// Note: fallthrough label should be placed after this condition
inline void jumpIfFalsy(AssemblyBuilderX64& build, int ri, Label& target, Label& fallthrough)
{
    jumpIfTagIs(build, ri, LUA_TNIL, target);             // false if nil
    jumpIfTagIsNot(build, ri, LUA_TBOOLEAN, fallthrough); // true if not nil or boolean

    build.cmp(luauRegValueInt(ri), 0);
    build.jcc(ConditionX64::Equal, target); // false if boolean value is 'false'
}

// Note: fallthrough label should be placed after this condition
inline void jumpIfTruthy(AssemblyBuilderX64& build, int ri, Label& target, Label& fallthrough)
{
    jumpIfTagIs(build, ri, LUA_TNIL, fallthrough);   // false if nil
    jumpIfTagIsNot(build, ri, LUA_TBOOLEAN, target); // true if not nil or boolean

    build.cmp(luauRegValueInt(ri), 0);
    build.jcc(ConditionX64::NotEqual, target); // true if boolean value is 'true'
}
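
// Scalar model (illustration only, not part of the change) of the truthiness
// rule both helpers encode: nil and boolean false are falsy; every other value,
// including 0 and the empty string, is truthy.
inline bool isTruthyModel(int tag, int boolValue)
{
    if (tag == LUA_TNIL)
        return false;
    if (tag == LUA_TBOOLEAN)
        return boolValue != 0;
    return true;
}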

inline void jumpIfNodeKeyTagIsNot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, lua_Type tag, Label& label)
{
    tmp.size = SizeX64::dword;

    build.mov(tmp, luauNodeKeyTag(node));
    build.and_(tmp, kTKeyTagMask);
    build.cmp(tmp, tag);
    build.jcc(ConditionX64::NotEqual, label);
}

inline void jumpIfNodeValueTagIs(AssemblyBuilderX64& build, RegisterX64 node, lua_Type tag, Label& label)
{
    build.cmp(dword[node + offsetof(LuaNode, val) + offsetof(TValue, tt)], tag);
    build.jcc(ConditionX64::Equal, label);
}

inline void jumpIfNodeKeyNotInExpectedSlot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, OperandX64 expectedKey, Label& label)
{
    jumpIfNodeKeyTagIsNot(build, tmp, node, LUA_TSTRING, label);

    build.mov(tmp, expectedKey);
    build.cmp(tmp, luauNodeKeyValue(node));
    build.jcc(ConditionX64::NotEqual, label);

    jumpIfNodeValueTagIs(build, node, LUA_TNIL, label);
}

void jumpOnNumberCmp(AssemblyBuilderX64& build, RegisterX64 tmp, OperandX64 lhs, OperandX64 rhs, IrCondition cond, Label& label);
void jumpOnAnyCmpFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, IrCondition cond, Label& label);

void getTableNodeAtCachedSlot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, RegisterX64 table, int pcpos);
void convertNumberToIndexOrJump(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 numd, RegisterX64 numi, Label& label);

void callArithHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, OperandX64 c, TMS tm);
void callLengthHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb);
void callPrepareForN(IrRegAllocX64& regs, AssemblyBuilderX64& build, int limit, int step, int init);
void callGetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra);
void callSetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra);
void checkObjectBarrierConditions(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 object, int ra, Label& skip);
void callBarrierObject(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 object, IrOp objectOp, int ra);
void callBarrierTableFast(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 table, IrOp tableOp);
void callStepGc(IrRegAllocX64& regs, AssemblyBuilderX64& build);

void emitExit(AssemblyBuilderX64& build, bool continueInVm);
void emitUpdateBase(AssemblyBuilderX64& build);
void emitInterrupt(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos);
void emitFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int offset, int pcpos);

void emitContinueCallInVm(AssemblyBuilderX64& build);

} // namespace X64
} // namespace CodeGen
} // namespace Luau

@ -0,0 +1,493 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "EmitInstructionX64.h"

#include "Luau/AssemblyBuilderX64.h"
#include "Luau/IrRegAllocX64.h"

#include "CustomExecUtils.h"
#include "EmitCommonX64.h"

namespace Luau
{
namespace CodeGen
{
namespace X64
{

void emitInstCall(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults)
{
    build.mov(rArg1, rState);
    build.lea(rArg2, luauRegAddress(ra));

    if (nparams == LUA_MULTRET)
        build.mov(rArg3, qword[rState + offsetof(lua_State, top)]);
    else
        build.lea(rArg3, luauRegAddress(ra + 1 + nparams));

    build.mov(dwordReg(rArg4), nresults);
    build.call(qword[rNativeContext + offsetof(NativeContext, callProlog)]);
    RegisterX64 ccl = rax; // Returned from callProlog

    emitUpdateBase(build);

    Label cFuncCall;

    build.test(byte[ccl + offsetof(Closure, isC)], 1);
    build.jcc(ConditionX64::NotZero, cFuncCall);

    {
        RegisterX64 proto = rcx; // Sync with emitContinueCallInVm
        RegisterX64 ci = rdx;
        RegisterX64 argi = rsi;
        RegisterX64 argend = rdi;

        build.mov(proto, qword[ccl + offsetof(Closure, l.p)]);

        // Switch current Closure
        build.mov(sClosure, ccl); // Last use of 'ccl'

        build.mov(ci, qword[rState + offsetof(lua_State, ci)]);

        Label fillnil, exitfillnil;

        // argi = L->top
        build.mov(argi, qword[rState + offsetof(lua_State, top)]);

        // argend = L->base + p->numparams
        build.movzx(eax, byte[proto + offsetof(Proto, numparams)]);
        build.shl(eax, kTValueSizeLog2);
        build.lea(argend, addr[rBase + rax]);

        // while (argi < argend) setnilvalue(argi++);
        build.setLabel(fillnil);
        build.cmp(argi, argend);
        build.jcc(ConditionX64::NotBelow, exitfillnil);

        build.mov(dword[argi + offsetof(TValue, tt)], LUA_TNIL);
        build.add(argi, sizeof(TValue));
        build.jmp(fillnil); // This loop rarely runs so it's not worth repeating cmp/jcc

        build.setLabel(exitfillnil);

        // Set L->top to ci->top as most functions expect (no vararg)
        build.mov(rax, qword[ci + offsetof(CallInfo, top)]);
        build.mov(qword[rState + offsetof(lua_State, top)], rax);

        // But if it is vararg, update it to 'argi'
        Label skipVararg;

        build.test(byte[proto + offsetof(Proto, is_vararg)], 1);
        build.jcc(ConditionX64::Zero, skipVararg);

        build.mov(qword[rState + offsetof(lua_State, top)], argi);
        build.setLabel(skipVararg);

        // Get native function entry
        build.mov(rax, qword[proto + offsetof(Proto, exectarget)]);
        build.test(rax, rax);
        build.jcc(ConditionX64::Zero, helpers.continueCallInVm);

        // Mark call frame as custom
        build.mov(dword[ci + offsetof(CallInfo, flags)], LUA_CALLINFO_CUSTOM);

        // Switch current constants
        build.mov(rConstants, qword[proto + offsetof(Proto, k)]);

        // Switch current code
        build.mov(rdx, qword[proto + offsetof(Proto, code)]);
        build.mov(sCode, rdx);

        build.jmp(rax);
    }

    build.setLabel(cFuncCall);

    {
        // results = ccl->c.f(L);
        build.mov(rArg1, rState);
        build.call(qword[ccl + offsetof(Closure, c.f)]); // Last use of 'ccl'
        RegisterX64 results = eax;

        build.test(results, results);                            // test here will set SF=1 for a negative number and it always sets OF to 0
        build.jcc(ConditionX64::Less, helpers.exitNoContinueVm); // jl jumps if SF != OF

        // We have special handling for small number of expected results below
        if (nresults != 0 && nresults != 1)
        {
            build.mov(rArg1, rState);
            build.mov(dwordReg(rArg2), nresults);
            build.mov(dwordReg(rArg3), results);
            build.call(qword[rNativeContext + offsetof(NativeContext, callEpilogC)]);

            emitUpdateBase(build);
            return;
        }

        RegisterX64 ci = rdx;
        RegisterX64 cip = rcx;
        RegisterX64 vali = rsi;

        build.mov(ci, qword[rState + offsetof(lua_State, ci)]);
        build.lea(cip, addr[ci - sizeof(CallInfo)]);

        // L->base = cip->base
        build.mov(rBase, qword[cip + offsetof(CallInfo, base)]);
        build.mov(qword[rState + offsetof(lua_State, base)], rBase);

        if (nresults == 1)
        {
            // Opportunistically copy the result we expected from (L->top - results)
            build.mov(vali, qword[rState + offsetof(lua_State, top)]);
            build.shl(results, kTValueSizeLog2);
            build.sub(vali, qwordReg(results));
            build.vmovups(xmm0, xmmword[vali]);
            build.vmovups(luauReg(ra), xmm0);

            Label skipnil;

            // If there was no result, override the value with 'nil'
            build.test(results, results);
            build.jcc(ConditionX64::NotZero, skipnil);
            build.mov(luauRegTag(ra), LUA_TNIL);
            build.setLabel(skipnil);
        }

        // L->ci = cip
        build.mov(qword[rState + offsetof(lua_State, ci)], cip);

        // L->top = cip->top
        build.mov(rax, qword[cip + offsetof(CallInfo, top)]);
        build.mov(qword[rState + offsetof(lua_State, top)], rax);
    }
}

void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults)
{
    RegisterX64 ci = r8;
    RegisterX64 cip = r9;
    RegisterX64 res = rdi;
    RegisterX64 nresults = esi;

    build.mov(ci, qword[rState + offsetof(lua_State, ci)]);
    build.lea(cip, addr[ci - sizeof(CallInfo)]);

    // res = ci->func; note: we assume CALL always puts func+args and expects results to start at func
    build.mov(res, qword[ci + offsetof(CallInfo, func)]);
    // nresults = ci->nresults
    build.mov(nresults, dword[ci + offsetof(CallInfo, nresults)]);

    {
        Label skipResultCopy;

        RegisterX64 counter = ecx;

        if (actualResults == 0)
        {
            // Our instruction doesn't have any results, so just fill results expected in parent with 'nil'
            build.test(nresults, nresults);                     // test here will set SF=1 for a negative number, ZF=1 for zero and OF=0
            build.jcc(ConditionX64::LessEqual, skipResultCopy); // jle jumps if SF != OF or ZF == 1

            build.mov(counter, nresults);

            Label repeatNilLoop = build.setLabel();
            build.mov(dword[res + offsetof(TValue, tt)], LUA_TNIL);
            build.add(res, sizeof(TValue));
            build.dec(counter);
            build.jcc(ConditionX64::NotZero, repeatNilLoop);
        }
        else if (actualResults == 1)
        {
            // Try setting our 1 result
            build.test(nresults, nresults);
            build.jcc(ConditionX64::Zero, skipResultCopy);

            build.lea(counter, addr[nresults - 1]);

            build.vmovups(xmm0, luauReg(ra));
            build.vmovups(xmmword[res], xmm0);
            build.add(res, sizeof(TValue));

            // Fill the rest of the expected results with 'nil'
            build.test(counter, counter);                       // test here will set SF=1 for a negative number, ZF=1 for zero and OF=0
            build.jcc(ConditionX64::LessEqual, skipResultCopy); // jle jumps if SF != OF or ZF == 1

            Label repeatNilLoop = build.setLabel();
            build.mov(dword[res + offsetof(TValue, tt)], LUA_TNIL);
            build.add(res, sizeof(TValue));
            build.dec(counter);
            build.jcc(ConditionX64::NotZero, repeatNilLoop);
        }
        else
        {
            RegisterX64 vali = rax;
            RegisterX64 valend = rdx;

            // Copy return values into parent stack (but only up to nresults!)
            build.test(nresults, nresults);
            build.jcc(ConditionX64::Zero, skipResultCopy);

            // vali = ra
            build.lea(vali, luauRegAddress(ra));

            // Copy as much as possible for MULTRET calls, and only as much as needed otherwise
            if (actualResults == LUA_MULTRET)
                build.mov(valend, qword[rState + offsetof(lua_State, top)]); // valend = L->top
            else
                build.lea(valend, luauRegAddress(ra + actualResults)); // valend = ra + actualResults

            build.mov(counter, nresults);

            Label repeatValueLoop, exitValueLoop;

            build.setLabel(repeatValueLoop);
            build.cmp(vali, valend);
            build.jcc(ConditionX64::NotBelow, exitValueLoop);

            build.vmovups(xmm0, xmmword[vali]);
            build.vmovups(xmmword[res], xmm0);
            build.add(vali, sizeof(TValue));
            build.add(res, sizeof(TValue));
            build.dec(counter);
            build.jcc(ConditionX64::NotZero, repeatValueLoop);

            build.setLabel(exitValueLoop);

            // Fill the rest of the expected results with 'nil'
            build.test(counter, counter);                       // test here will set SF=1 for a negative number, ZF=1 for zero and OF=0
            build.jcc(ConditionX64::LessEqual, skipResultCopy); // jle jumps if SF != OF or ZF == 1

            Label repeatNilLoop = build.setLabel();
            build.mov(dword[res + offsetof(TValue, tt)], LUA_TNIL);
            build.add(res, sizeof(TValue));
            build.dec(counter);
            build.jcc(ConditionX64::NotZero, repeatNilLoop);
        }

        build.setLabel(skipResultCopy);
    }

    build.mov(qword[rState + offsetof(lua_State, ci)], cip);     // L->ci = cip
    build.mov(rBase, qword[cip + offsetof(CallInfo, base)]);     // sync base = L->base while we have a chance
    build.mov(qword[rState + offsetof(lua_State, base)], rBase); // L->base = cip->base

    // Start with result for LUA_MULTRET/exit value
    build.mov(qword[rState + offsetof(lua_State, top)], res); // L->top = res

    // Unlikely, but this might be the last return from VM
    build.test(byte[ci + offsetof(CallInfo, flags)], LUA_CALLINFO_RETURN);
    build.jcc(ConditionX64::NotZero, helpers.exitNoContinueVm);

    Label skipFixedRetTop;
    build.test(nresults, nresults);                 // test here will set SF=1 for a negative number and it always sets OF to 0
    build.jcc(ConditionX64::Less, skipFixedRetTop); // jl jumps if SF != OF
    build.mov(rax, qword[cip + offsetof(CallInfo, top)]);
    build.mov(qword[rState + offsetof(lua_State, top)], rax); // L->top = cip->top
    build.setLabel(skipFixedRetTop);

    // Returning to the previous function is a bit tricky
    // Registers alive: r9 (cip)
    RegisterX64 proto = rcx;
    RegisterX64 execdata = rbx;

    // Change closure
    build.mov(rax, qword[cip + offsetof(CallInfo, func)]);
    build.mov(rax, qword[rax + offsetof(TValue, value.gc)]);
    build.mov(sClosure, rax);

    build.mov(proto, qword[rax + offsetof(Closure, l.p)]);

    build.mov(execdata, qword[proto + offsetof(Proto, execdata)]);

    build.test(byte[cip + offsetof(CallInfo, flags)], LUA_CALLINFO_CUSTOM);
    build.jcc(ConditionX64::Zero, helpers.exitContinueVm); // Continue in interpreter if function has no native data

    // Change constants
    build.mov(rConstants, qword[proto + offsetof(Proto, k)]);

    // Change code
    build.mov(rdx, qword[proto + offsetof(Proto, code)]);
    build.mov(sCode, rdx);

    build.mov(rax, qword[cip + offsetof(CallInfo, savedpc)]);

    // To get instruction index from instruction pointer, we need to divide byte offset by 4
    // But we will actually need to scale instruction index by 4 back to byte offset later so it cancels out
    build.sub(rax, rdx);

    // Get new instruction location and jump to it
    build.mov(edx, dword[execdata + rax]);
    build.add(rdx, qword[proto + offsetof(Proto, exectarget)]);
    build.jmp(rdx);
}

void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, int count, uint32_t index)
{
    OperandX64 last = index + count - 1;

    // Using non-volatile 'rbx' for dynamic 'count' value (for LUA_MULTRET) to skip later recomputation
    // We also keep 'count' scaled by sizeof(TValue) here as it helps in the loop below
    RegisterX64 cscaled = rbx;

    if (count == LUA_MULTRET)
    {
        RegisterX64 tmp = rax;

        // count = L->top - rb
        build.mov(cscaled, qword[rState + offsetof(lua_State, top)]);
        build.lea(tmp, luauRegAddress(rb));
        build.sub(cscaled, tmp); // Using byte difference

        // L->top = L->ci->top
        build.mov(tmp, qword[rState + offsetof(lua_State, ci)]);
        build.mov(tmp, qword[tmp + offsetof(CallInfo, top)]);
        build.mov(qword[rState + offsetof(lua_State, top)], tmp);

        // last = index + count - 1;
        last = edx;
        build.mov(last, dwordReg(cscaled));
        build.shr(last, kTValueSizeLog2);
        build.add(last, index - 1);
    }

    Label skipResize;

    RegisterX64 table = regs.takeReg(rax, kInvalidInstIdx);

    build.mov(table, luauRegValue(ra));

    // Resize if h->sizearray < last
    build.cmp(dword[table + offsetof(Table, sizearray)], last);
    build.jcc(ConditionX64::NotBelow, skipResize);

    // Argument setup reordered to avoid conflicts
    LUAU_ASSERT(rArg3 != table);
    build.mov(dwordReg(rArg3), last);
    build.mov(rArg2, table);
    build.mov(rArg1, rState);
    build.call(qword[rNativeContext + offsetof(NativeContext, luaH_resizearray)]);
    build.mov(table, luauRegValue(ra)); // Reload clobbered register value

    build.setLabel(skipResize);

    RegisterX64 arrayDst = rdx;
    RegisterX64 offset = rcx;

    build.mov(arrayDst, qword[table + offsetof(Table, array)]);

    const int kUnrollSetListLimit = 4;

    if (count != LUA_MULTRET && count <= kUnrollSetListLimit)
    {
        for (int i = 0; i < count; ++i)
        {
            // setobj2t(L, &array[index + i - 1], rb + i);
            build.vmovups(xmm0, luauReg(rb + i));
            build.vmovups(xmmword[arrayDst + (index + i - 1) * sizeof(TValue)], xmm0);
        }
    }
    else
    {
        LUAU_ASSERT(count != 0);

        build.xor_(offset, offset);
        if (index != 1)
            build.add(arrayDst, (index - 1) * sizeof(TValue));

        Label repeatLoop, endLoop;
        OperandX64 limit = count == LUA_MULTRET ? cscaled : OperandX64(count * sizeof(TValue));

        // If c is static, we will always do at least one iteration
        if (count == LUA_MULTRET)
        {
            build.cmp(offset, limit);
            build.jcc(ConditionX64::NotBelow, endLoop);
        }

        build.setLabel(repeatLoop);

        // setobj2t(L, &array[index + i - 1], rb + i);
        build.vmovups(xmm0, xmmword[offset + rBase + rb * sizeof(TValue)]); // luauReg(rb) unwrapped to add offset
        build.vmovups(xmmword[offset + arrayDst], xmm0);

        build.add(offset, sizeof(TValue));
        build.cmp(offset, limit);
        build.jcc(ConditionX64::Below, repeatLoop);

        build.setLabel(endLoop);
    }

    callBarrierTableFast(regs, build, table, {});
}

void emitInstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat)
{
    // ipairs-style traversal is handled in IR
    LUAU_ASSERT(aux >= 0);

    // This is a fast-path for builtin table iteration, tag check for 'ra' has to be performed before emitting this instruction

    // Registers are chosen in this way to simplify fallback code for the node part
    RegisterX64 table = rArg2;
    RegisterX64 index = rArg3;
    RegisterX64 elemPtr = rax;

    build.mov(table, luauRegValue(ra + 1));
    build.mov(index, luauRegValue(ra + 2));

    // &array[index]
    build.mov(dwordReg(elemPtr), dwordReg(index));
    build.shl(dwordReg(elemPtr), kTValueSizeLog2);
    build.add(elemPtr, qword[table + offsetof(Table, array)]);

    // Clear extra variables since we might have more than two
    for (int i = 2; i < aux; ++i)
        build.mov(luauRegTag(ra + 3 + i), LUA_TNIL);

    Label skipArray, skipArrayNil;

    // First we advance index through the array portion
    // while (unsigned(index) < unsigned(sizearray))
    Label arrayLoop = build.setLabel();
    build.cmp(dwordReg(index), dword[table + offsetof(Table, sizearray)]);
    build.jcc(ConditionX64::NotBelow, skipArray);

    // If element is nil, we increment the index; if it's not, we still need 'index + 1' inside
    build.inc(index);

    build.cmp(dword[elemPtr + offsetof(TValue, tt)], LUA_TNIL);
    build.jcc(ConditionX64::Equal, skipArrayNil);

    // setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
    build.mov(luauRegValue(ra + 2), index);
    // Tag should already be set to lightuserdata

    // setnvalue(ra + 3, double(index + 1));
    build.vcvtsi2sd(xmm0, xmm0, dwordReg(index));
    build.vmovsd(luauRegValue(ra + 3), xmm0);
    build.mov(luauRegTag(ra + 3), LUA_TNUMBER);

    // setobj2s(L, ra + 4, e);
    setLuauReg(build, xmm2, ra + 4, xmmword[elemPtr]);

    build.jmp(loopRepeat);

    build.setLabel(skipArrayNil);

    // Index already incremented, advance to next array element
    build.add(elemPtr, sizeof(TValue));
    build.jmp(arrayLoop);

    build.setLabel(skipArray);

    // Call helper to assign next node value or to signal loop exit
    build.mov(rArg1, rState);
    // rArg2 and rArg3 are already set
    build.lea(rArg4, luauRegAddress(ra));
    build.call(qword[rNativeContext + offsetof(NativeContext, forgLoopNodeIter)]);
    build.test(al, al);
    build.jcc(ConditionX64::NotZero, loopRepeat);
}

} // namespace X64
} // namespace CodeGen
} // namespace Luau

@ -0,0 +1,27 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include <stdint.h>

namespace Luau
{
namespace CodeGen
{

struct Label;
struct ModuleHelpers;

namespace X64
{

class AssemblyBuilderX64;
struct IrRegAllocX64;

void emitInstCall(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults);
void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults);
void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, int count, uint32_t index);
void emitInstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat);

} // namespace X64
} // namespace CodeGen
} // namespace Luau

@ -0,0 +1,691 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/IrAnalysis.h"

#include "Luau/DenseHash.h"
#include "Luau/IrData.h"
#include "Luau/IrUtils.h"

#include "lobject.h"

#include <bitset>

#include <stddef.h>

namespace Luau
{
namespace CodeGen
{

void updateUseCounts(IrFunction& function)
{
    std::vector<IrBlock>& blocks = function.blocks;
    std::vector<IrInst>& instructions = function.instructions;

    for (IrBlock& block : blocks)
        block.useCount = 0;

    for (IrInst& inst : instructions)
        inst.useCount = 0;

    auto checkOp = [&](IrOp op) {
        if (op.kind == IrOpKind::Inst)
        {
            IrInst& target = instructions[op.index];
            LUAU_ASSERT(target.useCount < 0xffff);
            target.useCount++;
        }
        else if (op.kind == IrOpKind::Block)
        {
            IrBlock& target = blocks[op.index];
            LUAU_ASSERT(target.useCount < 0xffff);
            target.useCount++;
        }
    };

    for (IrInst& inst : instructions)
    {
        checkOp(inst.a);
        checkOp(inst.b);
        checkOp(inst.c);
        checkOp(inst.d);
        checkOp(inst.e);
        checkOp(inst.f);
    }
}

void updateLastUseLocations(IrFunction& function)
{
    std::vector<IrInst>& instructions = function.instructions;

    for (IrInst& inst : instructions)
        inst.lastUse = 0;

    for (size_t instIdx = 0; instIdx < instructions.size(); ++instIdx)
    {
        IrInst& inst = instructions[instIdx];

        auto checkOp = [&](IrOp op) {
            if (op.kind == IrOpKind::Inst)
                instructions[op.index].lastUse = uint32_t(instIdx);
        };

        if (isPseudo(inst.cmd))
            continue;

        checkOp(inst.a);
        checkOp(inst.b);
        checkOp(inst.c);
        checkOp(inst.d);
        checkOp(inst.e);
        checkOp(inst.f);
    }
}

uint32_t getNextInstUse(IrFunction& function, uint32_t targetInstIdx, uint32_t startInstIdx)
{
    LUAU_ASSERT(startInstIdx < function.instructions.size());
    IrInst& targetInst = function.instructions[targetInstIdx];

    for (uint32_t i = startInstIdx; i <= targetInst.lastUse; i++)
    {
        IrInst& inst = function.instructions[i];

        if (isPseudo(inst.cmd))
            continue;

        if (inst.a.kind == IrOpKind::Inst && inst.a.index == targetInstIdx)
            return i;

        if (inst.b.kind == IrOpKind::Inst && inst.b.index == targetInstIdx)
            return i;

        if (inst.c.kind == IrOpKind::Inst && inst.c.index == targetInstIdx)
            return i;

        if (inst.d.kind == IrOpKind::Inst && inst.d.index == targetInstIdx)
            return i;

        if (inst.e.kind == IrOpKind::Inst && inst.e.index == targetInstIdx)
            return i;

        if (inst.f.kind == IrOpKind::Inst && inst.f.index == targetInstIdx)
            return i;
    }

    // There must be a next use since there is the last use location
    LUAU_ASSERT(!"failed to find next use");
    return targetInst.lastUse;
}
|
||||
|
||||
std::pair<uint32_t, uint32_t> getLiveInOutValueCount(IrFunction& function, IrBlock& block)
|
||||
{
|
||||
uint32_t liveIns = 0;
|
||||
uint32_t liveOuts = 0;
|
||||
|
||||
auto checkOp = [&](IrOp op) {
|
||||
if (op.kind == IrOpKind::Inst)
|
||||
{
|
||||
if (op.index >= block.start && op.index <= block.finish)
|
||||
liveOuts--;
|
||||
else
|
||||
liveIns++;
|
||||
}
|
||||
};
|
||||
|
||||
for (uint32_t instIdx = block.start; instIdx <= block.finish; instIdx++)
|
||||
{
|
||||
IrInst& inst = function.instructions[instIdx];
|
||||
|
||||
if (isPseudo(inst.cmd))
|
||||
continue;
|
||||
|
||||
liveOuts += inst.useCount;
|
||||
|
||||
checkOp(inst.a);
|
||||
checkOp(inst.b);
|
||||
checkOp(inst.c);
|
||||
checkOp(inst.d);
|
||||
checkOp(inst.e);
|
||||
checkOp(inst.f);
|
||||
}
|
||||
|
||||
return std::make_pair(liveIns, liveOuts);
|
||||
}
|
||||
|
||||
uint32_t getLiveInValueCount(IrFunction& function, IrBlock& block)
|
||||
{
|
||||
return getLiveInOutValueCount(function, block).first;
|
||||
}
|
||||
|
||||
uint32_t getLiveOutValueCount(IrFunction& function, IrBlock& block)
|
||||
{
|
||||
return getLiveInOutValueCount(function, block).second;
|
||||
}
|
||||
|
||||
void requireVariadicSequence(RegisterSet& sourceRs, const RegisterSet& defRs, uint8_t varargStart)
|
||||
{
|
||||
if (!defRs.varargSeq)
|
||||
{
|
||||
// Peel away registers from variadic sequence that we define
|
||||
while (defRs.regs.test(varargStart))
|
||||
varargStart++;
|
||||
|
||||
LUAU_ASSERT(!sourceRs.varargSeq || sourceRs.varargStart == varargStart);
|
||||
|
||||
sourceRs.varargSeq = true;
|
||||
sourceRs.varargStart = varargStart;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Variadic use sequence might include registers before def sequence
|
||||
for (int i = varargStart; i < defRs.varargStart; i++)
|
||||
{
|
||||
if (!defRs.regs.test(i))
|
||||
sourceRs.regs.set(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static RegisterSet computeBlockLiveInRegSet(IrFunction& function, const IrBlock& block, RegisterSet& defRs, std::bitset<256>& capturedRegs)
|
||||
{
|
||||
RegisterSet inRs;
|
||||
|
||||
auto def = [&](IrOp op, int offset = 0) {
|
||||
defRs.regs.set(vmRegOp(op) + offset, true);
|
||||
};
|
||||
|
||||
auto use = [&](IrOp op, int offset = 0) {
|
||||
if (!defRs.regs.test(vmRegOp(op) + offset))
|
||||
inRs.regs.set(vmRegOp(op) + offset, true);
|
||||
};
|
||||
|
||||
auto maybeDef = [&](IrOp op) {
|
||||
if (op.kind == IrOpKind::VmReg)
|
||||
defRs.regs.set(vmRegOp(op), true);
|
||||
};
|
||||
|
||||
auto maybeUse = [&](IrOp op) {
|
||||
if (op.kind == IrOpKind::VmReg)
|
||||
{
|
||||
if (!defRs.regs.test(vmRegOp(op)))
|
||||
inRs.regs.set(vmRegOp(op), true);
|
||||
}
|
||||
};
|
||||
|
||||
auto defVarargs = [&](uint8_t varargStart) {
|
||||
defRs.varargSeq = true;
|
||||
defRs.varargStart = varargStart;
|
||||
};
|
||||
|
||||
auto useVarargs = [&](uint8_t varargStart) {
|
||||
requireVariadicSequence(inRs, defRs, varargStart);
|
||||
|
||||
// Variadic sequence has been consumed
|
||||
defRs.varargSeq = false;
|
||||
defRs.varargStart = 0;
|
||||
};
|
||||
|
||||
auto defRange = [&](int start, int count) {
|
||||
if (count == -1)
|
||||
{
|
||||
defVarargs(start);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = start; i < start + count; i++)
|
||||
defRs.regs.set(i, true);
|
||||
}
|
||||
};
|
||||
|
||||
auto useRange = [&](int start, int count) {
|
||||
if (count == -1)
|
||||
{
|
||||
useVarargs(start);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = start; i < start + count; i++)
|
||||
{
|
||||
if (!defRs.regs.test(i))
|
||||
inRs.regs.set(i, true);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
for (uint32_t instIdx = block.start; instIdx <= block.finish; instIdx++)
|
||||
{
|
||||
const IrInst& inst = function.instructions[instIdx];
|
||||
|
||||
// For correct analysis, all instruction uses must be handled before handling the definitions
|
||||
switch (inst.cmd)
|
||||
{
|
||||
case IrCmd::LOAD_TAG:
|
||||
case IrCmd::LOAD_POINTER:
|
||||
case IrCmd::LOAD_DOUBLE:
|
||||
case IrCmd::LOAD_INT:
|
||||
case IrCmd::LOAD_TVALUE:
|
||||
maybeUse(inst.a); // Argument can also be a VmConst
|
||||
break;
|
||||
case IrCmd::STORE_TAG:
|
||||
case IrCmd::STORE_POINTER:
|
||||
case IrCmd::STORE_DOUBLE:
|
||||
case IrCmd::STORE_INT:
|
||||
case IrCmd::STORE_VECTOR:
|
||||
case IrCmd::STORE_TVALUE:
|
||||
maybeDef(inst.a); // Argument can also be a pointer value
|
||||
break;
|
||||
case IrCmd::JUMP_IF_TRUTHY:
|
||||
case IrCmd::JUMP_IF_FALSY:
|
||||
use(inst.a);
|
||||
break;
|
||||
case IrCmd::JUMP_CMP_ANY:
|
||||
use(inst.a);
|
||||
use(inst.b);
|
||||
break;
|
||||
// A <- B, C
|
||||
case IrCmd::DO_ARITH:
|
||||
case IrCmd::GET_TABLE:
|
||||
use(inst.b);
|
||||
maybeUse(inst.c); // Argument can also be a VmConst
|
||||
|
||||
def(inst.a);
|
||||
break;
|
||||
case IrCmd::SET_TABLE:
|
||||
use(inst.a);
|
||||
use(inst.b);
|
||||
maybeUse(inst.c); // Argument can also be a VmConst
|
||||
break;
|
||||
// A <- B
|
||||
case IrCmd::DO_LEN:
|
||||
use(inst.b);
|
||||
|
||||
def(inst.a);
|
||||
break;
|
||||
case IrCmd::GET_IMPORT:
|
||||
def(inst.a);
|
||||
break;
|
||||
case IrCmd::CONCAT:
|
||||
useRange(vmRegOp(inst.a), function.uintOp(inst.b));
|
||||
|
||||
defRange(vmRegOp(inst.a), function.uintOp(inst.b));
|
||||
break;
|
||||
case IrCmd::GET_UPVALUE:
|
||||
def(inst.a);
|
||||
break;
|
||||
case IrCmd::SET_UPVALUE:
|
||||
use(inst.b);
|
||||
break;
|
||||
case IrCmd::PREPARE_FORN:
|
||||
use(inst.a);
|
||||
use(inst.b);
|
||||
use(inst.c);
|
||||
|
||||
def(inst.a);
|
||||
def(inst.b);
|
||||
def(inst.c);
|
||||
break;
|
||||
case IrCmd::INTERRUPT:
|
||||
break;
|
||||
case IrCmd::BARRIER_OBJ:
|
||||
case IrCmd::BARRIER_TABLE_FORWARD:
|
||||
use(inst.b);
|
||||
break;
|
||||
case IrCmd::CLOSE_UPVALS:
|
||||
// Closing an upvalue should be counted as a register use (it copies the fresh register value)
|
||||
// But we lack the required information about the specific set of registers that are affected
|
||||
// Because we don't plan to optimize captured registers atm, we skip full dataflow analysis for them right now
|
||||
break;
|
||||
case IrCmd::CAPTURE:
|
||||
maybeUse(inst.a);
|
||||
|
||||
if (function.boolOp(inst.b))
|
||||
capturedRegs.set(vmRegOp(inst.a), true);
|
||||
break;
|
||||
case IrCmd::SETLIST:
|
||||
use(inst.b);
|
||||
useRange(vmRegOp(inst.c), function.intOp(inst.d));
|
||||
break;
|
||||
case IrCmd::CALL:
|
||||
use(inst.a);
|
||||
useRange(vmRegOp(inst.a) + 1, function.intOp(inst.b));
|
||||
|
||||
defRange(vmRegOp(inst.a), function.intOp(inst.c));
|
||||
break;
|
||||
case IrCmd::RETURN:
|
||||
useRange(vmRegOp(inst.a), function.intOp(inst.b));
|
||||
break;
|
||||
|
||||
// TODO: FASTCALL is more restrictive than INVOKE_FASTCALL; we should either determine the exact semantics, or rework it
|
||||
case IrCmd::FASTCALL:
|
||||
case IrCmd::INVOKE_FASTCALL:
|
||||
if (int count = function.intOp(inst.e); count != -1)
|
||||
{
|
||||
if (count >= 3)
|
||||
{
|
||||
LUAU_ASSERT(inst.d.kind == IrOpKind::VmReg && vmRegOp(inst.d) == vmRegOp(inst.c) + 1);
|
||||
|
||||
useRange(vmRegOp(inst.c), count);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (count >= 1)
|
||||
use(inst.c);
|
||||
|
||||
if (count >= 2)
|
||||
maybeUse(inst.d); // Argument can also be a VmConst
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
useVarargs(vmRegOp(inst.c));
|
||||
}
|
||||
|
||||
// Multiple return sequences (count == -1) are defined by ADJUST_STACK_TO_REG
|
||||
if (int count = function.intOp(inst.f); count != -1)
|
||||
defRange(vmRegOp(inst.b), count);
|
||||
break;
|
||||
case IrCmd::FORGLOOP:
|
||||
// First register is not used by instruction, we check that it's still 'nil' with CHECK_TAG
|
||||
use(inst.a, 1);
|
||||
use(inst.a, 2);
|
||||
|
||||
def(inst.a, 2);
|
||||
defRange(vmRegOp(inst.a) + 3, function.intOp(inst.b));
|
||||
break;
|
||||
case IrCmd::FORGLOOP_FALLBACK:
|
||||
useRange(vmRegOp(inst.a), 3);
|
||||
|
||||
def(inst.a, 2);
|
||||
defRange(vmRegOp(inst.a) + 3, uint8_t(function.intOp(inst.b))); // ignore most significant bit
|
||||
break;
|
||||
case IrCmd::FORGPREP_XNEXT_FALLBACK:
|
||||
use(inst.b);
|
||||
break;
|
||||
case IrCmd::FALLBACK_GETGLOBAL:
|
||||
def(inst.b);
|
||||
break;
|
||||
case IrCmd::FALLBACK_SETGLOBAL:
|
||||
use(inst.b);
|
||||
break;
|
||||
case IrCmd::FALLBACK_GETTABLEKS:
|
||||
use(inst.c);
|
||||
|
||||
def(inst.b);
|
||||
break;
|
||||
case IrCmd::FALLBACK_SETTABLEKS:
|
||||
use(inst.b);
|
||||
use(inst.c);
|
||||
break;
|
||||
case IrCmd::FALLBACK_NAMECALL:
|
||||
use(inst.c);
|
||||
|
||||
defRange(vmRegOp(inst.b), 2);
|
||||
break;
|
||||
case IrCmd::FALLBACK_PREPVARARGS:
|
||||
// No effect on explicitly referenced registers
|
||||
break;
|
||||
case IrCmd::FALLBACK_GETVARARGS:
|
||||
defRange(vmRegOp(inst.b), function.intOp(inst.c));
|
||||
break;
|
||||
case IrCmd::FALLBACK_NEWCLOSURE:
|
||||
def(inst.b);
|
||||
break;
|
||||
case IrCmd::FALLBACK_DUPCLOSURE:
|
||||
def(inst.b);
|
||||
break;
|
||||
case IrCmd::FALLBACK_FORGPREP:
|
||||
use(inst.b);
|
||||
|
||||
defRange(vmRegOp(inst.b), 3);
|
||||
break;
|
||||
case IrCmd::ADJUST_STACK_TO_REG:
|
||||
defRange(vmRegOp(inst.a), -1);
|
||||
break;
|
||||
case IrCmd::ADJUST_STACK_TO_TOP:
|
||||
// While this can be considered to be a vararg consumer, it is already handled in fastcall instructions
|
||||
break;
|
||||
|
||||
default:
|
||||
// All instructions which reference registers have to be handled explicitly
|
||||
LUAU_ASSERT(inst.a.kind != IrOpKind::VmReg);
|
||||
LUAU_ASSERT(inst.b.kind != IrOpKind::VmReg);
|
||||
LUAU_ASSERT(inst.c.kind != IrOpKind::VmReg);
|
||||
LUAU_ASSERT(inst.d.kind != IrOpKind::VmReg);
|
||||
LUAU_ASSERT(inst.e.kind != IrOpKind::VmReg);
|
||||
LUAU_ASSERT(inst.f.kind != IrOpKind::VmReg);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return inRs;
|
||||
}
|
||||
|
||||
// The algorithm used here is commonly known as backwards data-flow analysis.
|
||||
// For each block, we track 'upward-exposed' (live-in) uses of registers - a use of a register that hasn't been defined in the block yet.
|
||||
// We also track the set of registers that were defined in the block.
|
||||
// When initial live-in sets of registers are computed, propagation of those uses upwards through predecessors is performed.
|
||||
// If predecessor doesn't define the register, we have to add it to the live-in set.
|
||||
// Extending the set of live-in registers of a block requires re-checking of that block.
|
||||
// Propagation runs iteratively, using a worklist of blocks to visit until a fixed point is reached.
|
||||
// This algorithm can be easily extended to cover phi instructions, but we don't use those yet.
|
||||
static void computeCfgLiveInOutRegSets(IrFunction& function)
|
||||
{
|
||||
CfgInfo& info = function.cfg;
|
||||
|
||||
// Clear existing data
|
||||
// 'in' and 'captured' data is not cleared because it will be overwritten below
|
||||
info.def.clear();
|
||||
info.out.clear();
|
||||
|
||||
// Try to compute Luau VM register use-def info
|
||||
info.in.resize(function.blocks.size());
|
||||
info.def.resize(function.blocks.size());
|
||||
info.out.resize(function.blocks.size());
|
||||
|
||||
// Captured registers are tracked for the whole function
|
||||
// It should be possible to have a more precise analysis for them in the future
|
||||
std::bitset<256> capturedRegs;
|
||||
|
||||
// First we compute live-in set of each block
|
||||
for (size_t blockIdx = 0; blockIdx < function.blocks.size(); blockIdx++)
|
||||
{
|
||||
const IrBlock& block = function.blocks[blockIdx];
|
||||
|
||||
if (block.kind == IrBlockKind::Dead)
|
||||
continue;
|
||||
|
||||
info.in[blockIdx] = computeBlockLiveInRegSet(function, block, info.def[blockIdx], capturedRegs);
|
||||
}
|
||||
|
||||
info.captured.regs = capturedRegs;
|
||||
|
||||
// With live-in sets ready, we can arrive at a fixed point for both in/out registers by requesting required registers from predecessors
|
||||
std::vector<uint32_t> worklist;
|
||||
|
||||
std::vector<uint8_t> inWorklist;
|
||||
inWorklist.resize(function.blocks.size(), false);
|
||||
|
||||
// We will have to visit each block at least once, so we add all of them to the worklist immediately
|
||||
for (size_t blockIdx = 0; blockIdx < function.blocks.size(); blockIdx++)
|
||||
{
|
||||
const IrBlock& block = function.blocks[blockIdx];
|
||||
|
||||
if (block.kind == IrBlockKind::Dead)
|
||||
continue;
|
||||
|
||||
worklist.push_back(uint32_t(blockIdx));
|
||||
inWorklist[blockIdx] = true;
|
||||
}
|
||||
|
||||
while (!worklist.empty())
|
||||
{
|
||||
uint32_t blockIdx = worklist.back();
|
||||
worklist.pop_back();
|
||||
inWorklist[blockIdx] = false;
|
||||
|
||||
IrBlock& curr = function.blocks[blockIdx];
|
||||
RegisterSet& inRs = info.in[blockIdx];
|
||||
RegisterSet& defRs = info.def[blockIdx];
|
||||
RegisterSet& outRs = info.out[blockIdx];
|
||||
|
||||
// Current block has to provide all registers in successor blocks
|
||||
BlockIteratorWrapper successorsIt = successors(info, blockIdx);
|
||||
for (uint32_t succIdx : successorsIt)
|
||||
{
|
||||
IrBlock& succ = function.blocks[succIdx];
|
||||
|
||||
// This is a step away from the usual definition of live range flow through CFG
|
||||
// Exit from a regular block to a fallback block is not considered a block terminator
|
||||
// This is because fallback blocks define an alternative implementation of the same operations
|
||||
// This can cause the current block to define more registers that actually were available at fallback entry
|
||||
if (curr.kind != IrBlockKind::Fallback && succ.kind == IrBlockKind::Fallback)
|
||||
{
|
||||
// If this is the only successor, this skip will not be valid
|
||||
LUAU_ASSERT(successorsIt.size() != 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
const RegisterSet& succRs = info.in[succIdx];
|
||||
|
||||
outRs.regs |= succRs.regs;
|
||||
|
||||
if (succRs.varargSeq)
|
||||
{
|
||||
LUAU_ASSERT(!outRs.varargSeq || outRs.varargStart == succRs.varargStart);
|
||||
|
||||
outRs.varargSeq = true;
|
||||
outRs.varargStart = succRs.varargStart;
|
||||
}
|
||||
}
|
||||
|
||||
RegisterSet oldInRs = inRs;
|
||||
|
||||
// If current block didn't define a live-out, it has to be live-in
|
||||
inRs.regs |= outRs.regs & ~defRs.regs;
|
||||
|
||||
if (outRs.varargSeq)
|
||||
requireVariadicSequence(inRs, defRs, outRs.varargStart);
|
||||
|
||||
// If we have new live-ins, we have to notify all predecessors
|
||||
// We don't allow changes to the start of the variadic sequence, so we skip checking that member
|
||||
if (inRs.regs != oldInRs.regs || inRs.varargSeq != oldInRs.varargSeq)
|
||||
{
|
||||
for (uint32_t predIdx : predecessors(info, blockIdx))
|
||||
{
|
||||
if (!inWorklist[predIdx])
|
||||
{
|
||||
worklist.push_back(predIdx);
|
||||
inWorklist[predIdx] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If Proto data is available, validate that entry block arguments match required registers
|
||||
if (function.proto)
|
||||
{
|
||||
RegisterSet& entryIn = info.in[0];
|
||||
|
||||
LUAU_ASSERT(!entryIn.varargSeq);
|
||||
|
||||
for (size_t i = 0; i < entryIn.regs.size(); i++)
|
||||
LUAU_ASSERT(!entryIn.regs.test(i) || i < function.proto->numparams);
|
||||
}
|
||||
}
|
||||
|
||||
static void computeCfgBlockEdges(IrFunction& function)
|
||||
{
|
||||
CfgInfo& info = function.cfg;
|
||||
|
||||
// Clear existing data
|
||||
info.predecessorsOffsets.clear();
|
||||
info.successorsOffsets.clear();
|
||||
|
||||
// Compute predecessors block edges
|
||||
info.predecessorsOffsets.reserve(function.blocks.size());
|
||||
info.successorsOffsets.reserve(function.blocks.size());
|
||||
|
||||
int edgeCount = 0;
|
||||
|
||||
for (const IrBlock& block : function.blocks)
|
||||
{
|
||||
info.predecessorsOffsets.push_back(edgeCount);
|
||||
edgeCount += block.useCount;
|
||||
}
|
||||
|
||||
info.predecessors.resize(edgeCount);
|
||||
info.successors.resize(edgeCount);
|
||||
|
||||
edgeCount = 0;
|
||||
|
||||
for (size_t blockIdx = 0; blockIdx < function.blocks.size(); blockIdx++)
|
||||
{
|
||||
const IrBlock& block = function.blocks[blockIdx];
|
||||
|
||||
info.successorsOffsets.push_back(edgeCount);
|
||||
|
||||
if (block.kind == IrBlockKind::Dead)
|
||||
continue;
|
||||
|
||||
for (uint32_t instIdx = block.start; instIdx <= block.finish; instIdx++)
|
||||
{
|
||||
const IrInst& inst = function.instructions[instIdx];
|
||||
|
||||
auto checkOp = [&](IrOp op) {
|
||||
if (op.kind == IrOpKind::Block)
|
||||
{
|
||||
// We use a trick here, where we use the starting offset of the predecessor list as the position where to write next predecessor
|
||||
// The values will be adjusted back in a separate loop later
|
||||
info.predecessors[info.predecessorsOffsets[op.index]++] = uint32_t(blockIdx);
|
||||
|
||||
info.successors[edgeCount++] = op.index;
|
||||
}
|
||||
};
|
||||
|
||||
checkOp(inst.a);
|
||||
checkOp(inst.b);
|
||||
checkOp(inst.c);
|
||||
checkOp(inst.d);
|
||||
checkOp(inst.e);
|
||||
checkOp(inst.f);
|
||||
}
|
||||
}
|
||||
|
||||
// Offsets into the predecessor list were used as iterators in the previous loop
|
||||
// To adjust them back, block use count is subtracted (predecessor count is equal to how many uses block has)
|
||||
for (size_t blockIdx = 0; blockIdx < function.blocks.size(); blockIdx++)
|
||||
{
|
||||
const IrBlock& block = function.blocks[blockIdx];
|
||||
|
||||
info.predecessorsOffsets[blockIdx] -= block.useCount;
|
||||
}
|
||||
}
|
||||
|
||||
void computeCfgInfo(IrFunction& function)
|
||||
{
|
||||
computeCfgBlockEdges(function);
|
||||
computeCfgLiveInOutRegSets(function);
|
||||
}
|
||||
|
||||
BlockIteratorWrapper predecessors(const CfgInfo& cfg, uint32_t blockIdx)
|
||||
{
|
||||
LUAU_ASSERT(blockIdx < cfg.predecessorsOffsets.size());
|
||||
|
||||
uint32_t start = cfg.predecessorsOffsets[blockIdx];
|
||||
uint32_t end = blockIdx + 1 < cfg.predecessorsOffsets.size() ? cfg.predecessorsOffsets[blockIdx + 1] : uint32_t(cfg.predecessors.size());
|
||||
|
||||
return BlockIteratorWrapper{cfg.predecessors.data() + start, cfg.predecessors.data() + end};
|
||||
}
|
||||
|
||||
BlockIteratorWrapper successors(const CfgInfo& cfg, uint32_t blockIdx)
|
||||
{
|
||||
LUAU_ASSERT(blockIdx < cfg.successorsOffsets.size());
|
||||
|
||||
uint32_t start = cfg.successorsOffsets[blockIdx];
|
||||
uint32_t end = blockIdx + 1 < cfg.successorsOffsets.size() ? cfg.successorsOffsets[blockIdx + 1] : uint32_t(cfg.successors.size());
|
||||
|
||||
return BlockIteratorWrapper{cfg.successors.data() + start, cfg.successors.data() + end};
|
||||
}
|
||||
|
||||
} // namespace CodeGen
|
||||
} // namespace Luau
|
|
@ -0,0 +1,651 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#include "Luau/IrBuilder.h"
|
||||
|
||||
#include "Luau/IrAnalysis.h"
|
||||
#include "Luau/IrUtils.h"
|
||||
|
||||
#include "CustomExecUtils.h"
|
||||
#include "IrTranslation.h"
|
||||
|
||||
#include "lapi.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace CodeGen
|
||||
{
|
||||
|
||||
constexpr unsigned kNoAssociatedBlockIndex = ~0u;
|
||||
|
||||
IrBuilder::IrBuilder()
|
||||
: constantMap({IrConstKind::Bool, ~0ull})
|
||||
{
|
||||
}
|
||||
|
||||
void IrBuilder::buildFunctionIr(Proto* proto)
|
||||
{
|
||||
function.proto = proto;
|
||||
|
||||
// Rebuild original control flow blocks
|
||||
rebuildBytecodeBasicBlocks(proto);
|
||||
|
||||
function.bcMapping.resize(proto->sizecode, {~0u, ~0u});
|
||||
|
||||
// Translate all instructions to IR inside blocks
|
||||
for (int i = 0; i < proto->sizecode;)
|
||||
{
|
||||
const Instruction* pc = &proto->code[i];
|
||||
LuauOpcode op = LuauOpcode(LUAU_INSN_OP(*pc));
|
||||
|
||||
int nexti = i + getOpLength(op);
|
||||
LUAU_ASSERT(nexti <= proto->sizecode);
|
||||
|
||||
function.bcMapping[i] = {uint32_t(function.instructions.size()), ~0u};
|
||||
|
||||
// Begin new block at this instruction if it was in the bytecode or requested during translation
|
||||
if (instIndexToBlock[i] != kNoAssociatedBlockIndex)
|
||||
beginBlock(blockAtInst(i));
|
||||
|
||||
// We skip dead bytecode instructions when they appear after block was already terminated
|
||||
if (!inTerminatedBlock)
|
||||
translateInst(op, pc, i);
|
||||
|
||||
i = nexti;
|
||||
LUAU_ASSERT(i <= proto->sizecode);
|
||||
|
||||
// If we are going into a new block at the next instruction and it's a fallthrough, jump has to be placed to mark block termination
|
||||
if (i < int(instIndexToBlock.size()) && instIndexToBlock[i] != kNoAssociatedBlockIndex)
|
||||
{
|
||||
if (!isBlockTerminator(function.instructions.back().cmd))
|
||||
inst(IrCmd::JUMP, blockAtInst(i));
|
||||
}
|
||||
}
|
||||
|
||||
// Now that all has been generated, compute use counts
|
||||
updateUseCounts(function);
|
||||
}
|
||||
|
||||
void IrBuilder::rebuildBytecodeBasicBlocks(Proto* proto)
|
||||
{
|
||||
instIndexToBlock.resize(proto->sizecode, kNoAssociatedBlockIndex);
|
||||
|
||||
// Mark jump targets
|
||||
std::vector<uint8_t> jumpTargets(proto->sizecode, 0);
|
||||
|
||||
for (int i = 0; i < proto->sizecode;)
|
||||
{
|
||||
const Instruction* pc = &proto->code[i];
|
||||
LuauOpcode op = LuauOpcode(LUAU_INSN_OP(*pc));
|
||||
|
||||
int target = getJumpTarget(*pc, uint32_t(i));
|
||||
|
||||
if (target >= 0 && !isFastCall(op))
|
||||
jumpTargets[target] = true;
|
||||
|
||||
i += getOpLength(op);
|
||||
LUAU_ASSERT(i <= proto->sizecode);
|
||||
}
|
||||
|
||||
|
||||
// Bytecode blocks are created at bytecode jump targets and the start of a function
|
||||
jumpTargets[0] = true;
|
||||
|
||||
for (int i = 0; i < proto->sizecode; i++)
|
||||
{
|
||||
if (jumpTargets[i])
|
||||
{
|
||||
IrOp b = block(IrBlockKind::Bytecode);
|
||||
instIndexToBlock[i] = b.index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void IrBuilder::translateInst(LuauOpcode op, const Instruction* pc, int i)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case LOP_NOP:
|
||||
break;
|
||||
case LOP_LOADNIL:
|
||||
translateInstLoadNil(*this, pc);
|
||||
break;
|
||||
case LOP_LOADB:
|
||||
translateInstLoadB(*this, pc, i);
|
||||
break;
|
||||
case LOP_LOADN:
|
||||
translateInstLoadN(*this, pc);
|
||||
break;
|
||||
case LOP_LOADK:
|
||||
translateInstLoadK(*this, pc);
|
||||
break;
|
||||
case LOP_LOADKX:
|
||||
translateInstLoadKX(*this, pc);
|
||||
break;
|
||||
case LOP_MOVE:
|
||||
translateInstMove(*this, pc);
|
||||
break;
|
||||
case LOP_GETGLOBAL:
|
||||
translateInstGetGlobal(*this, pc, i);
|
||||
break;
|
||||
case LOP_SETGLOBAL:
|
||||
translateInstSetGlobal(*this, pc, i);
|
||||
break;
|
||||
case LOP_CALL:
|
||||
inst(IrCmd::INTERRUPT, constUint(i));
|
||||
inst(IrCmd::SET_SAVEDPC, constUint(i + 1));
|
||||
|
||||
inst(IrCmd::CALL, vmReg(LUAU_INSN_A(*pc)), constInt(LUAU_INSN_B(*pc) - 1), constInt(LUAU_INSN_C(*pc) - 1));
|
||||
|
||||
if (activeFastcallFallback)
|
||||
{
|
||||
inst(IrCmd::JUMP, fastcallFallbackReturn);
|
||||
|
||||
beginBlock(fastcallFallbackReturn);
|
||||
|
||||
activeFastcallFallback = false;
|
||||
}
|
||||
break;
|
||||
case LOP_RETURN:
|
||||
inst(IrCmd::INTERRUPT, constUint(i));
|
||||
|
||||
inst(IrCmd::RETURN, vmReg(LUAU_INSN_A(*pc)), constInt(LUAU_INSN_B(*pc) - 1));
|
||||
break;
|
||||
case LOP_GETTABLE:
|
||||
translateInstGetTable(*this, pc, i);
|
||||
break;
|
||||
case LOP_SETTABLE:
|
||||
translateInstSetTable(*this, pc, i);
|
||||
break;
|
||||
case LOP_GETTABLEKS:
|
||||
translateInstGetTableKS(*this, pc, i);
|
||||
break;
|
||||
case LOP_SETTABLEKS:
|
||||
translateInstSetTableKS(*this, pc, i);
|
||||
break;
|
||||
case LOP_GETTABLEN:
|
||||
translateInstGetTableN(*this, pc, i);
|
||||
break;
|
||||
case LOP_SETTABLEN:
|
||||
translateInstSetTableN(*this, pc, i);
|
||||
break;
|
||||
case LOP_JUMP:
|
||||
translateInstJump(*this, pc, i);
|
||||
break;
|
||||
case LOP_JUMPBACK:
|
||||
translateInstJumpBack(*this, pc, i);
|
||||
break;
|
||||
case LOP_JUMPIF:
|
||||
translateInstJumpIf(*this, pc, i, /* not_ */ false);
|
||||
break;
|
||||
case LOP_JUMPIFNOT:
|
||||
translateInstJumpIf(*this, pc, i, /* not_ */ true);
|
||||
break;
|
||||
case LOP_JUMPIFEQ:
|
||||
translateInstJumpIfEq(*this, pc, i, /* not_ */ false);
|
||||
break;
|
||||
case LOP_JUMPIFLE:
|
||||
translateInstJumpIfCond(*this, pc, i, IrCondition::LessEqual);
|
||||
break;
|
||||
case LOP_JUMPIFLT:
|
||||
translateInstJumpIfCond(*this, pc, i, IrCondition::Less);
|
||||
break;
|
||||
case LOP_JUMPIFNOTEQ:
|
||||
translateInstJumpIfEq(*this, pc, i, /* not_ */ true);
|
||||
break;
|
||||
case LOP_JUMPIFNOTLE:
|
||||
translateInstJumpIfCond(*this, pc, i, IrCondition::NotLessEqual);
|
||||
break;
|
||||
case LOP_JUMPIFNOTLT:
|
||||
translateInstJumpIfCond(*this, pc, i, IrCondition::NotLess);
|
||||
break;
|
||||
case LOP_JUMPX:
|
||||
translateInstJumpX(*this, pc, i);
|
||||
break;
|
||||
case LOP_JUMPXEQKNIL:
|
||||
translateInstJumpxEqNil(*this, pc, i);
|
||||
break;
|
||||
case LOP_JUMPXEQKB:
|
||||
translateInstJumpxEqB(*this, pc, i);
|
||||
break;
|
||||
case LOP_JUMPXEQKN:
|
||||
translateInstJumpxEqN(*this, pc, i);
|
||||
break;
|
||||
case LOP_JUMPXEQKS:
|
||||
translateInstJumpxEqS(*this, pc, i);
|
||||
break;
|
||||
case LOP_ADD:
|
||||
translateInstBinary(*this, pc, i, TM_ADD);
|
||||
break;
|
||||
case LOP_SUB:
|
||||
translateInstBinary(*this, pc, i, TM_SUB);
|
||||
break;
|
||||
case LOP_MUL:
|
||||
translateInstBinary(*this, pc, i, TM_MUL);
|
||||
break;
|
||||
case LOP_DIV:
|
||||
translateInstBinary(*this, pc, i, TM_DIV);
|
||||
break;
|
||||
case LOP_MOD:
|
||||
translateInstBinary(*this, pc, i, TM_MOD);
|
||||
break;
|
||||
case LOP_POW:
|
||||
translateInstBinary(*this, pc, i, TM_POW);
|
||||
break;
|
||||
case LOP_ADDK:
|
||||
translateInstBinaryK(*this, pc, i, TM_ADD);
|
||||
break;
|
||||
case LOP_SUBK:
|
||||
translateInstBinaryK(*this, pc, i, TM_SUB);
|
||||
break;
|
||||
case LOP_MULK:
|
||||
translateInstBinaryK(*this, pc, i, TM_MUL);
|
||||
break;
|
||||
case LOP_DIVK:
|
||||
translateInstBinaryK(*this, pc, i, TM_DIV);
|
||||
break;
|
||||
case LOP_MODK:
|
||||
translateInstBinaryK(*this, pc, i, TM_MOD);
|
||||
break;
|
||||
case LOP_POWK:
|
||||
translateInstBinaryK(*this, pc, i, TM_POW);
|
||||
break;
|
||||
case LOP_NOT:
|
||||
translateInstNot(*this, pc);
|
||||
break;
|
||||
case LOP_MINUS:
|
||||
translateInstMinus(*this, pc, i);
|
||||
break;
|
||||
case LOP_LENGTH:
|
||||
translateInstLength(*this, pc, i);
|
||||
break;
|
||||
case LOP_NEWTABLE:
|
||||
translateInstNewTable(*this, pc, i);
|
||||
break;
|
||||
case LOP_DUPTABLE:
|
||||
translateInstDupTable(*this, pc, i);
|
||||
break;
|
||||
case LOP_SETLIST:
|
||||
inst(IrCmd::SETLIST, constUint(i), vmReg(LUAU_INSN_A(*pc)), vmReg(LUAU_INSN_B(*pc)), constInt(LUAU_INSN_C(*pc) - 1), constUint(pc[1]));
|
||||
break;
|
||||
case LOP_GETUPVAL:
|
||||
translateInstGetUpval(*this, pc, i);
|
||||
break;
|
||||
case LOP_SETUPVAL:
|
||||
translateInstSetUpval(*this, pc, i);
|
||||
break;
|
||||
case LOP_CLOSEUPVALS:
|
||||
translateInstCloseUpvals(*this, pc);
|
||||
break;
|
||||
case LOP_FASTCALL:
|
||||
{
|
||||
int skip = LUAU_INSN_C(*pc);
|
||||
IrOp next = blockAtInst(i + skip + 2);
|
||||
|
||||
translateFastCallN(*this, pc, i, false, 0, {}, next);
|
||||
|
||||
activeFastcallFallback = true;
|
||||
fastcallFallbackReturn = next;
|
||||
break;
|
||||
}
|
||||
case LOP_FASTCALL1:
|
||||
{
|
||||
int skip = LUAU_INSN_C(*pc);
|
||||
IrOp next = blockAtInst(i + skip + 2);
|
||||
|
||||
translateFastCallN(*this, pc, i, true, 1, undef(), next);
|
||||
|
||||
activeFastcallFallback = true;
|
||||
fastcallFallbackReturn = next;
|
||||
break;
|
||||
}
|
||||
case LOP_FASTCALL2:
|
||||
{
|
||||
int skip = LUAU_INSN_C(*pc);
|
||||
IrOp next = blockAtInst(i + skip + 2);
|
||||
|
||||
translateFastCallN(*this, pc, i, true, 2, vmReg(pc[1]), next);
|
||||
|
||||
activeFastcallFallback = true;
|
||||
fastcallFallbackReturn = next;
|
||||
break;
|
||||
}
|
||||
case LOP_FASTCALL2K:
|
||||
{
|
||||
int skip = LUAU_INSN_C(*pc);
|
||||
IrOp next = blockAtInst(i + skip + 2);
|
||||
|
||||
translateFastCallN(*this, pc, i, true, 2, vmConst(pc[1]), next);
|
||||
|
||||
activeFastcallFallback = true;
|
||||
fastcallFallbackReturn = next;
|
||||
break;
|
||||
}
|
||||
case LOP_FORNPREP:
|
||||
translateInstForNPrep(*this, pc, i);
|
||||
break;
|
||||
case LOP_FORNLOOP:
|
||||
translateInstForNLoop(*this, pc, i);
|
||||
break;
|
||||
case LOP_FORGLOOP:
|
||||
{
|
||||
int aux = int(pc[1]);
|
||||
|
||||
// We have a translation for ipairs-style traversal, general loop iteration is still too complex
|
||||
if (aux < 0)
|
||||
{
|
||||
translateInstForGLoopIpairs(*this, pc, i);
|
||||
}
|
||||
else
|
||||
{
|
||||
int ra = LUAU_INSN_A(*pc);
|
||||
|
||||
IrOp loopRepeat = blockAtInst(i + 1 + LUAU_INSN_D(*pc));
|
||||
IrOp loopExit = blockAtInst(i + getOpLength(LOP_FORGLOOP));
|
||||
IrOp fallback = block(IrBlockKind::Fallback);
|
||||
|
||||
inst(IrCmd::INTERRUPT, constUint(i));
|
||||
loadAndCheckTag(vmReg(ra), LUA_TNIL, fallback);
|
||||
|
||||
inst(IrCmd::FORGLOOP, vmReg(ra), constInt(aux), loopRepeat, loopExit);
|
||||
|
||||
beginBlock(fallback);
|
||||
inst(IrCmd::SET_SAVEDPC, constUint(i + 1));
|
||||
inst(IrCmd::FORGLOOP_FALLBACK, vmReg(ra), constInt(aux), loopRepeat, loopExit);
|
||||
|
||||
beginBlock(loopExit);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case LOP_FORGPREP_NEXT:
|
||||
translateInstForGPrepNext(*this, pc, i);
|
||||
break;
|
||||
case LOP_FORGPREP_INEXT:
|
||||
translateInstForGPrepInext(*this, pc, i);
|
||||
break;
|
||||
case LOP_AND:
|
||||
translateInstAndX(*this, pc, i, vmReg(LUAU_INSN_C(*pc)));
|
||||
break;
|
||||
case LOP_ANDK:
|
||||
translateInstAndX(*this, pc, i, vmConst(LUAU_INSN_C(*pc)));
|
||||
break;
|
||||
case LOP_OR:
|
||||
translateInstOrX(*this, pc, i, vmReg(LUAU_INSN_C(*pc)));
|
||||
break;
|
||||
case LOP_ORK:
|
||||
translateInstOrX(*this, pc, i, vmConst(LUAU_INSN_C(*pc)));
|
||||
break;
|
||||
case LOP_COVERAGE:
|
||||
inst(IrCmd::COVERAGE, constUint(i));
|
||||
break;
|
||||
case LOP_GETIMPORT:
|
||||
translateInstGetImport(*this, pc, i);
|
||||
break;
|
||||
case LOP_CONCAT:
|
||||
translateInstConcat(*this, pc, i);
|
||||
break;
|
||||
case LOP_CAPTURE:
|
||||
translateInstCapture(*this, pc, i);
|
||||
break;
|
||||
case LOP_NAMECALL:
|
||||
translateInstNamecall(*this, pc, i);
|
||||
break;
|
||||
case LOP_PREPVARARGS:
|
||||
inst(IrCmd::FALLBACK_PREPVARARGS, constUint(i), constInt(LUAU_INSN_A(*pc)));
|
||||
break;
|
||||
case LOP_GETVARARGS:
|
||||
inst(IrCmd::FALLBACK_GETVARARGS, constUint(i), vmReg(LUAU_INSN_A(*pc)), constInt(LUAU_INSN_B(*pc) - 1));
|
||||
break;
|
||||
case LOP_NEWCLOSURE:
|
||||
inst(IrCmd::FALLBACK_NEWCLOSURE, constUint(i), vmReg(LUAU_INSN_A(*pc)), constUint(LUAU_INSN_D(*pc)));
|
||||
break;
|
||||
case LOP_DUPCLOSURE:
|
||||
inst(IrCmd::FALLBACK_DUPCLOSURE, constUint(i), vmReg(LUAU_INSN_A(*pc)), vmConst(LUAU_INSN_D(*pc)));
|
||||
break;
|
||||
case LOP_FORGPREP:
|
||||
{
|
||||
IrOp loopStart = blockAtInst(i + 1 + LUAU_INSN_D(*pc));
|
||||
|
||||
inst(IrCmd::FALLBACK_FORGPREP, constUint(i), vmReg(LUAU_INSN_A(*pc)), loopStart);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LUAU_ASSERT(!"unknown instruction");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool IrBuilder::isInternalBlock(IrOp block)
|
||||
{
|
||||
IrBlock& target = function.blocks[block.index];
|
||||
|
||||
return target.kind == IrBlockKind::Internal;
|
||||
}
|
||||
|
||||
void IrBuilder::beginBlock(IrOp block)
|
||||
{
|
||||
IrBlock& target = function.blocks[block.index];
|
||||
activeBlockIdx = block.index;
|
||||
|
||||
LUAU_ASSERT(target.start == ~0u || target.start == uint32_t(function.instructions.size()));
|
||||
|
||||
target.start = uint32_t(function.instructions.size());
|
||||
|
||||
inTerminatedBlock = false;
|
||||
}
|
||||
|
||||
void IrBuilder::loadAndCheckTag(IrOp loc, uint8_t tag, IrOp fallback)
|
||||
{
|
||||
inst(IrCmd::CHECK_TAG, inst(IrCmd::LOAD_TAG, loc), constTag(tag), fallback);
|
||||
}
|
||||
|
||||
void IrBuilder::clone(const IrBlock& source, bool removeCurrentTerminator)
|
||||
{
|
||||
DenseHashMap<uint32_t, uint32_t> instRedir{~0u};
|
||||
|
||||
auto redirect = [&instRedir](IrOp& op) {
|
||||
if (op.kind == IrOpKind::Inst)
|
||||
{
|
||||
if (const uint32_t* newIndex = instRedir.find(op.index))
|
||||
op.index = *newIndex;
|
||||
else
|
||||
LUAU_ASSERT(!"values can only be used if they are defined in the same block");
|
||||
}
|
||||
};
|
||||
|
||||
if (removeCurrentTerminator && inTerminatedBlock)
|
||||
{
|
||||
IrBlock& active = function.blocks[activeBlockIdx];
|
||||
IrInst& term = function.instructions[active.finish];
|
||||
|
||||
kill(function, term);
|
||||
inTerminatedBlock = false;
|
||||
}
|
||||
|
||||
for (uint32_t index = source.start; index <= source.finish; index++)
|
||||
{
|
||||
LUAU_ASSERT(index < function.instructions.size());
|
||||
IrInst clone = function.instructions[index];
|
||||
|
||||
// Skip pseudo instructions to make clone more compact, but validate that they have no users
|
||||
if (isPseudo(clone.cmd))
|
||||
{
|
||||
LUAU_ASSERT(clone.useCount == 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
redirect(clone.a);
|
||||
redirect(clone.b);
|
||||
redirect(clone.c);
|
||||
redirect(clone.d);
|
||||
redirect(clone.e);
|
||||
redirect(clone.f);
|
||||
|
||||
addUse(function, clone.a);
|
||||
addUse(function, clone.b);
|
||||
addUse(function, clone.c);
|
||||
addUse(function, clone.d);
|
||||
addUse(function, clone.e);
|
||||
addUse(function, clone.f);
|
||||
|
||||
// Instructions that referenced the original will have to be adjusted to use the clone
|
||||
instRedir[index] = uint32_t(function.instructions.size());
|
||||
|
||||
// Reconstruct the fresh clone
|
||||
inst(clone.cmd, clone.a, clone.b, clone.c, clone.d, clone.e, clone.f);
|
||||
}
|
||||
}
|
||||
|
||||
IrOp IrBuilder::undef()
|
||||
{
|
||||
return {IrOpKind::Undef, 0};
|
||||
}
|
||||
|
||||
IrOp IrBuilder::constBool(bool value)
|
||||
{
|
||||
IrConst constant;
|
||||
constant.kind = IrConstKind::Bool;
|
||||
constant.valueBool = value;
|
||||
return constAny(constant, uint64_t(value));
|
||||
}
|
||||
|
||||
IrOp IrBuilder::constInt(int value)
|
||||
{
|
||||
IrConst constant;
|
||||
constant.kind = IrConstKind::Int;
|
||||
constant.valueInt = value;
|
||||
return constAny(constant, uint64_t(value));
|
||||
}
|
||||
|
||||
IrOp IrBuilder::constUint(unsigned value)
|
||||
{
|
||||
IrConst constant;
|
||||
constant.kind = IrConstKind::Uint;
|
||||
constant.valueUint = value;
|
||||
return constAny(constant, uint64_t(value));
|
||||
}
|
||||
|
||||
IrOp IrBuilder::constDouble(double value)
|
||||
{
|
||||
IrConst constant;
|
||||
constant.kind = IrConstKind::Double;
|
||||
constant.valueDouble = value;
|
||||
|
||||
uint64_t asCommonKey;
|
||||
static_assert(sizeof(asCommonKey) == sizeof(value), "Expecting double to be 64-bit");
|
||||
memcpy(&asCommonKey, &value, sizeof(value));
|
||||
|
||||
return constAny(constant, asCommonKey);
|
||||
}
|
||||
|
||||
IrOp IrBuilder::constTag(uint8_t value)
|
||||
{
|
||||
IrConst constant;
|
||||
constant.kind = IrConstKind::Tag;
|
||||
constant.valueTag = value;
|
||||
return constAny(constant, uint64_t(value));
|
||||
}
|
||||
|
||||
IrOp IrBuilder::constAny(IrConst constant, uint64_t asCommonKey)
|
||||
{
|
||||
ConstantKey key{constant.kind, asCommonKey};
|
||||
|
||||
if (uint32_t* cache = constantMap.find(key))
|
||||
return {IrOpKind::Constant, *cache};
|
||||
|
||||
uint32_t index = uint32_t(function.constants.size());
|
||||
function.constants.push_back(constant);
|
||||
|
||||
constantMap[key] = index;
|
||||
|
||||
return {IrOpKind::Constant, index};
|
||||
}
|
||||
|
||||
IrOp IrBuilder::cond(IrCondition cond)
|
||||
{
|
||||
return {IrOpKind::Condition, uint32_t(cond)};
|
||||
}
|
||||
|
||||
IrOp IrBuilder::inst(IrCmd cmd)
|
||||
{
|
||||
return inst(cmd, {}, {}, {}, {}, {}, {});
|
||||
}
|
||||
|
||||
IrOp IrBuilder::inst(IrCmd cmd, IrOp a)
|
||||
{
|
||||
return inst(cmd, a, {}, {}, {}, {}, {});
|
||||
}
|
||||
|
||||
IrOp IrBuilder::inst(IrCmd cmd, IrOp a, IrOp b)
|
||||
{
|
||||
return inst(cmd, a, b, {}, {}, {}, {});
|
||||
}
|
||||
|
||||
IrOp IrBuilder::inst(IrCmd cmd, IrOp a, IrOp b, IrOp c)
|
||||
{
|
||||
return inst(cmd, a, b, c, {}, {}, {});
|
||||
}
|
||||
|
||||
IrOp IrBuilder::inst(IrCmd cmd, IrOp a, IrOp b, IrOp c, IrOp d)
|
||||
{
|
||||
return inst(cmd, a, b, c, d, {}, {});
|
||||
}
|
||||
|
||||
IrOp IrBuilder::inst(IrCmd cmd, IrOp a, IrOp b, IrOp c, IrOp d, IrOp e)
|
||||
{
|
||||
return inst(cmd, a, b, c, d, e, {});
|
||||
}
|
||||
|
||||
IrOp IrBuilder::inst(IrCmd cmd, IrOp a, IrOp b, IrOp c, IrOp d, IrOp e, IrOp f)
|
||||
{
|
||||
uint32_t index = uint32_t(function.instructions.size());
|
||||
function.instructions.push_back({cmd, a, b, c, d, e, f});
|
||||
|
||||
LUAU_ASSERT(!inTerminatedBlock);
|
||||
|
||||
if (isBlockTerminator(cmd))
|
||||
{
|
||||
function.blocks[activeBlockIdx].finish = index;
|
||||
inTerminatedBlock = true;
|
||||
}
|
||||
|
||||
return {IrOpKind::Inst, index};
|
||||
}
|
||||
|
||||
IrOp IrBuilder::block(IrBlockKind kind)
|
||||
{
|
||||
if (kind == IrBlockKind::Internal && activeFastcallFallback)
|
||||
kind = IrBlockKind::Fallback;
|
||||
|
||||
uint32_t index = uint32_t(function.blocks.size());
|
||||
function.blocks.push_back(IrBlock{kind});
|
||||
return IrOp{IrOpKind::Block, index};
|
||||
}
|
||||
|
||||
IrOp IrBuilder::blockAtInst(uint32_t index)
|
||||
{
|
||||
uint32_t blockIndex = instIndexToBlock[index];
|
||||
|
||||
if (blockIndex != kNoAssociatedBlockIndex)
|
||||
return IrOp{IrOpKind::Block, blockIndex};
|
||||
|
||||
return block(IrBlockKind::Internal);
|
||||
}
|
||||
|
||||
IrOp IrBuilder::vmReg(uint8_t index)
|
||||
{
|
||||
return {IrOpKind::VmReg, index};
|
||||
}
|
||||
|
||||
IrOp IrBuilder::vmConst(uint32_t index)
|
||||
{
|
||||
return {IrOpKind::VmConst, index};
|
||||
}
|
||||
|
||||
IrOp IrBuilder::vmUpvalue(uint8_t index)
|
||||
{
|
||||
return {IrOpKind::VmUpvalue, index};
|
||||
}
|
||||
|
||||
} // namespace CodeGen
|
||||
} // namespace Luau
|
|
@ -0,0 +1,431 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#include "Luau/IrCallWrapperX64.h"
|
||||
|
||||
#include "Luau/AssemblyBuilderX64.h"
|
||||
#include "Luau/IrRegAllocX64.h"
|
||||
|
||||
#include "EmitCommonX64.h"
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace CodeGen
|
||||
{
|
||||
namespace X64
|
||||
{
|
||||
|
||||
static const std::array<OperandX64, 6> kWindowsGprOrder = {rcx, rdx, r8, r9, addr[rsp + 32], addr[rsp + 40]};
|
||||
static const std::array<OperandX64, 6> kSystemvGprOrder = {rdi, rsi, rdx, rcx, r8, r9};
|
||||
static const std::array<OperandX64, 4> kXmmOrder = {xmm0, xmm1, xmm2, xmm3}; // Common order for first 4 fp arguments on Windows/SystemV
|
||||
|
||||
static bool sameUnderlyingRegister(RegisterX64 a, RegisterX64 b)
|
||||
{
|
||||
SizeX64 underlyingSizeA = a.size == SizeX64::xmmword ? SizeX64::xmmword : SizeX64::qword;
|
||||
SizeX64 underlyingSizeB = b.size == SizeX64::xmmword ? SizeX64::xmmword : SizeX64::qword;
|
||||
|
||||
return underlyingSizeA == underlyingSizeB && a.index == b.index;
|
||||
}
|
||||
|
||||
IrCallWrapperX64::IrCallWrapperX64(IrRegAllocX64& regs, AssemblyBuilderX64& build, uint32_t instIdx)
|
||||
: regs(regs)
|
||||
, build(build)
|
||||
, instIdx(instIdx)
|
||||
, funcOp(noreg)
|
||||
{
|
||||
gprUses.fill(0);
|
||||
xmmUses.fill(0);
|
||||
}
|
||||
|
||||
void IrCallWrapperX64::addArgument(SizeX64 targetSize, OperandX64 source, IrOp sourceOp)
|
||||
{
|
||||
// Instruction operands rely on current instruction index for lifetime tracking
|
||||
LUAU_ASSERT(instIdx != kInvalidInstIdx || sourceOp.kind == IrOpKind::None);
|
||||
|
||||
LUAU_ASSERT(argCount < kMaxCallArguments);
|
||||
CallArgument& arg = args[argCount++];
|
||||
arg = {targetSize, source, sourceOp};
|
||||
|
||||
arg.target = getNextArgumentTarget(targetSize);
|
||||
|
||||
if (build.abi == ABIX64::Windows)
|
||||
{
|
||||
// On Windows, gpr/xmm register positions move in sync
|
||||
gprPos++;
|
||||
xmmPos++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (targetSize == SizeX64::xmmword)
|
||||
xmmPos++;
|
||||
else
|
||||
gprPos++;
|
||||
}
|
||||
}
|
||||
|
||||
void IrCallWrapperX64::addArgument(SizeX64 targetSize, ScopedRegX64& scopedReg)
|
||||
{
|
||||
addArgument(targetSize, scopedReg.release(), {});
|
||||
}
|
||||
|
||||
void IrCallWrapperX64::call(const OperandX64& func)
|
||||
{
|
||||
funcOp = func;
|
||||
|
||||
countRegisterUses();
|
||||
|
||||
for (int i = 0; i < argCount; ++i)
|
||||
{
|
||||
CallArgument& arg = args[i];
|
||||
|
||||
if (arg.sourceOp.kind != IrOpKind::None)
|
||||
{
|
||||
if (IrInst* inst = regs.function.asInstOp(arg.sourceOp))
|
||||
{
|
||||
// Source registers are recorded separately from source operands in CallArgument
|
||||
// If source is the last use of IrInst, clear the register from the operand
|
||||
if (regs.isLastUseReg(*inst, instIdx))
|
||||
inst->regX64 = noreg;
|
||||
// If it's not the last use and register is volatile, register ownership is taken, which also spills the operand
|
||||
else if (inst->regX64.size == SizeX64::xmmword || regs.shouldFreeGpr(inst->regX64))
|
||||
regs.takeReg(inst->regX64, kInvalidInstIdx);
|
||||
}
|
||||
}
|
||||
|
||||
// Immediate values are stored at the end since they are not interfering and target register can still be used temporarily
|
||||
if (arg.source.cat == CategoryX64::imm)
|
||||
{
|
||||
arg.candidate = false;
|
||||
}
|
||||
// Arguments passed through stack can be handled immediately
|
||||
else if (arg.target.cat == CategoryX64::mem)
|
||||
{
|
||||
if (arg.source.cat == CategoryX64::mem)
|
||||
{
|
||||
ScopedRegX64 tmp{regs, arg.target.memSize};
|
||||
|
||||
freeSourceRegisters(arg);
|
||||
|
||||
if (arg.source.memSize == SizeX64::none)
|
||||
build.lea(tmp.reg, arg.source);
|
||||
else
|
||||
build.mov(tmp.reg, arg.source);
|
||||
|
||||
build.mov(arg.target, tmp.reg);
|
||||
}
|
||||
else
|
||||
{
|
||||
freeSourceRegisters(arg);
|
||||
|
||||
build.mov(arg.target, arg.source);
|
||||
}
|
||||
|
||||
arg.candidate = false;
|
||||
}
|
||||
// Skip arguments that are already in their place
|
||||
else if (arg.source.cat == CategoryX64::reg && sameUnderlyingRegister(arg.target.base, arg.source.base))
|
||||
{
|
||||
freeSourceRegisters(arg);
|
||||
|
||||
// If target is not used as source in other arguments, prevent register allocator from giving it out
|
||||
if (getRegisterUses(arg.target.base) == 0)
|
||||
regs.takeReg(arg.target.base, kInvalidInstIdx);
|
||||
else // Otherwise, make sure we won't free it when last source use is completed
|
||||
addRegisterUse(arg.target.base);
|
||||
|
||||
arg.candidate = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Repeat until we run out of arguments to pass
|
||||
while (true)
|
||||
{
|
||||
// Find target argument register that is not an active source
|
||||
if (CallArgument* candidate = findNonInterferingArgument())
|
||||
{
|
||||
// This section is only for handling register targets
|
||||
LUAU_ASSERT(candidate->target.cat == CategoryX64::reg);
|
||||
|
||||
freeSourceRegisters(*candidate);
|
||||
|
||||
LUAU_ASSERT(getRegisterUses(candidate->target.base) == 0);
|
||||
regs.takeReg(candidate->target.base, kInvalidInstIdx);
|
||||
|
||||
moveToTarget(*candidate);
|
||||
|
||||
candidate->candidate = false;
|
||||
}
|
||||
// If all registers cross-interfere (rcx <- rdx, rdx <- rcx), one has to be renamed
|
||||
else if (RegisterX64 conflict = findConflictingTarget(); conflict != noreg)
|
||||
{
|
||||
renameConflictingRegister(conflict);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < argCount; ++i)
|
||||
LUAU_ASSERT(!args[i].candidate);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Handle immediate arguments last
|
||||
for (int i = 0; i < argCount; ++i)
|
||||
{
|
||||
CallArgument& arg = args[i];
|
||||
|
||||
if (arg.source.cat == CategoryX64::imm)
|
||||
{
|
||||
// There could be a conflict with the function source register, make this argument a candidate to find it
|
||||
arg.candidate = true;
|
||||
|
||||
if (RegisterX64 conflict = findConflictingTarget(); conflict != noreg)
|
||||
renameConflictingRegister(conflict);
|
||||
|
||||
if (arg.target.cat == CategoryX64::reg)
|
||||
regs.takeReg(arg.target.base, kInvalidInstIdx);
|
||||
|
||||
moveToTarget(arg);
|
||||
|
||||
arg.candidate = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Free registers used in the function call
|
||||
removeRegisterUse(funcOp.base);
|
||||
removeRegisterUse(funcOp.index);
|
||||
|
||||
// Just before the call is made, argument registers are all marked as free in register allocator
|
||||
for (int i = 0; i < argCount; ++i)
|
||||
{
|
||||
CallArgument& arg = args[i];
|
||||
|
||||
if (arg.target.cat == CategoryX64::reg)
|
||||
regs.freeReg(arg.target.base);
|
||||
}
|
||||
|
||||
regs.preserveAndFreeInstValues();
|
||||
|
||||
regs.assertAllFree();
|
||||
|
||||
build.call(funcOp);
|
||||
}
|
||||
|
||||
RegisterX64 IrCallWrapperX64::suggestNextArgumentRegister(SizeX64 size) const
|
||||
{
|
||||
OperandX64 target = getNextArgumentTarget(size);
|
||||
|
||||
return target.cat == CategoryX64::reg ? regs.takeReg(target.base, kInvalidInstIdx) : regs.allocReg(size, kInvalidInstIdx);
|
||||
}
|
||||
|
||||
OperandX64 IrCallWrapperX64::getNextArgumentTarget(SizeX64 size) const
|
||||
{
|
||||
if (size == SizeX64::xmmword)
|
||||
{
|
||||
LUAU_ASSERT(size_t(xmmPos) < kXmmOrder.size());
|
||||
return kXmmOrder[xmmPos];
|
||||
}
|
||||
|
||||
const std::array<OperandX64, 6>& gprOrder = build.abi == ABIX64::Windows ? kWindowsGprOrder : kSystemvGprOrder;
|
||||
|
||||
LUAU_ASSERT(size_t(gprPos) < gprOrder.size());
|
||||
OperandX64 target = gprOrder[gprPos];
|
||||
|
||||
// Keep requested argument size
|
||||
if (target.cat == CategoryX64::reg)
|
||||
target.base.size = size;
|
||||
else if (target.cat == CategoryX64::mem)
|
||||
target.memSize = size;
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
void IrCallWrapperX64::countRegisterUses()
|
||||
{
|
||||
for (int i = 0; i < argCount; ++i)
|
||||
{
|
||||
addRegisterUse(args[i].source.base);
|
||||
addRegisterUse(args[i].source.index);
|
||||
}
|
||||
|
||||
addRegisterUse(funcOp.base);
|
||||
addRegisterUse(funcOp.index);
|
||||
}
|
||||
|
||||
CallArgument* IrCallWrapperX64::findNonInterferingArgument()
|
||||
{
|
||||
for (int i = 0; i < argCount; ++i)
|
||||
{
|
||||
CallArgument& arg = args[i];
|
||||
|
||||
if (arg.candidate && !interferesWithActiveSources(arg, i) && !interferesWithOperand(funcOp, arg.target.base))
|
||||
return &arg;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool IrCallWrapperX64::interferesWithOperand(const OperandX64& op, RegisterX64 reg) const
|
||||
{
|
||||
return sameUnderlyingRegister(op.base, reg) || sameUnderlyingRegister(op.index, reg);
|
||||
}
|
||||
|
||||
bool IrCallWrapperX64::interferesWithActiveSources(const CallArgument& targetArg, int targetArgIndex) const
|
||||
{
|
||||
for (int i = 0; i < argCount; ++i)
|
||||
{
|
||||
const CallArgument& arg = args[i];
|
||||
|
||||
if (arg.candidate && i != targetArgIndex && interferesWithOperand(arg.source, targetArg.target.base))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool IrCallWrapperX64::interferesWithActiveTarget(RegisterX64 sourceReg) const
|
||||
{
|
||||
for (int i = 0; i < argCount; ++i)
|
||||
{
|
||||
const CallArgument& arg = args[i];
|
||||
|
||||
if (arg.candidate && sameUnderlyingRegister(arg.target.base, sourceReg))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void IrCallWrapperX64::moveToTarget(CallArgument& arg)
|
||||
{
|
||||
if (arg.source.cat == CategoryX64::reg)
|
||||
{
|
||||
RegisterX64 source = arg.source.base;
|
||||
|
||||
if (source.size == SizeX64::xmmword)
|
||||
build.vmovsd(arg.target, source, source);
|
||||
else
|
||||
build.mov(arg.target, source);
|
||||
}
|
||||
else if (arg.source.cat == CategoryX64::imm)
|
||||
{
|
||||
build.mov(arg.target, arg.source);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (arg.source.memSize == SizeX64::none)
|
||||
build.lea(arg.target, arg.source);
|
||||
else if (arg.target.base.size == SizeX64::xmmword && arg.source.memSize == SizeX64::xmmword)
|
||||
build.vmovups(arg.target, arg.source);
|
||||
else if (arg.target.base.size == SizeX64::xmmword)
|
||||
build.vmovsd(arg.target, arg.source);
|
||||
else
|
||||
build.mov(arg.target, arg.source);
|
||||
}
|
||||
}
|
||||
|
||||
void IrCallWrapperX64::freeSourceRegisters(CallArgument& arg)
|
||||
{
|
||||
removeRegisterUse(arg.source.base);
|
||||
removeRegisterUse(arg.source.index);
|
||||
}
|
||||
|
||||
void IrCallWrapperX64::renameRegister(RegisterX64& target, RegisterX64 reg, RegisterX64 replacement)
|
||||
{
|
||||
if (sameUnderlyingRegister(target, reg))
|
||||
{
|
||||
addRegisterUse(replacement);
|
||||
removeRegisterUse(target);
|
||||
|
||||
target.index = replacement.index; // Only change index, size is preserved
|
||||
}
|
||||
}
|
||||
|
||||
void IrCallWrapperX64::renameSourceRegisters(RegisterX64 reg, RegisterX64 replacement)
|
||||
{
|
||||
for (int i = 0; i < argCount; ++i)
|
||||
{
|
||||
CallArgument& arg = args[i];
|
||||
|
||||
if (arg.candidate)
|
||||
{
|
||||
renameRegister(arg.source.base, reg, replacement);
|
||||
renameRegister(arg.source.index, reg, replacement);
|
||||
}
|
||||
}
|
||||
|
||||
renameRegister(funcOp.base, reg, replacement);
|
||||
renameRegister(funcOp.index, reg, replacement);
|
||||
}
|
||||
|
||||
RegisterX64 IrCallWrapperX64::findConflictingTarget() const
|
||||
{
|
||||
for (int i = 0; i < argCount; ++i)
|
||||
{
|
||||
const CallArgument& arg = args[i];
|
||||
|
||||
if (arg.candidate)
|
||||
{
|
||||
if (interferesWithActiveTarget(arg.source.base))
|
||||
return arg.source.base;
|
||||
|
||||
if (interferesWithActiveTarget(arg.source.index))
|
||||
return arg.source.index;
|
||||
}
|
||||
}
|
||||
|
||||
if (interferesWithActiveTarget(funcOp.base))
|
||||
return funcOp.base;
|
||||
|
||||
if (interferesWithActiveTarget(funcOp.index))
|
||||
return funcOp.index;
|
||||
|
||||
return noreg;
|
||||
}
|
||||
|
||||
void IrCallWrapperX64::renameConflictingRegister(RegisterX64 conflict)
|
||||
{
|
||||
// Get a fresh register
|
||||
RegisterX64 freshReg = regs.allocReg(conflict.size, kInvalidInstIdx);
|
||||
|
||||
if (conflict.size == SizeX64::xmmword)
|
||||
build.vmovsd(freshReg, conflict, conflict);
|
||||
else
|
||||
build.mov(freshReg, conflict);
|
||||
|
||||
renameSourceRegisters(conflict, freshReg);
|
||||
}
|
||||
|
||||
int IrCallWrapperX64::getRegisterUses(RegisterX64 reg) const
|
||||
{
|
||||
return reg.size == SizeX64::xmmword ? xmmUses[reg.index] : (reg.size != SizeX64::none ? gprUses[reg.index] : 0);
|
||||
}
|
||||
|
||||
void IrCallWrapperX64::addRegisterUse(RegisterX64 reg)
|
||||
{
|
||||
if (reg.size == SizeX64::xmmword)
|
||||
xmmUses[reg.index]++;
|
||||
else if (reg.size != SizeX64::none)
|
||||
gprUses[reg.index]++;
|
||||
}
|
||||
|
||||
void IrCallWrapperX64::removeRegisterUse(RegisterX64 reg)
|
||||
{
|
||||
if (reg.size == SizeX64::xmmword)
|
||||
{
|
||||
LUAU_ASSERT(xmmUses[reg.index] != 0);
|
||||
xmmUses[reg.index]--;
|
||||
|
||||
if (xmmUses[reg.index] == 0) // we don't use persistent xmm regs so no need to call shouldFreeRegister
|
||||
regs.freeReg(reg);
|
||||
}
|
||||
else if (reg.size != SizeX64::none)
|
||||
{
|
||||
LUAU_ASSERT(gprUses[reg.index] != 0);
|
||||
gprUses[reg.index]--;
|
||||
|
||||
if (gprUses[reg.index] == 0 && regs.shouldFreeGpr(reg))
|
||||
regs.freeReg(reg);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace X64
|
||||
} // namespace CodeGen
|
||||
} // namespace Luau
|
|
@@ -0,0 +1,766 @@

// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/IrDump.h"

#include "Luau/IrUtils.h"

#include "lua.h"

#include <stdarg.h>

namespace Luau
{
namespace CodeGen
{

static const char* textForCondition[] = {
    "eq", "not_eq", "lt", "not_lt", "le", "not_le", "gt", "not_gt", "ge", "not_ge", "u_lt", "u_le", "u_gt", "u_ge"};
static_assert(sizeof(textForCondition) / sizeof(textForCondition[0]) == size_t(IrCondition::Count), "all conditions have to be covered");

const int kDetailsAlignColumn = 60;

LUAU_PRINTF_ATTR(2, 3)
static void append(std::string& result, const char* fmt, ...)
{
    char buf[256];
    va_list args;
    va_start(args, fmt);
    vsnprintf(buf, sizeof(buf), fmt, args);
    va_end(args);
    result.append(buf);
}

static void padToDetailColumn(std::string& result, size_t lineStart)
{
    int pad = kDetailsAlignColumn - int(result.size() - lineStart);

    if (pad > 0)
        result.append(pad, ' ');
}

static const char* getTagName(uint8_t tag)
{
    switch (tag)
    {
    case LUA_TNIL:
        return "tnil";
    case LUA_TBOOLEAN:
        return "tboolean";
    case LUA_TLIGHTUSERDATA:
        return "tlightuserdata";
    case LUA_TNUMBER:
        return "tnumber";
    case LUA_TVECTOR:
        return "tvector";
    case LUA_TSTRING:
        return "tstring";
    case LUA_TTABLE:
        return "ttable";
    case LUA_TFUNCTION:
        return "tfunction";
    case LUA_TUSERDATA:
        return "tuserdata";
    case LUA_TTHREAD:
        return "tthread";
    default:
        LUAU_ASSERT(!"Unknown type tag");
        LUAU_UNREACHABLE();
    }
}

const char* getCmdName(IrCmd cmd)
{
    switch (cmd)
    {
    case IrCmd::NOP:
        return "NOP";
    case IrCmd::LOAD_TAG:
        return "LOAD_TAG";
    case IrCmd::LOAD_POINTER:
        return "LOAD_POINTER";
    case IrCmd::LOAD_DOUBLE:
        return "LOAD_DOUBLE";
    case IrCmd::LOAD_INT:
        return "LOAD_INT";
    case IrCmd::LOAD_TVALUE:
        return "LOAD_TVALUE";
    case IrCmd::LOAD_NODE_VALUE_TV:
        return "LOAD_NODE_VALUE_TV";
    case IrCmd::LOAD_ENV:
        return "LOAD_ENV";
    case IrCmd::GET_ARR_ADDR:
        return "GET_ARR_ADDR";
    case IrCmd::GET_SLOT_NODE_ADDR:
        return "GET_SLOT_NODE_ADDR";
    case IrCmd::GET_HASH_NODE_ADDR:
        return "GET_HASH_NODE_ADDR";
    case IrCmd::STORE_TAG:
        return "STORE_TAG";
    case IrCmd::STORE_POINTER:
        return "STORE_POINTER";
    case IrCmd::STORE_DOUBLE:
        return "STORE_DOUBLE";
    case IrCmd::STORE_INT:
        return "STORE_INT";
    case IrCmd::STORE_VECTOR:
        return "STORE_VECTOR";
    case IrCmd::STORE_TVALUE:
        return "STORE_TVALUE";
    case IrCmd::STORE_NODE_VALUE_TV:
        return "STORE_NODE_VALUE_TV";
    case IrCmd::ADD_INT:
        return "ADD_INT";
    case IrCmd::SUB_INT:
        return "SUB_INT";
    case IrCmd::ADD_NUM:
        return "ADD_NUM";
    case IrCmd::SUB_NUM:
        return "SUB_NUM";
    case IrCmd::MUL_NUM:
        return "MUL_NUM";
    case IrCmd::DIV_NUM:
        return "DIV_NUM";
    case IrCmd::MOD_NUM:
        return "MOD_NUM";
    case IrCmd::MIN_NUM:
        return "MIN_NUM";
    case IrCmd::MAX_NUM:
        return "MAX_NUM";
    case IrCmd::UNM_NUM:
        return "UNM_NUM";
    case IrCmd::FLOOR_NUM:
        return "FLOOR_NUM";
    case IrCmd::CEIL_NUM:
        return "CEIL_NUM";
    case IrCmd::ROUND_NUM:
        return "ROUND_NUM";
    case IrCmd::SQRT_NUM:
        return "SQRT_NUM";
    case IrCmd::ABS_NUM:
        return "ABS_NUM";
    case IrCmd::NOT_ANY:
        return "NOT_ANY";
    case IrCmd::JUMP:
        return "JUMP";
    case IrCmd::JUMP_IF_TRUTHY:
        return "JUMP_IF_TRUTHY";
    case IrCmd::JUMP_IF_FALSY:
        return "JUMP_IF_FALSY";
    case IrCmd::JUMP_EQ_TAG:
        return "JUMP_EQ_TAG";
    case IrCmd::JUMP_EQ_INT:
        return "JUMP_EQ_INT";
    case IrCmd::JUMP_LT_INT:
        return "JUMP_LT_INT";
    case IrCmd::JUMP_GE_UINT:
        return "JUMP_GE_UINT";
    case IrCmd::JUMP_EQ_POINTER:
        return "JUMP_EQ_POINTER";
    case IrCmd::JUMP_CMP_NUM:
        return "JUMP_CMP_NUM";
    case IrCmd::JUMP_CMP_ANY:
        return "JUMP_CMP_ANY";
    case IrCmd::JUMP_SLOT_MATCH:
        return "JUMP_SLOT_MATCH";
    case IrCmd::TABLE_LEN:
        return "TABLE_LEN";
    case IrCmd::NEW_TABLE:
        return "NEW_TABLE";
    case IrCmd::DUP_TABLE:
        return "DUP_TABLE";
    case IrCmd::TRY_NUM_TO_INDEX:
        return "TRY_NUM_TO_INDEX";
    case IrCmd::TRY_CALL_FASTGETTM:
        return "TRY_CALL_FASTGETTM";
    case IrCmd::INT_TO_NUM:
        return "INT_TO_NUM";
    case IrCmd::UINT_TO_NUM:
        return "UINT_TO_NUM";
    case IrCmd::NUM_TO_INT:
        return "NUM_TO_INT";
    case IrCmd::NUM_TO_UINT:
        return "NUM_TO_UINT";
    case IrCmd::ADJUST_STACK_TO_REG:
        return "ADJUST_STACK_TO_REG";
    case IrCmd::ADJUST_STACK_TO_TOP:
        return "ADJUST_STACK_TO_TOP";
    case IrCmd::FASTCALL:
        return "FASTCALL";
    case IrCmd::INVOKE_FASTCALL:
        return "INVOKE_FASTCALL";
    case IrCmd::CHECK_FASTCALL_RES:
        return "CHECK_FASTCALL_RES";
    case IrCmd::DO_ARITH:
        return "DO_ARITH";
    case IrCmd::DO_LEN:
        return "DO_LEN";
    case IrCmd::GET_TABLE:
        return "GET_TABLE";
    case IrCmd::SET_TABLE:
        return "SET_TABLE";
    case IrCmd::GET_IMPORT:
        return "GET_IMPORT";
    case IrCmd::CONCAT:
        return "CONCAT";
    case IrCmd::GET_UPVALUE:
        return "GET_UPVALUE";
    case IrCmd::SET_UPVALUE:
        return "SET_UPVALUE";
    case IrCmd::PREPARE_FORN:
        return "PREPARE_FORN";
    case IrCmd::CHECK_TAG:
        return "CHECK_TAG";
    case IrCmd::CHECK_READONLY:
        return "CHECK_READONLY";
    case IrCmd::CHECK_NO_METATABLE:
        return "CHECK_NO_METATABLE";
    case IrCmd::CHECK_SAFE_ENV:
        return "CHECK_SAFE_ENV";
    case IrCmd::CHECK_ARRAY_SIZE:
        return "CHECK_ARRAY_SIZE";
    case IrCmd::CHECK_SLOT_MATCH:
        return "CHECK_SLOT_MATCH";
    case IrCmd::CHECK_NODE_NO_NEXT:
        return "CHECK_NODE_NO_NEXT";
    case IrCmd::INTERRUPT:
        return "INTERRUPT";
    case IrCmd::CHECK_GC:
        return "CHECK_GC";
    case IrCmd::BARRIER_OBJ:
        return "BARRIER_OBJ";
    case IrCmd::BARRIER_TABLE_BACK:
        return "BARRIER_TABLE_BACK";
    case IrCmd::BARRIER_TABLE_FORWARD:
        return "BARRIER_TABLE_FORWARD";
    case IrCmd::SET_SAVEDPC:
        return "SET_SAVEDPC";
    case IrCmd::CLOSE_UPVALS:
        return "CLOSE_UPVALS";
    case IrCmd::CAPTURE:
        return "CAPTURE";
    case IrCmd::SETLIST:
        return "SETLIST";
    case IrCmd::CALL:
        return "CALL";
    case IrCmd::RETURN:
        return "RETURN";
    case IrCmd::FORGLOOP:
        return "FORGLOOP";
    case IrCmd::FORGLOOP_FALLBACK:
        return "FORGLOOP_FALLBACK";
    case IrCmd::FORGPREP_XNEXT_FALLBACK:
        return "FORGPREP_XNEXT_FALLBACK";
    case IrCmd::COVERAGE:
        return "COVERAGE";
    case IrCmd::FALLBACK_GETGLOBAL:
        return "FALLBACK_GETGLOBAL";
    case IrCmd::FALLBACK_SETGLOBAL:
        return "FALLBACK_SETGLOBAL";
    case IrCmd::FALLBACK_GETTABLEKS:
        return "FALLBACK_GETTABLEKS";
    case IrCmd::FALLBACK_SETTABLEKS:
        return "FALLBACK_SETTABLEKS";
    case IrCmd::FALLBACK_NAMECALL:
        return "FALLBACK_NAMECALL";
    case IrCmd::FALLBACK_PREPVARARGS:
        return "FALLBACK_PREPVARARGS";
    case IrCmd::FALLBACK_GETVARARGS:
        return "FALLBACK_GETVARARGS";
    case IrCmd::FALLBACK_NEWCLOSURE:
        return "FALLBACK_NEWCLOSURE";
    case IrCmd::FALLBACK_DUPCLOSURE:
        return "FALLBACK_DUPCLOSURE";
    case IrCmd::FALLBACK_FORGPREP:
        return "FALLBACK_FORGPREP";
    case IrCmd::SUBSTITUTE:
        return "SUBSTITUTE";
    case IrCmd::BITAND_UINT:
        return "BITAND_UINT";
    case IrCmd::BITXOR_UINT:
        return "BITXOR_UINT";
    case IrCmd::BITOR_UINT:
        return "BITOR_UINT";
    case IrCmd::BITNOT_UINT:
        return "BITNOT_UINT";
    case IrCmd::BITLSHIFT_UINT:
        return "BITLSHIFT_UINT";
    case IrCmd::BITRSHIFT_UINT:
        return "BITRSHIFT_UINT";
    case IrCmd::BITARSHIFT_UINT:
        return "BITARSHIFT_UINT";
    case IrCmd::BITLROTATE_UINT:
        return "BITLROTATE_UINT";
    case IrCmd::BITRROTATE_UINT:
        return "BITRROTATE_UINT";
    case IrCmd::BITCOUNTLZ_UINT:
        return "BITCOUNTLZ_UINT";
    case IrCmd::BITCOUNTRZ_UINT:
        return "BITCOUNTRZ_UINT";
    case IrCmd::INVOKE_LIBM:
        return "INVOKE_LIBM";
    }

    LUAU_UNREACHABLE();
}

const char* getBlockKindName(IrBlockKind kind)
{
    switch (kind)
    {
    case IrBlockKind::Bytecode:
        return "bb_bytecode";
    case IrBlockKind::Fallback:
        return "bb_fallback";
    case IrBlockKind::Internal:
        return "bb";
    case IrBlockKind::Linearized:
        return "bb_linear";
    case IrBlockKind::Dead:
        return "dead";
    }

    LUAU_UNREACHABLE();
}

void toString(IrToStringContext& ctx, const IrInst& inst, uint32_t index)
{
    append(ctx.result, " ");

    // Instructions with a result display the target virtual register
    if (hasResult(inst.cmd))
        append(ctx.result, "%%%u = ", index);

    ctx.result.append(getCmdName(inst.cmd));

    auto checkOp = [&ctx](IrOp op, const char* sep) {
        if (op.kind != IrOpKind::None)
        {
            ctx.result.append(sep);
            toString(ctx, op);
        }
    };

    checkOp(inst.a, " ");
    checkOp(inst.b, ", ");
    checkOp(inst.c, ", ");
    checkOp(inst.d, ", ");
    checkOp(inst.e, ", ");
    checkOp(inst.f, ", ");
}

void toString(IrToStringContext& ctx, const IrBlock& block, uint32_t index)
{
    append(ctx.result, "%s_%u", getBlockKindName(block.kind), index);
}

void toString(IrToStringContext& ctx, IrOp op)
{
    switch (op.kind)
    {
    case IrOpKind::None:
        break;
    case IrOpKind::Undef:
        append(ctx.result, "undef");
        break;
    case IrOpKind::Constant:
        toString(ctx.result, ctx.constants[op.index]);
        break;
    case IrOpKind::Condition:
        LUAU_ASSERT(op.index < uint32_t(IrCondition::Count));
        ctx.result.append(textForCondition[op.index]);
        break;
    case IrOpKind::Inst:
        append(ctx.result, "%%%u", op.index);
        break;
    case IrOpKind::Block:
        append(ctx.result, "%s_%u", getBlockKindName(ctx.blocks[op.index].kind), op.index);
        break;
    case IrOpKind::VmReg:
        append(ctx.result, "R%d", vmRegOp(op));
        break;
    case IrOpKind::VmConst:
        append(ctx.result, "K%d", vmConstOp(op));
        break;
    case IrOpKind::VmUpvalue:
        append(ctx.result, "U%d", vmUpvalueOp(op));
        break;
    }
}

void toString(std::string& result, IrConst constant)
{
    switch (constant.kind)
    {
    case IrConstKind::Bool:
        append(result, constant.valueBool ? "true" : "false");
        break;
    case IrConstKind::Int:
        append(result, "%di", constant.valueInt);
        break;
    case IrConstKind::Uint:
        append(result, "%uu", constant.valueUint);
        break;
    case IrConstKind::Double:
        if (constant.valueDouble != constant.valueDouble)
            append(result, "nan");
        else
            append(result, "%.17g", constant.valueDouble);
        break;
    case IrConstKind::Tag:
        result.append(getTagName(constant.valueTag));
        break;
    }
}
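The Double case above relies on the IEEE 754 property that NaN is the only value that compares unequal to itself, so `constant.valueDouble != constant.valueDouble` is a dependency-free NaN test. A minimal standalone sketch (not part of the commit) of the same check:

#include <cassert>
#include <cmath>

int main()
{
    double nan = std::nan(""); // quiet NaN
    double one = 1.0;

    assert(nan != nan);    // only NaN compares unequal to itself
    assert(!(one != one)); // every other value compares equal to itself
    return 0;
}
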
static void appendBlockSet(IrToStringContext& ctx, BlockIteratorWrapper blocks)
{
    bool comma = false;

    for (uint32_t target : blocks)
    {
        if (comma)
            append(ctx.result, ", ");
        comma = true;

        toString(ctx, ctx.blocks[target], target);
    }
}

static void appendRegisterSet(IrToStringContext& ctx, const RegisterSet& rs, const char* separator)
{
    bool comma = false;

    for (size_t i = 0; i < rs.regs.size(); i++)
    {
        if (rs.regs.test(i))
        {
            if (comma)
                ctx.result.append(separator);
            comma = true;

            append(ctx.result, "R%d", int(i));
        }
    }

    if (rs.varargSeq)
    {
        if (comma)
            ctx.result.append(separator);

        append(ctx.result, "R%d...", rs.varargStart);
    }
}

static RegisterSet getJumpTargetExtraLiveIn(IrToStringContext& ctx, const IrBlock& block, uint32_t blockIdx, const IrInst& inst)
{
    RegisterSet extraRs;

    if (blockIdx >= ctx.cfg.in.size())
        return extraRs;

    const RegisterSet& defRs = ctx.cfg.in[blockIdx];

    // Find the first block argument; for guard instructions (isNonTerminatingJump), that's the first and only one
    LUAU_ASSERT(isNonTerminatingJump(inst.cmd));
    IrOp op = inst.a;

    if (inst.b.kind == IrOpKind::Block)
        op = inst.b;
    else if (inst.c.kind == IrOpKind::Block)
        op = inst.c;
    else if (inst.d.kind == IrOpKind::Block)
        op = inst.d;
    else if (inst.e.kind == IrOpKind::Block)
        op = inst.e;
    else if (inst.f.kind == IrOpKind::Block)
        op = inst.f;

    if (op.kind == IrOpKind::Block && op.index < ctx.cfg.in.size())
    {
        const RegisterSet& inRs = ctx.cfg.in[op.index];

        extraRs.regs = inRs.regs & ~defRs.regs;

        if (inRs.varargSeq)
            requireVariadicSequence(extraRs, defRs, inRs.varargStart);
    }

    return extraRs;
}

void toStringDetailed(IrToStringContext& ctx, const IrBlock& block, uint32_t blockIdx, const IrInst& inst, uint32_t instIdx, bool includeUseInfo)
{
    size_t start = ctx.result.size();

    toString(ctx, inst, instIdx);

    if (includeUseInfo)
    {
        padToDetailColumn(ctx.result, start);

        if (inst.useCount == 0 && hasSideEffects(inst.cmd))
        {
            if (isNonTerminatingJump(inst.cmd))
            {
                RegisterSet extraRs = getJumpTargetExtraLiveIn(ctx, block, blockIdx, inst);

                if (extraRs.regs.any() || extraRs.varargSeq)
                {
                    append(ctx.result, "; %%%u, extra in: ", instIdx);
                    appendRegisterSet(ctx, extraRs, ", ");
                    ctx.result.append("\n");
                }
                else
                {
                    append(ctx.result, "; %%%u\n", instIdx);
                }
            }
            else
            {
                append(ctx.result, "; %%%u\n", instIdx);
            }
        }
        else
        {
            append(ctx.result, "; useCount: %d, lastUse: %%%u\n", inst.useCount, inst.lastUse);
        }
    }
    else
    {
        ctx.result.append("\n");
    }
}

void toStringDetailed(IrToStringContext& ctx, const IrBlock& block, uint32_t index, bool includeUseInfo)
{
    // Report captured registers for the entry block
    if (block.useCount == 0 && block.kind != IrBlockKind::Dead && ctx.cfg.captured.regs.any())
    {
        append(ctx.result, "; captured regs: ");
        appendRegisterSet(ctx, ctx.cfg.captured, ", ");
        append(ctx.result, "\n\n");
    }

    size_t start = ctx.result.size();

    toString(ctx, block, index);
    append(ctx.result, ":");

    if (includeUseInfo)
    {
        padToDetailColumn(ctx.result, start);

        append(ctx.result, "; useCount: %d\n", block.useCount);
    }
    else
    {
        ctx.result.append("\n");
    }

    // Predecessor list
    if (index < ctx.cfg.predecessorsOffsets.size())
    {
        BlockIteratorWrapper pred = predecessors(ctx.cfg, index);

        if (!pred.empty())
        {
            append(ctx.result, "; predecessors: ");

            appendBlockSet(ctx, pred);
            append(ctx.result, "\n");
        }
    }

    // Successor list
    if (index < ctx.cfg.successorsOffsets.size())
    {
        BlockIteratorWrapper succ = successors(ctx.cfg, index);

        if (!succ.empty())
        {
            append(ctx.result, "; successors: ");

            appendBlockSet(ctx, succ);
            append(ctx.result, "\n");
        }
    }

    // Live-in VM regs
    if (index < ctx.cfg.in.size())
    {
        const RegisterSet& in = ctx.cfg.in[index];

        if (in.regs.any() || in.varargSeq)
        {
            append(ctx.result, "; in regs: ");
            appendRegisterSet(ctx, in, ", ");
            append(ctx.result, "\n");
        }
    }

    // Live-out VM regs
    if (index < ctx.cfg.out.size())
    {
        const RegisterSet& out = ctx.cfg.out[index];

        if (out.regs.any() || out.varargSeq)
        {
            append(ctx.result, "; out regs: ");
            appendRegisterSet(ctx, out, ", ");
            append(ctx.result, "\n");
        }
    }
}

std::string toString(const IrFunction& function, bool includeUseInfo)
{
    std::string result;
    IrToStringContext ctx{result, function.blocks, function.constants, function.cfg};

    for (size_t i = 0; i < function.blocks.size(); i++)
    {
        const IrBlock& block = function.blocks[i];

        if (block.kind == IrBlockKind::Dead)
            continue;

        toStringDetailed(ctx, block, uint32_t(i), includeUseInfo);

        if (block.start == ~0u)
        {
            append(ctx.result, " *empty*\n\n");
            continue;
        }

        // To allow dumping blocks that are still being constructed, we can't rely on the terminator and need a bounds check
        for (uint32_t index = block.start; index <= block.finish && index < uint32_t(function.instructions.size()); index++)
        {
            const IrInst& inst = function.instructions[index];

            // Skip pseudo instructions unless they are still referenced
            if (isPseudo(inst.cmd) && inst.useCount == 0)
                continue;

            append(ctx.result, " ");
            toStringDetailed(ctx, block, uint32_t(i), inst, index, includeUseInfo);
        }

        append(ctx.result, "\n");
    }

    return result;
}

std::string dump(const IrFunction& function)
{
    std::string result = toString(function, /* includeUseInfo */ true);

    printf("%s\n", result.c_str());

    return result;
}

std::string toDot(const IrFunction& function, bool includeInst)
{
    std::string result;
    IrToStringContext ctx{result, function.blocks, function.constants, function.cfg};

    auto appendLabelRegset = [&ctx](const std::vector<RegisterSet>& regSets, size_t blockIdx, const char* name) {
        if (blockIdx < regSets.size())
        {
            const RegisterSet& rs = regSets[blockIdx];

            if (rs.regs.any() || rs.varargSeq)
            {
                append(ctx.result, "|{%s|", name);
                appendRegisterSet(ctx, rs, "|");
                append(ctx.result, "}");
            }
        }
    };

    append(ctx.result, "digraph CFG {\n");
    append(ctx.result, "node[shape=record]\n");

    for (size_t i = 0; i < function.blocks.size(); i++)
    {
        const IrBlock& block = function.blocks[i];

        append(ctx.result, "b%u [", unsigned(i));

        if (block.kind == IrBlockKind::Fallback)
            append(ctx.result, "style=filled;fillcolor=salmon;");
        else if (block.kind == IrBlockKind::Bytecode)
            append(ctx.result, "style=filled;fillcolor=palegreen;");

        append(ctx.result, "label=\"{");
        toString(ctx, block, uint32_t(i));

        appendLabelRegset(ctx.cfg.in, i, "in");

        if (includeInst && block.start != ~0u)
        {
            for (uint32_t instIdx = block.start; instIdx <= block.finish; instIdx++)
            {
                const IrInst& inst = function.instructions[instIdx];

                // Skip pseudo instructions unless they are still referenced
                if (isPseudo(inst.cmd) && inst.useCount == 0)
                    continue;

                append(ctx.result, "|");
                toString(ctx, inst, instIdx);
            }
        }

        appendLabelRegset(ctx.cfg.def, i, "def");
        appendLabelRegset(ctx.cfg.out, i, "out");

        append(ctx.result, "}\"];\n");
    }

    for (size_t i = 0; i < function.blocks.size(); i++)
    {
        const IrBlock& block = function.blocks[i];

        if (block.start == ~0u)
            continue;

        for (uint32_t instIdx = block.start; instIdx != ~0u && instIdx <= block.finish; instIdx++)
        {
            const IrInst& inst = function.instructions[instIdx];

            auto checkOp = [&](IrOp op) {
                if (op.kind == IrOpKind::Block)
                {
                    if (function.blocks[op.index].kind != IrBlockKind::Fallback)
                        append(ctx.result, "b%u -> b%u [weight=10];\n", unsigned(i), op.index);
                    else
                        append(ctx.result, "b%u -> b%u;\n", unsigned(i), op.index);
                }
            };

            checkOp(inst.a);
            checkOp(inst.b);
            checkOp(inst.c);
            checkOp(inst.d);
            checkOp(inst.e);
            checkOp(inst.f);
        }
    }

    append(ctx.result, "}\n");

    return result;
}

std::string dumpDot(const IrFunction& function, bool includeInst)
{
    std::string result = toDot(function, includeInst);

    printf("%s\n", result.c_str());

    return result;
}

} // namespace CodeGen
} // namespace Luau
File diff suppressed because it is too large
@@ -0,0 +1,75 @@

// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include "Luau/AssemblyBuilderA64.h"
#include "Luau/IrData.h"

#include "IrRegAllocA64.h"
#include "IrValueLocationTracking.h"

#include <vector>

struct Proto;

namespace Luau
{
namespace CodeGen
{

struct ModuleHelpers;
struct NativeState;
struct AssemblyOptions;

namespace A64
{

struct IrLoweringA64
{
    IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, NativeState& data, Proto* proto, IrFunction& function);

    void lowerInst(IrInst& inst, uint32_t index, IrBlock& next);
    void finishBlock();

    bool hasError() const;

    bool isFallthroughBlock(IrBlock target, IrBlock next);
    void jumpOrFallthrough(IrBlock& target, IrBlock& next);

    // Operand data build helpers
    // May emit data/address synthesis instructions
    RegisterA64 tempDouble(IrOp op);
    RegisterA64 tempInt(IrOp op);
    RegisterA64 tempUint(IrOp op);
    AddressA64 tempAddr(IrOp op, int offset);

    // May emit restore instructions
    RegisterA64 regOp(IrOp op);

    // Operand data lookup helpers
    IrConst constOp(IrOp op) const;
    uint8_t tagOp(IrOp op) const;
    bool boolOp(IrOp op) const;
    int intOp(IrOp op) const;
    unsigned uintOp(IrOp op) const;
    double doubleOp(IrOp op) const;

    IrBlock& blockOp(IrOp op) const;
    Label& labelOp(IrOp op) const;

    AssemblyBuilderA64& build;
    ModuleHelpers& helpers;
    NativeState& data;
    Proto* proto = nullptr; // Temporarily required to provide 'Instruction* pc' to old emitInst* methods

    IrFunction& function;

    IrRegAllocA64 regs;

    IrValueLocationTracking valueTracker;

    bool error = false;
};

} // namespace A64
} // namespace CodeGen
} // namespace Luau
File diff suppressed because it is too large
@@ -0,0 +1,69 @@

// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include "Luau/AssemblyBuilderX64.h"
#include "Luau/IrData.h"
#include "Luau/IrRegAllocX64.h"

#include "IrValueLocationTracking.h"

#include <vector>

struct Proto;

namespace Luau
{
namespace CodeGen
{

struct ModuleHelpers;
struct NativeState;
struct AssemblyOptions;

namespace X64
{

struct IrLoweringX64
{
    IrLoweringX64(AssemblyBuilderX64& build, ModuleHelpers& helpers, NativeState& data, IrFunction& function);

    void lowerInst(IrInst& inst, uint32_t index, IrBlock& next);
    void finishBlock();

    bool hasError() const;

    bool isFallthroughBlock(IrBlock target, IrBlock next);
    void jumpOrFallthrough(IrBlock& target, IrBlock& next);

    void storeDoubleAsFloat(OperandX64 dst, IrOp src);

    // Operand data lookup helpers
    OperandX64 memRegDoubleOp(IrOp op);
    OperandX64 memRegUintOp(IrOp op);
    OperandX64 memRegTagOp(IrOp op);
    RegisterX64 regOp(IrOp op);

    IrConst constOp(IrOp op) const;
    uint8_t tagOp(IrOp op) const;
    bool boolOp(IrOp op) const;
    int intOp(IrOp op) const;
    unsigned uintOp(IrOp op) const;
    double doubleOp(IrOp op) const;

    IrBlock& blockOp(IrOp op) const;
    Label& labelOp(IrOp op) const;

    AssemblyBuilderX64& build;
    ModuleHelpers& helpers;
    NativeState& data;

    IrFunction& function;

    IrRegAllocX64 regs;

    IrValueLocationTracking valueTracker;
};

} // namespace X64
} // namespace CodeGen
} // namespace Luau
@@ -0,0 +1,435 @@

// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "IrRegAllocA64.h"

#include "Luau/AssemblyBuilderA64.h"
#include "Luau/IrUtils.h"

#include "BitUtils.h"
#include "EmitCommonA64.h"

#include <string.h>

LUAU_FASTFLAGVARIABLE(DebugLuauCodegenChaosA64, false)

namespace Luau
{
namespace CodeGen
{
namespace A64
{

static int allocSpill(uint32_t& free, KindA64 kind)
{
    LUAU_ASSERT(kStackSize <= 256); // to support larger stack frames, we need to ensure qN is allocated at a 16b boundary to fit in ldr/str encoding

    // qN registers use two consecutive slots
    int slot = countrz(kind == KindA64::q ? free & (free >> 1) : free);
    if (slot == 32)
        return -1;

    uint32_t mask = (kind == KindA64::q ? 3u : 1u) << slot;

    LUAU_ASSERT((free & mask) == mask);
    free &= ~mask;

    return slot;
}

static void freeSpill(uint32_t& free, KindA64 kind, uint8_t slot)
{
    // qN registers use two consecutive slots
    uint32_t mask = (kind == KindA64::q ? 3u : 1u) << slot;

    LUAU_ASSERT((free & mask) == 0);
    free |= mask;
}

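The two spill-slot helpers above pack 16-byte qN registers into pairs of 8-byte slots with a bitmask trick: in the free-slot mask, `free & (free >> 1)` has bit i set exactly when slots i and i+1 are both free, so its lowest set bit is the first usable pair. A minimal standalone sketch (not part of the commit; this countrz is a portable stand-in for the BitUtils.h helper):

#include <cassert>
#include <cstdint>

// count trailing zeros; returns 32 for v == 0, mirroring the allocator's "no slot" case
static int countrz(uint32_t v)
{
    if (v == 0)
        return 32;
    int n = 0;
    while ((v & 1) == 0)
    {
        v >>= 1;
        n++;
    }
    return n;
}

int main()
{
    uint32_t free = 0b10110; // slots 1, 2 and 4 are free

    int single = countrz(free);             // first free slot for x/d registers: 1
    int pair = countrz(free & (free >> 1)); // first pair of consecutive free slots for q registers: 1 (slots 1 and 2)

    assert(single == 1 && pair == 1);
    return 0;
}
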
static int getReloadOffset(IrCmd cmd)
{
    switch (getCmdValueKind(cmd))
    {
    case IrValueKind::Unknown:
    case IrValueKind::None:
        LUAU_ASSERT(!"Invalid operand restore value kind");
        break;
    case IrValueKind::Tag:
        return offsetof(TValue, tt);
    case IrValueKind::Int:
        return offsetof(TValue, value);
    case IrValueKind::Pointer:
        return offsetof(TValue, value.gc);
    case IrValueKind::Double:
        return offsetof(TValue, value.n);
    case IrValueKind::Tvalue:
        return 0;
    }

    LUAU_ASSERT(!"Invalid operand restore value kind");
    LUAU_UNREACHABLE();
}

static AddressA64 getReloadAddress(const IrFunction& function, const IrInst& inst)
{
    IrOp location = function.findRestoreOp(inst);

    if (location.kind == IrOpKind::VmReg)
        return mem(rBase, vmRegOp(location) * sizeof(TValue) + getReloadOffset(inst.cmd));

    // loads are 4/8/16 bytes; we conservatively limit the offset to fit assuming a 4b index
    if (location.kind == IrOpKind::VmConst && vmConstOp(location) * sizeof(TValue) <= AddressA64::kMaxOffset * 4)
        return mem(rConstants, vmConstOp(location) * sizeof(TValue) + getReloadOffset(inst.cmd));

    return AddressA64(xzr); // dummy
}

static void restoreInst(AssemblyBuilderA64& build, uint32_t& freeSpillSlots, IrFunction& function, const IrRegAllocA64::Spill& s, RegisterA64 reg)
{
    IrInst& inst = function.instructions[s.inst];
    LUAU_ASSERT(inst.regA64 == noreg);

    if (s.slot >= 0)
    {
        build.ldr(reg, mem(sp, sSpillArea.data + s.slot * 8));

        freeSpill(freeSpillSlots, reg.kind, s.slot);
    }
    else
    {
        LUAU_ASSERT(!inst.spilled && inst.needsReload);
        AddressA64 addr = getReloadAddress(function, function.instructions[s.inst]);
        LUAU_ASSERT(addr.base != xzr);
        build.ldr(reg, addr);
    }

    inst.spilled = false;
    inst.needsReload = false;
    inst.regA64 = reg;
}

IrRegAllocA64::IrRegAllocA64(IrFunction& function, std::initializer_list<std::pair<RegisterA64, RegisterA64>> regs)
    : function(function)
{
    for (auto& p : regs)
    {
        LUAU_ASSERT(p.first.kind == p.second.kind && p.first.index <= p.second.index);

        Set& set = getSet(p.first.kind);

        for (int i = p.first.index; i <= p.second.index; ++i)
            set.base |= 1u << i;
    }

    gpr.free = gpr.base;
    simd.free = simd.base;

    memset(gpr.defs, -1, sizeof(gpr.defs));
    memset(simd.defs, -1, sizeof(simd.defs));

    LUAU_ASSERT(kSpillSlots <= 32);
    freeSpillSlots = (kSpillSlots == 32) ? ~0u : (1u << kSpillSlots) - 1;
}

RegisterA64 IrRegAllocA64::allocReg(KindA64 kind, uint32_t index)
{
    Set& set = getSet(kind);

    if (set.free == 0)
    {
        // TODO: remember the error and fail lowering
        LUAU_ASSERT(!"Out of registers to allocate");
        return noreg;
    }

    int reg = 31 - countlz(set.free);

    if (FFlag::DebugLuauCodegenChaosA64)
        reg = countrz(set.free); // allocate from the low end; this causes extra conflicts for calls

    set.free &= ~(1u << reg);
    set.defs[reg] = index;

    return RegisterA64{kind, uint8_t(reg)};
}

RegisterA64 IrRegAllocA64::allocTemp(KindA64 kind)
{
    Set& set = getSet(kind);

    if (set.free == 0)
    {
        // TODO: remember the error and fail lowering
        LUAU_ASSERT(!"Out of registers to allocate");
        return noreg;
    }

    int reg = 31 - countlz(set.free);

    if (FFlag::DebugLuauCodegenChaosA64)
        reg = countrz(set.free); // allocate from the low end; this causes extra conflicts for calls

    set.free &= ~(1u << reg);
    set.temp |= 1u << reg;
    LUAU_ASSERT(set.defs[reg] == kInvalidInstIdx);

    return RegisterA64{kind, uint8_t(reg)};
}

RegisterA64 IrRegAllocA64::allocReuse(KindA64 kind, uint32_t index, std::initializer_list<IrOp> oprefs)
{
    for (IrOp op : oprefs)
    {
        if (op.kind != IrOpKind::Inst)
            continue;

        IrInst& source = function.instructions[op.index];

        if (source.lastUse == index && !source.reusedReg && source.regA64 != noreg)
        {
            LUAU_ASSERT(!source.spilled && !source.needsReload);
            LUAU_ASSERT(source.regA64.kind == kind);

            Set& set = getSet(kind);
            LUAU_ASSERT(set.defs[source.regA64.index] == op.index);
            set.defs[source.regA64.index] = index;

            source.reusedReg = true;
            return source.regA64;
        }
    }

    return allocReg(kind, index);
}

RegisterA64 IrRegAllocA64::takeReg(RegisterA64 reg, uint32_t index)
{
    Set& set = getSet(reg.kind);

    LUAU_ASSERT(set.free & (1u << reg.index));
    LUAU_ASSERT(set.defs[reg.index] == kInvalidInstIdx);

    set.free &= ~(1u << reg.index);
    set.defs[reg.index] = index;

    return reg;
}

void IrRegAllocA64::freeReg(RegisterA64 reg)
{
    Set& set = getSet(reg.kind);

    LUAU_ASSERT((set.base & (1u << reg.index)) != 0);
    LUAU_ASSERT((set.free & (1u << reg.index)) == 0);
    LUAU_ASSERT((set.temp & (1u << reg.index)) == 0);

    set.free |= 1u << reg.index;
    set.defs[reg.index] = kInvalidInstIdx;
}

void IrRegAllocA64::freeLastUseReg(IrInst& target, uint32_t index)
{
    if (target.lastUse == index && !target.reusedReg)
    {
        LUAU_ASSERT(!target.spilled && !target.needsReload);

        // Register might have already been freed if it had multiple uses inside a single instruction
        if (target.regA64 == noreg)
            return;

        freeReg(target.regA64);
        target.regA64 = noreg;
    }
}

void IrRegAllocA64::freeLastUseRegs(const IrInst& inst, uint32_t index)
{
    auto checkOp = [this, index](IrOp op) {
        if (op.kind == IrOpKind::Inst)
            freeLastUseReg(function.instructions[op.index], index);
    };

    checkOp(inst.a);
    checkOp(inst.b);
    checkOp(inst.c);
    checkOp(inst.d);
    checkOp(inst.e);
    checkOp(inst.f);
}

void IrRegAllocA64::freeTempRegs()
{
    LUAU_ASSERT((gpr.free & gpr.temp) == 0);
    gpr.free |= gpr.temp;
    gpr.temp = 0;

    LUAU_ASSERT((simd.free & simd.temp) == 0);
    simd.free |= simd.temp;
    simd.temp = 0;
}

size_t IrRegAllocA64::spill(AssemblyBuilderA64& build, uint32_t index, std::initializer_list<RegisterA64> live)
{
    static const KindA64 sets[] = {KindA64::x, KindA64::q};

    size_t start = spills.size();

    uint32_t poisongpr = 0;
    uint32_t poisonsimd = 0;

    if (FFlag::DebugLuauCodegenChaosA64)
    {
        poisongpr = gpr.base & ~gpr.free;
        poisonsimd = simd.base & ~simd.free;

        for (RegisterA64 reg : live)
        {
            Set& set = getSet(reg.kind);
            (&set == &simd ? poisonsimd : poisongpr) &= ~(1u << reg.index);
        }
    }

    for (KindA64 kind : sets)
    {
        Set& set = getSet(kind);

        // early-out
        if (set.free == set.base)
            continue;

        // free all temp registers
        LUAU_ASSERT((set.free & set.temp) == 0);
        set.free |= set.temp;
        set.temp = 0;

        // spill all allocated registers unless they aren't used anymore
        uint32_t regs = set.base & ~set.free;

        while (regs)
        {
            int reg = 31 - countlz(regs);

            uint32_t inst = set.defs[reg];
            LUAU_ASSERT(inst != kInvalidInstIdx);

            IrInst& def = function.instructions[inst];
            LUAU_ASSERT(def.regA64.index == reg);
            LUAU_ASSERT(!def.reusedReg);
            LUAU_ASSERT(!def.spilled);
            LUAU_ASSERT(!def.needsReload);

            if (def.lastUse == index)
            {
                // instead of spilling the register only to never reload it, we assume the register is not needed anymore
            }
            else if (getReloadAddress(function, def).base != xzr)
            {
                // instead of spilling the register to stack, we can reload it from VM stack/constants
                // we still need to record the spill for restore(start) to work
                Spill s = {inst, def.regA64, -1};
                spills.push_back(s);

                def.needsReload = true;
            }
            else
            {
                int slot = allocSpill(freeSpillSlots, def.regA64.kind);
                LUAU_ASSERT(slot >= 0); // TODO: remember the error and fail lowering

                build.str(def.regA64, mem(sp, sSpillArea.data + slot * 8));

                Spill s = {inst, def.regA64, int8_t(slot)};
                spills.push_back(s);

                def.spilled = true;
            }

            def.regA64 = noreg;

            regs &= ~(1u << reg);
            set.free |= 1u << reg;
            set.defs[reg] = kInvalidInstIdx;
        }

        LUAU_ASSERT(set.free == set.base);
    }

    if (FFlag::DebugLuauCodegenChaosA64)
    {
        for (int reg = 0; reg < 32; ++reg)
        {
            if (poisongpr & (1u << reg))
                build.mov(RegisterA64{KindA64::x, uint8_t(reg)}, 0xdead);
            if (poisonsimd & (1u << reg))
                build.fmov(RegisterA64{KindA64::d, uint8_t(reg)}, -0.125);
        }
    }

    return start;
}

void IrRegAllocA64::restore(AssemblyBuilderA64& build, size_t start)
{
    LUAU_ASSERT(start <= spills.size());

    if (start < spills.size())
    {
        for (size_t i = start; i < spills.size(); ++i)
        {
            Spill s = spills[i]; // copy in case takeReg reallocates spills

            RegisterA64 reg = takeReg(s.origin, s.inst);

            restoreInst(build, freeSpillSlots, function, s, reg);
        }

        spills.resize(start);
    }
}

void IrRegAllocA64::restoreReg(AssemblyBuilderA64& build, IrInst& inst)
{
    uint32_t index = function.getInstIndex(inst);

    for (size_t i = 0; i < spills.size(); ++i)
    {
        if (spills[i].inst == index)
        {
            Spill s = spills[i]; // copy in case allocReg reallocates spills

            RegisterA64 reg = allocReg(s.origin.kind, index);

            restoreInst(build, freeSpillSlots, function, s, reg);

            spills[i] = spills.back();
            spills.pop_back();
            return;
        }
    }

    LUAU_ASSERT(!"Expected to find a spill record");
}

void IrRegAllocA64::assertNoSpills() const
{
    LUAU_ASSERT(spills.empty());
}

IrRegAllocA64::Set& IrRegAllocA64::getSet(KindA64 kind)
{
    switch (kind)
    {
    case KindA64::x:
    case KindA64::w:
        return gpr;

    case KindA64::s:
    case KindA64::d:
    case KindA64::q:
        return simd;

    default:
        LUAU_ASSERT(!"Unexpected register kind");
        LUAU_UNREACHABLE();
    }
}

} // namespace A64
} // namespace CodeGen
} // namespace Luau
@@ -0,0 +1,84 @@

// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include "Luau/IrData.h"
#include "Luau/RegisterA64.h"

#include <initializer_list>
#include <utility>
#include <vector>

namespace Luau
{
namespace CodeGen
{
namespace A64
{

class AssemblyBuilderA64;

struct IrRegAllocA64
{
    IrRegAllocA64(IrFunction& function, std::initializer_list<std::pair<RegisterA64, RegisterA64>> regs);

    RegisterA64 allocReg(KindA64 kind, uint32_t index);
    RegisterA64 allocTemp(KindA64 kind);
    RegisterA64 allocReuse(KindA64 kind, uint32_t index, std::initializer_list<IrOp> oprefs);

    RegisterA64 takeReg(RegisterA64 reg, uint32_t index);

    void freeReg(RegisterA64 reg);

    void freeLastUseReg(IrInst& target, uint32_t index);
    void freeLastUseRegs(const IrInst& inst, uint32_t index);

    void freeTempRegs();

    // Spills all live registers that outlive the current instruction; all allocated registers are assumed to be undefined
    size_t spill(AssemblyBuilderA64& build, uint32_t index, std::initializer_list<RegisterA64> live = {});

    // Restores registers starting from the offset returned by spill(); all spills will be restored to the original registers
    void restore(AssemblyBuilderA64& build, size_t start);

    // Restores the register for a single instruction; may not assign the previously used register!
    void restoreReg(AssemblyBuilderA64& build, IrInst& inst);

    void assertNoSpills() const;

    struct Set
    {
        // which registers are in the set that the allocator manages (initialized at construction)
        uint32_t base = 0;

        // which subset of the initial set is free
        uint32_t free = 0;

        // which subset of the initial set is allocated as temporary
        uint32_t temp = 0;

        // which instruction is defining which register (for spilling); only valid when a register is neither free nor temp
        uint32_t defs[32];
    };

    struct Spill
    {
        uint32_t inst;

        RegisterA64 origin;
        int8_t slot;
    };

    Set& getSet(KindA64 kind);

    IrFunction& function;
    Set gpr, simd;

    std::vector<Spill> spills;

    // which 8-byte slots are free
    uint32_t freeSpillSlots = 0;
};

} // namespace A64
} // namespace CodeGen
} // namespace Luau
@@ -0,0 +1,492 @@

// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/IrRegAllocX64.h"

#include "Luau/IrUtils.h"

#include "EmitCommonX64.h"

namespace Luau
{
namespace CodeGen
{
namespace X64
{

static const RegisterX64 kGprAllocOrder[] = {rax, rdx, rcx, rbx, rsi, rdi, r8, r9, r10, r11};

IrRegAllocX64::IrRegAllocX64(AssemblyBuilderX64& build, IrFunction& function)
    : build(build)
    , function(function)
{
    freeGprMap.fill(true);
    gprInstUsers.fill(kInvalidInstIdx);
    freeXmmMap.fill(true);
    xmmInstUsers.fill(kInvalidInstIdx);
}

RegisterX64 IrRegAllocX64::allocReg(SizeX64 size, uint32_t instIdx)
{
    if (size == SizeX64::xmmword)
    {
        for (size_t i = 0; i < freeXmmMap.size(); ++i)
        {
            if (freeXmmMap[i])
            {
                freeXmmMap[i] = false;
                xmmInstUsers[i] = instIdx;
                return RegisterX64{size, uint8_t(i)};
            }
        }
    }
    else
    {
        for (RegisterX64 reg : kGprAllocOrder)
        {
            if (freeGprMap[reg.index])
            {
                freeGprMap[reg.index] = false;
                gprInstUsers[reg.index] = instIdx;
                return RegisterX64{size, reg.index};
            }
        }
    }

    // Out of registers, spill the value with the furthest next use
    const std::array<uint32_t, 16>& regInstUsers = size == SizeX64::xmmword ? xmmInstUsers : gprInstUsers;
    if (uint32_t furthestUseTarget = findInstructionWithFurthestNextUse(regInstUsers); furthestUseTarget != kInvalidInstIdx)
        return takeReg(function.instructions[furthestUseTarget].regX64, instIdx);

    LUAU_ASSERT(!"Out of registers to allocate");
    return noreg;
}

RegisterX64 IrRegAllocX64::allocRegOrReuse(SizeX64 size, uint32_t instIdx, std::initializer_list<IrOp> oprefs)
{
    for (IrOp op : oprefs)
    {
        if (op.kind != IrOpKind::Inst)
            continue;

        IrInst& source = function.instructions[op.index];

        if (source.lastUse == instIdx && !source.reusedReg && !source.spilled && !source.needsReload)
        {
            // Not comparing size directly because we only need a matching register set
            if ((size == SizeX64::xmmword) != (source.regX64.size == SizeX64::xmmword))
                continue;

            LUAU_ASSERT(source.regX64 != noreg);

            source.reusedReg = true;

            if (size == SizeX64::xmmword)
                xmmInstUsers[source.regX64.index] = instIdx;
            else
                gprInstUsers[source.regX64.index] = instIdx;

            return RegisterX64{size, source.regX64.index};
        }
    }

    return allocReg(size, instIdx);
}

RegisterX64 IrRegAllocX64::takeReg(RegisterX64 reg, uint32_t instIdx)
{
    if (reg.size == SizeX64::xmmword)
    {
        if (!freeXmmMap[reg.index])
        {
            LUAU_ASSERT(xmmInstUsers[reg.index] != kInvalidInstIdx);
            preserve(function.instructions[xmmInstUsers[reg.index]]);
        }

        LUAU_ASSERT(freeXmmMap[reg.index]);
        freeXmmMap[reg.index] = false;
        xmmInstUsers[reg.index] = instIdx;
    }
    else
    {
        if (!freeGprMap[reg.index])
        {
            LUAU_ASSERT(gprInstUsers[reg.index] != kInvalidInstIdx);
            preserve(function.instructions[gprInstUsers[reg.index]]);
        }

        LUAU_ASSERT(freeGprMap[reg.index]);
        freeGprMap[reg.index] = false;
        gprInstUsers[reg.index] = instIdx;
    }

    return reg;
}

void IrRegAllocX64::freeReg(RegisterX64 reg)
{
    if (reg.size == SizeX64::xmmword)
    {
        LUAU_ASSERT(!freeXmmMap[reg.index]);
        freeXmmMap[reg.index] = true;
        xmmInstUsers[reg.index] = kInvalidInstIdx;
    }
    else
    {
        LUAU_ASSERT(!freeGprMap[reg.index]);
        freeGprMap[reg.index] = true;
        gprInstUsers[reg.index] = kInvalidInstIdx;
    }
}

void IrRegAllocX64::freeLastUseReg(IrInst& target, uint32_t instIdx)
{
    if (isLastUseReg(target, instIdx))
    {
        LUAU_ASSERT(!target.spilled && !target.needsReload);

        // Register might have already been freed if it had multiple uses inside a single instruction
        if (target.regX64 == noreg)
            return;

        freeReg(target.regX64);
        target.regX64 = noreg;
    }
}

void IrRegAllocX64::freeLastUseRegs(const IrInst& inst, uint32_t instIdx)
{
    auto checkOp = [this, instIdx](IrOp op) {
        if (op.kind == IrOpKind::Inst)
            freeLastUseReg(function.instructions[op.index], instIdx);
    };

    checkOp(inst.a);
    checkOp(inst.b);
    checkOp(inst.c);
    checkOp(inst.d);
    checkOp(inst.e);
    checkOp(inst.f);
}

bool IrRegAllocX64::isLastUseReg(const IrInst& target, uint32_t instIdx) const
{
    return target.lastUse == instIdx && !target.reusedReg;
}

void IrRegAllocX64::preserve(IrInst& inst)
{
    IrSpillX64 spill;
    spill.instIdx = function.getInstIndex(inst);
    spill.valueKind = getCmdValueKind(inst.cmd);
    spill.spillId = nextSpillId++;
    spill.originalLoc = inst.regX64;

    // Loads from VmReg/VmConst don't have to be spilled, they can be reloaded from the VM register/constant later
    if (!hasRestoreOp(inst))
    {
        unsigned i = findSpillStackSlot(spill.valueKind);

        if (spill.valueKind == IrValueKind::Tvalue)
            build.vmovups(xmmword[sSpillArea + i * 8], inst.regX64);
        else if (spill.valueKind == IrValueKind::Double)
            build.vmovsd(qword[sSpillArea + i * 8], inst.regX64);
        else if (spill.valueKind == IrValueKind::Pointer)
            build.mov(qword[sSpillArea + i * 8], inst.regX64);
        else if (spill.valueKind == IrValueKind::Tag || spill.valueKind == IrValueKind::Int)
            build.mov(dword[sSpillArea + i * 8], inst.regX64);
        else
            LUAU_ASSERT(!"unsupported value kind");

        usedSpillSlots.set(i);

        if (i + 1 > maxUsedSlot)
            maxUsedSlot = i + 1;

        if (spill.valueKind == IrValueKind::Tvalue)
        {
            usedSpillSlots.set(i + 1);

            if (i + 2 > maxUsedSlot)
                maxUsedSlot = i + 2;
        }

        spill.stackSlot = uint8_t(i);
        inst.spilled = true;
    }
    else
    {
        inst.needsReload = true;
    }

    spills.push_back(spill);

    freeReg(inst.regX64);
    inst.regX64 = noreg;
}

void IrRegAllocX64::restore(IrInst& inst, bool intoOriginalLocation)
{
    uint32_t instIdx = function.getInstIndex(inst);

    for (size_t i = 0; i < spills.size(); i++)
    {
        if (spills[i].instIdx == instIdx)
        {
            RegisterX64 reg = intoOriginalLocation ? takeReg(spills[i].originalLoc, instIdx) : allocReg(spills[i].originalLoc.size, instIdx);
            OperandX64 restoreLocation = noreg;

            // Previous call might have relocated the spill vector, so this reference can't be taken earlier
            const IrSpillX64& spill = spills[i];

            if (spill.stackSlot != kNoStackSlot)
            {
                restoreLocation = addr[sSpillArea + spill.stackSlot * 8];
                restoreLocation.memSize = reg.size;

                usedSpillSlots.set(spill.stackSlot, false);

                if (spill.valueKind == IrValueKind::Tvalue)
                    usedSpillSlots.set(spill.stackSlot + 1, false);
            }
            else
            {
                restoreLocation = getRestoreAddress(inst, getRestoreOp(inst));
            }

            if (spill.valueKind == IrValueKind::Tvalue)
                build.vmovups(reg, restoreLocation);
            else if (spill.valueKind == IrValueKind::Double)
                build.vmovsd(reg, restoreLocation);
            else
                build.mov(reg, restoreLocation);

            inst.regX64 = reg;
            inst.spilled = false;
            inst.needsReload = false;

            spills[i] = spills.back();
            spills.pop_back();
            return;
        }
    }
}

void IrRegAllocX64::preserveAndFreeInstValues()
{
    for (uint32_t instIdx : gprInstUsers)
    {
        if (instIdx != kInvalidInstIdx)
            preserve(function.instructions[instIdx]);
    }

    for (uint32_t instIdx : xmmInstUsers)
    {
        if (instIdx != kInvalidInstIdx)
            preserve(function.instructions[instIdx]);
    }
}

bool IrRegAllocX64::shouldFreeGpr(RegisterX64 reg) const
{
    if (reg == noreg)
        return false;

    LUAU_ASSERT(reg.size != SizeX64::xmmword);

    for (RegisterX64 gpr : kGprAllocOrder)
    {
        if (reg.index == gpr.index)
            return true;
    }

    return false;
}

unsigned IrRegAllocX64::findSpillStackSlot(IrValueKind valueKind)
{
    // Find a free stack slot. Two consecutive slots might be required for 16-byte TValues, so '- 1' is used
    for (unsigned i = 0; i < unsigned(usedSpillSlots.size() - 1); ++i)
    {
        if (usedSpillSlots.test(i))
            continue;

        if (valueKind == IrValueKind::Tvalue && usedSpillSlots.test(i + 1))
        {
            ++i; // No need to retest this double position
            continue;
        }

        return i;
    }

    LUAU_ASSERT(!"nowhere to spill");
    return ~0u;
}

IrOp IrRegAllocX64::getRestoreOp(const IrInst& inst) const
{
    if (IrOp location = function.findRestoreOp(inst); location.kind == IrOpKind::VmReg || location.kind == IrOpKind::VmConst)
        return location;

    return IrOp();
}

bool IrRegAllocX64::hasRestoreOp(const IrInst& inst) const
{
    return getRestoreOp(inst).kind != IrOpKind::None;
}

OperandX64 IrRegAllocX64::getRestoreAddress(const IrInst& inst, IrOp restoreOp)
{
    switch (getCmdValueKind(inst.cmd))
    {
    case IrValueKind::Unknown:
    case IrValueKind::None:
        LUAU_ASSERT(!"Invalid operand restore value kind");
        break;
    case IrValueKind::Tag:
        return restoreOp.kind == IrOpKind::VmReg ? luauRegTag(vmRegOp(restoreOp)) : luauConstantTag(vmConstOp(restoreOp));
    case IrValueKind::Int:
        LUAU_ASSERT(restoreOp.kind == IrOpKind::VmReg);
        return luauRegValueInt(vmRegOp(restoreOp));
    case IrValueKind::Pointer:
        return restoreOp.kind == IrOpKind::VmReg ? luauRegValue(vmRegOp(restoreOp)) : luauConstantValue(vmConstOp(restoreOp));
    case IrValueKind::Double:
        return restoreOp.kind == IrOpKind::VmReg ? luauRegValue(vmRegOp(restoreOp)) : luauConstantValue(vmConstOp(restoreOp));
    case IrValueKind::Tvalue:
        return restoreOp.kind == IrOpKind::VmReg ? luauReg(vmRegOp(restoreOp)) : luauConstant(vmConstOp(restoreOp));
    }

    LUAU_ASSERT(!"Failed to find restore operand location");
    return noreg;
}

uint32_t IrRegAllocX64::findInstructionWithFurthestNextUse(const std::array<uint32_t, 16>& regInstUsers) const
{
    uint32_t furthestUseTarget = kInvalidInstIdx;
    uint32_t furthestUseLocation = 0;

    for (uint32_t regInstUser : regInstUsers)
    {
        // Cannot spill temporary registers or the register of the value that's defined in the current instruction
        if (regInstUser == kInvalidInstIdx || regInstUser == currInstIdx)
            continue;

        uint32_t nextUse = getNextInstUse(function, regInstUser, currInstIdx);

        // Cannot spill a value that is about to be used in the current instruction
        if (nextUse == currInstIdx)
            continue;

        if (furthestUseTarget == kInvalidInstIdx || nextUse > furthestUseLocation)
        {
            furthestUseLocation = nextUse;
            furthestUseTarget = regInstUser;
        }
    }

    return furthestUseTarget;
}

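allocReg above falls back to findInstructionWithFurthestNextUse, which is the classic furthest-next-use (Belady) eviction heuristic: when no register is free, spill the value whose next use is farthest away. A minimal self-contained sketch of the selection loop (not part of the commit; the nextUse array is a hypothetical stand-in for getNextInstUse):

#include <cstdint>
#include <cstdio>

int main()
{
    const uint32_t kInvalid = ~0u;

    // per-register next-use instruction index; kInvalid marks registers that cannot be spilled
    uint32_t nextUse[4] = {12, kInvalid, 40, 25};

    uint32_t best = kInvalid;
    uint32_t bestUse = 0;

    for (uint32_t i = 0; i < 4; ++i)
    {
        if (nextUse[i] == kInvalid)
            continue; // unspillable (temporary or unused register)

        if (best == kInvalid || nextUse[i] > bestUse)
        {
            bestUse = nextUse[i];
            best = i;
        }
    }

    printf("spill register %u (next use at %u)\n", best, bestUse); // register 2, next use at 40
    return 0;
}
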
void IrRegAllocX64::assertFree(RegisterX64 reg) const
{
    if (reg.size == SizeX64::xmmword)
        LUAU_ASSERT(freeXmmMap[reg.index]);
    else
        LUAU_ASSERT(freeGprMap[reg.index]);
}

void IrRegAllocX64::assertAllFree() const
{
    for (RegisterX64 reg : kGprAllocOrder)
        LUAU_ASSERT(freeGprMap[reg.index]);

    for (bool free : freeXmmMap)
        LUAU_ASSERT(free);
}

void IrRegAllocX64::assertNoSpills() const
{
    LUAU_ASSERT(spills.empty());
}

ScopedRegX64::ScopedRegX64(IrRegAllocX64& owner)
    : owner(owner)
    , reg(noreg)
{
}

ScopedRegX64::ScopedRegX64(IrRegAllocX64& owner, SizeX64 size)
    : owner(owner)
    , reg(noreg)
{
    alloc(size);
}

ScopedRegX64::ScopedRegX64(IrRegAllocX64& owner, RegisterX64 reg)
    : owner(owner)
    , reg(reg)
{
}

ScopedRegX64::~ScopedRegX64()
{
    if (reg != noreg)
        owner.freeReg(reg);
}

void ScopedRegX64::alloc(SizeX64 size)
{
    LUAU_ASSERT(reg == noreg);
    reg = owner.allocReg(size, kInvalidInstIdx);
}

void ScopedRegX64::free()
{
    LUAU_ASSERT(reg != noreg);
    owner.freeReg(reg);
    reg = noreg;
}

RegisterX64 ScopedRegX64::release()
{
    RegisterX64 tmp = reg;
    reg = noreg;
    return tmp;
}

ScopedSpills::ScopedSpills(IrRegAllocX64& owner)
    : owner(owner)
{
    startSpillId = owner.nextSpillId;
}

ScopedSpills::~ScopedSpills()
{
    unsigned endSpillId = owner.nextSpillId;

    for (size_t i = 0; i < owner.spills.size();)
    {
        IrSpillX64& spill = owner.spills[i];

        // Restoring spills inside this scope cannot create new spills
        LUAU_ASSERT(spill.spillId < endSpillId);

        // If a spill was created inside the current scope, it has to be restored
        if (spill.spillId >= startSpillId)
        {
            IrInst& inst = owner.function.instructions[spill.instIdx];

            owner.restore(inst, /*intoOriginalLocation*/ true);

            // Spill restore removes the spill entry, so the loop is repeated at the same 'i'
        }
        else
        {
            i++;
        }
    }
}

} // namespace X64
} // namespace CodeGen
} // namespace Luau
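ScopedSpills above is an RAII guard: it records nextSpillId at construction and, at destruction, restores exactly the spills created inside its scope. A minimal self-contained sketch of the same watermark-and-undo pattern (the SpillLog type and ids are illustrative, not from the commit):

#include <cstdio>
#include <vector>

struct SpillLog
{
    std::vector<int> spills; // spill ids, in creation order
    int nextId = 0;
};

struct ScopedSpillsSketch
{
    explicit ScopedSpillsSketch(SpillLog& log)
        : log(log)
        , startId(log.nextId) // watermark: spills created before this scope are kept
    {
    }

    ~ScopedSpillsSketch()
    {
        // undo (here: remove) every spill created inside this scope
        while (!log.spills.empty() && log.spills.back() >= startId)
            log.spills.pop_back();
    }

    SpillLog& log;
    int startId;
};

int main()
{
    SpillLog log;
    log.spills.push_back(log.nextId++); // spill 0: created outside the scope

    {
        ScopedSpillsSketch guard(log);
        log.spills.push_back(log.nextId++); // spill 1: created inside the scope
    }

    printf("%zu spill(s) remain\n", log.spills.size()); // 1: only spill 0 survives
    return 0;
}
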
@ -0,0 +1,827 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#include "IrTranslateBuiltins.h"
|
||||
|
||||
#include "Luau/Bytecode.h"
|
||||
#include "Luau/IrBuilder.h"
|
||||
|
||||
#include "lstate.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
// TODO: when nresults is less than our actual result count, we can skip computing/writing unused results
|
||||
|
||||
static const int kMinMaxUnrolledParams = 5;
|
||||
static const int kBit32BinaryOpUnrolledParams = 5;
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace CodeGen
|
||||
{
|
||||
|
||||
static void builtinCheckDouble(IrBuilder& build, IrOp arg, IrOp fallback)
{
    if (arg.kind == IrOpKind::Constant)
        LUAU_ASSERT(build.function.constOp(arg).kind == IrConstKind::Double);
    else
        build.loadAndCheckTag(arg, LUA_TNUMBER, fallback);
}

static IrOp builtinLoadDouble(IrBuilder& build, IrOp arg)
{
    if (arg.kind == IrOpKind::Constant)
        return arg;

    return build.inst(IrCmd::LOAD_DOUBLE, arg);
}

// Wrapper code for all builtins with a fixed signature and manual assembly lowering of the body

// (number, ...) -> number
static BuiltinImplResult translateBuiltinNumberToNumber(
    IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 1 || nresults > 1)
        return {BuiltinImplType::None, -1};

    builtinCheckDouble(build, build.vmReg(arg), fallback);
    build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(1));

    if (ra != arg)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));

    return {BuiltinImplType::UsesFallback, 1};
}
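// Note on a pattern that repeats throughout this file: when ra == arg, the result register is
// the same register that was just checked to hold a number, so its tag is already LUA_TNUMBER
// and the STORE_TAG can be skipped.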
static BuiltinImplResult translateBuiltinNumberToNumberLibm(
    IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 1 || nresults > 1)
        return {BuiltinImplType::None, -1};

    builtinCheckDouble(build, build.vmReg(arg), fallback);
    IrOp va = builtinLoadDouble(build, build.vmReg(arg));

    IrOp res = build.inst(IrCmd::INVOKE_LIBM, build.constUint(bfid), va);

    build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);

    if (ra != arg)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));

    return {BuiltinImplType::UsesFallback, 1};
}

static BuiltinImplResult translateBuiltin2NumberToNumberLibm(
    IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 2 || nresults > 1)
        return {BuiltinImplType::None, -1};

    builtinCheckDouble(build, build.vmReg(arg), fallback);
    builtinCheckDouble(build, args, fallback);

    IrOp va = builtinLoadDouble(build, build.vmReg(arg));
    IrOp vb = builtinLoadDouble(build, args);

    IrOp res = build.inst(IrCmd::INVOKE_LIBM, build.constUint(bfid), va, vb);

    build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);

    if (ra != arg)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));

    return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinMathLdexp(
    IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 2 || nresults > 1)
        return {BuiltinImplType::None, -1};

    builtinCheckDouble(build, build.vmReg(arg), fallback);
    builtinCheckDouble(build, args, fallback);

    IrOp va = builtinLoadDouble(build, build.vmReg(arg));
    IrOp vb = builtinLoadDouble(build, args);

    IrOp vbi = build.inst(IrCmd::NUM_TO_INT, vb);

    IrOp res = build.inst(IrCmd::INVOKE_LIBM, build.constUint(bfid), va, vbi);

    build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);

    if (ra != arg)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));

    return {BuiltinImplType::UsesFallback, 1};
}
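// math.ldexp is routed through INVOKE_LIBM with the exponent narrowed via NUM_TO_INT first,
// matching the C signature double ldexp(double, int).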
// (number, ...) -> (number, number)
static BuiltinImplResult translateBuiltinNumberTo2Number(
    IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 1 || nresults > 2)
        return {BuiltinImplType::None, -1};

    builtinCheckDouble(build, build.vmReg(arg), fallback);
    build.inst(
        IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(nresults == 1 ? 1 : 2));

    if (ra != arg)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));

    if (nresults != 1)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra + 1), build.constTag(LUA_TNUMBER));

    return {BuiltinImplType::UsesFallback, 2};
}
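// Builtins using the wrapper above (math.frexp, math.modf) produce two results; when the call
// site consumes only one (nresults == 1), the FASTCALL is asked for a single result and the
// second tag store is skipped.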
static BuiltinImplResult translateBuiltinAssert(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 1 || nresults != 0)
        return {BuiltinImplType::None, -1};

    IrOp cont = build.block(IrBlockKind::Internal);

    // TODO: maybe adding a guard like CHECK_TRUTHY can be useful
    build.inst(IrCmd::JUMP_IF_FALSY, build.vmReg(arg), fallback, cont);
    build.beginBlock(cont);

    return {BuiltinImplType::UsesFallback, 0};
}

static BuiltinImplResult translateBuiltinMathDeg(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 1 || nresults > 1)
        return {BuiltinImplType::None, -1};

    builtinCheckDouble(build, build.vmReg(arg), fallback);

    const double rpd = (3.14159265358979323846 / 180.0);

    IrOp varg = builtinLoadDouble(build, build.vmReg(arg));
    IrOp value = build.inst(IrCmd::DIV_NUM, varg, build.constDouble(rpd));
    build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);

    if (ra != arg)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));

    return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinMathRad(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 1 || nresults > 1)
        return {BuiltinImplType::None, -1};

    builtinCheckDouble(build, build.vmReg(arg), fallback);

    const double rpd = (3.14159265358979323846 / 180.0);

    IrOp varg = builtinLoadDouble(build, build.vmReg(arg));
    IrOp value = build.inst(IrCmd::MUL_NUM, varg, build.constDouble(rpd));
    build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);

    if (ra != arg)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));

    return {BuiltinImplType::UsesFallback, 1};
}
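// Both conversions above share the constant rpd = pi / 180: math.deg(x) divides by it and
// math.rad(x) multiplies by it, so for example math.rad(180) evaluates to pi.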
static BuiltinImplResult translateBuiltinMathLog(
    IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 1 || nresults > 1)
        return {BuiltinImplType::None, -1};

    int libmId = bfid;
    std::optional<double> denom;

    if (nparams != 1)
    {
        std::optional<double> y = build.function.asDoubleOp(args);

        if (!y)
            return {BuiltinImplType::None, -1};

        if (*y == 2.0)
            libmId = LBF_IR_MATH_LOG2;
        else if (*y == 10.0)
            libmId = LBF_MATH_LOG10;
        else
            denom = log(*y);
    }

    builtinCheckDouble(build, build.vmReg(arg), fallback);

    IrOp va = builtinLoadDouble(build, build.vmReg(arg));

    IrOp res = build.inst(IrCmd::INVOKE_LIBM, build.constUint(libmId), va);

    if (denom)
        res = build.inst(IrCmd::DIV_NUM, res, build.constDouble(*denom));

    build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);

    if (ra != arg)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));

    return {BuiltinImplType::UsesFallback, 1};
}
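// Examples of the math.log specialization above: math.log(x, 2) invokes the dedicated log2
// entry (LBF_IR_MATH_LOG2), math.log(x, 10) becomes log10, any other constant base y becomes
// log(x) / log(y) with the divisor folded at compile time, and a non-constant base rejects
// the fast path entirely.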
static BuiltinImplResult translateBuiltinMathMin(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 2 || nparams > kMinMaxUnrolledParams || nresults > 1)
        return {BuiltinImplType::None, -1};

    builtinCheckDouble(build, build.vmReg(arg), fallback);
    builtinCheckDouble(build, args, fallback);

    for (int i = 3; i <= nparams; ++i)
        builtinCheckDouble(build, build.vmReg(vmRegOp(args) + (i - 2)), fallback);

    IrOp varg1 = builtinLoadDouble(build, build.vmReg(arg));
    IrOp varg2 = builtinLoadDouble(build, args);

    IrOp res = build.inst(IrCmd::MIN_NUM, varg2, varg1); // Swapped arguments are required for consistency with VM builtins

    for (int i = 3; i <= nparams; ++i)
    {
        IrOp arg = builtinLoadDouble(build, build.vmReg(vmRegOp(args) + (i - 2)));
        res = build.inst(IrCmd::MIN_NUM, arg, res);
    }

    build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);

    if (ra != arg)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));

    return {BuiltinImplType::UsesFallback, 1};
}

static BuiltinImplResult translateBuiltinMathMax(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 2 || nparams > kMinMaxUnrolledParams || nresults > 1)
        return {BuiltinImplType::None, -1};

    builtinCheckDouble(build, build.vmReg(arg), fallback);
    builtinCheckDouble(build, args, fallback);

    for (int i = 3; i <= nparams; ++i)
        builtinCheckDouble(build, build.vmReg(vmRegOp(args) + (i - 2)), fallback);

    IrOp varg1 = builtinLoadDouble(build, build.vmReg(arg));
    IrOp varg2 = builtinLoadDouble(build, args);

    IrOp res = build.inst(IrCmd::MAX_NUM, varg2, varg1); // Swapped arguments are required for consistency with VM builtins

    for (int i = 3; i <= nparams; ++i)
    {
        IrOp arg = builtinLoadDouble(build, build.vmReg(vmRegOp(args) + (i - 2)));
        res = build.inst(IrCmd::MAX_NUM, arg, res);
    }

    build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);

    if (ra != arg)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));

    return {BuiltinImplType::UsesFallback, 1};
}
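// The swapped MIN_NUM/MAX_NUM operand order above is presumably what keeps NaN handling in
// line with the interpreter: which operand wins when one of them is NaN depends on the fold
// order, so the unrolled chain has to mirror the order the VM builtin uses.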
static BuiltinImplResult translateBuiltinMathClamp(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 3 || nresults > 1)
        return {BuiltinImplType::None, -1};

    IrOp block = build.block(IrBlockKind::Internal);

    LUAU_ASSERT(args.kind == IrOpKind::VmReg);

    builtinCheckDouble(build, build.vmReg(arg), fallback);
    builtinCheckDouble(build, args, fallback);
    builtinCheckDouble(build, build.vmReg(vmRegOp(args) + 1), fallback);

    IrOp min = builtinLoadDouble(build, args);
    IrOp max = builtinLoadDouble(build, build.vmReg(vmRegOp(args) + 1));

    build.inst(IrCmd::JUMP_CMP_NUM, min, max, build.cond(IrCondition::NotLessEqual), fallback, block);
    build.beginBlock(block);

    IrOp v = builtinLoadDouble(build, build.vmReg(arg));
    IrOp r = build.inst(IrCmd::MAX_NUM, min, v);
    IrOp clamped = build.inst(IrCmd::MIN_NUM, max, r);

    build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), clamped);

    if (ra != arg)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));

    return {BuiltinImplType::UsesFallback, 1};
}
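// Note that math.clamp does not raise an argument error inline: when min > max, the
// JUMP_CMP_NUM above exits to the fallback and error reporting is left to the non-fast path.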
static BuiltinImplResult translateBuiltinMathUnary(IrBuilder& build, IrCmd cmd, int nparams, int ra, int arg, int nresults, IrOp fallback)
{
    if (nparams < 1 || nresults > 1)
        return {BuiltinImplType::None, -1};

    builtinCheckDouble(build, build.vmReg(arg), fallback);

    IrOp varg = builtinLoadDouble(build, build.vmReg(arg));
    IrOp result = build.inst(cmd, varg);

    build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), result);

    if (ra != arg)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));

    return {BuiltinImplType::UsesFallback, 1};
}

static BuiltinImplResult translateBuiltinType(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 1 || nresults > 1)
        return {BuiltinImplType::None, -1};

    build.inst(IrCmd::FASTCALL, build.constUint(LBF_TYPE), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(1));

    build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TSTRING));

    return {BuiltinImplType::UsesFallback, 1};
}

static BuiltinImplResult translateBuiltinTypeof(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 1 || nresults > 1)
        return {BuiltinImplType::None, -1};

    build.inst(IrCmd::FASTCALL, build.constUint(LBF_TYPEOF), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(1));

    build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TSTRING));

    return {BuiltinImplType::UsesFallback, 1};
}

static BuiltinImplResult translateBuiltinBit32BinaryOp(
    IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 2 || nparams > kBit32BinaryOpUnrolledParams || nresults > 1)
        return {BuiltinImplType::None, -1};

    builtinCheckDouble(build, build.vmReg(arg), fallback);
    builtinCheckDouble(build, args, fallback);

    for (int i = 3; i <= nparams; ++i)
        builtinCheckDouble(build, build.vmReg(vmRegOp(args) + (i - 2)), fallback);

    IrOp va = builtinLoadDouble(build, build.vmReg(arg));
    IrOp vb = builtinLoadDouble(build, args);

    IrOp vaui = build.inst(IrCmd::NUM_TO_UINT, va);
    IrOp vbui = build.inst(IrCmd::NUM_TO_UINT, vb);

    IrCmd cmd = IrCmd::NOP;
    if (bfid == LBF_BIT32_BAND || bfid == LBF_BIT32_BTEST)
        cmd = IrCmd::BITAND_UINT;
    else if (bfid == LBF_BIT32_BXOR)
        cmd = IrCmd::BITXOR_UINT;
    else if (bfid == LBF_BIT32_BOR)
        cmd = IrCmd::BITOR_UINT;

    LUAU_ASSERT(cmd != IrCmd::NOP);

    IrOp res = build.inst(cmd, vaui, vbui);

    for (int i = 3; i <= nparams; ++i)
    {
        IrOp vc = builtinLoadDouble(build, build.vmReg(vmRegOp(args) + (i - 2)));
        IrOp arg = build.inst(IrCmd::NUM_TO_UINT, vc);

        res = build.inst(cmd, res, arg);
    }

    if (bfid == LBF_BIT32_BTEST)
    {
        IrOp falsey = build.block(IrBlockKind::Internal);
        IrOp truthy = build.block(IrBlockKind::Internal);
        IrOp exit = build.block(IrBlockKind::Internal);
        build.inst(IrCmd::JUMP_EQ_INT, res, build.constInt(0), falsey, truthy);

        build.beginBlock(falsey);
        build.inst(IrCmd::STORE_INT, build.vmReg(ra), build.constInt(0));
        build.inst(IrCmd::JUMP, exit);

        build.beginBlock(truthy);
        build.inst(IrCmd::STORE_INT, build.vmReg(ra), build.constInt(1));
        build.inst(IrCmd::JUMP, exit);

        build.beginBlock(exit);
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TBOOLEAN));
    }
    else
    {
        IrOp value = build.inst(IrCmd::UINT_TO_NUM, res);
        build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);

        if (ra != arg)
            build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
    }

    return {BuiltinImplType::UsesFallback, 1};
}
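// Worked example for the unrolled fold above: bit32.band(0xff, 0x0f, 0x03) checks and converts
// all three arguments, then folds left to right: (0xff & 0x0f) & 0x03 == 0x03. bit32.btest uses
// the same BITAND_UINT chain but materializes a boolean by comparing the result against zero.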
static BuiltinImplResult translateBuiltinBit32Bnot(
    IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 1 || nresults > 1)
        return {BuiltinImplType::None, -1};

    builtinCheckDouble(build, build.vmReg(arg), fallback);
    IrOp va = builtinLoadDouble(build, build.vmReg(arg));

    IrOp vaui = build.inst(IrCmd::NUM_TO_UINT, va);
    IrOp not_ = build.inst(IrCmd::BITNOT_UINT, vaui);
    IrOp value = build.inst(IrCmd::UINT_TO_NUM, not_);

    build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);

    if (ra != arg)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));

    return {BuiltinImplType::UsesFallback, 1};
}

static BuiltinImplResult translateBuiltinBit32Shift(
    IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 2 || nresults > 1)
        return {BuiltinImplType::None, -1};

    IrOp block = build.block(IrBlockKind::Internal);

    builtinCheckDouble(build, build.vmReg(arg), fallback);
    builtinCheckDouble(build, args, fallback);

    IrOp va = builtinLoadDouble(build, build.vmReg(arg));
    IrOp vb = builtinLoadDouble(build, args);

    IrOp vaui = build.inst(IrCmd::NUM_TO_UINT, va);
    IrOp vbi = build.inst(IrCmd::NUM_TO_INT, vb);

    build.inst(IrCmd::JUMP_GE_UINT, vbi, build.constInt(32), fallback, block);
    build.beginBlock(block);

    IrCmd cmd = IrCmd::NOP;
    if (bfid == LBF_BIT32_LSHIFT)
        cmd = IrCmd::BITLSHIFT_UINT;
    else if (bfid == LBF_BIT32_RSHIFT)
        cmd = IrCmd::BITRSHIFT_UINT;
    else if (bfid == LBF_BIT32_ARSHIFT)
        cmd = IrCmd::BITARSHIFT_UINT;

    LUAU_ASSERT(cmd != IrCmd::NOP);

    IrOp shift = build.inst(cmd, vaui, vbi);

    IrOp value = build.inst(IrCmd::UINT_TO_NUM, shift);
    build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);

    if (ra != arg)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));

    return {BuiltinImplType::UsesFallback, 1};
}
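// The shift count is produced by NUM_TO_INT but guarded with an unsigned comparison: any count
// outside [0, 31] appears as a value >= 32 when reinterpreted as unsigned (negative counts wrap
// to large values), so a single JUMP_GE_UINT covers both out-of-range directions.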
static BuiltinImplResult translateBuiltinBit32Rotate(
    IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 2 || nresults > 1)
        return {BuiltinImplType::None, -1};

    builtinCheckDouble(build, build.vmReg(arg), fallback);
    builtinCheckDouble(build, args, fallback);

    IrOp va = builtinLoadDouble(build, build.vmReg(arg));
    IrOp vb = builtinLoadDouble(build, args);

    IrOp vaui = build.inst(IrCmd::NUM_TO_UINT, va);
    IrOp vbi = build.inst(IrCmd::NUM_TO_INT, vb);

    IrCmd cmd = (bfid == LBF_BIT32_LROTATE) ? IrCmd::BITLROTATE_UINT : IrCmd::BITRROTATE_UINT;
    IrOp shift = build.inst(cmd, vaui, vbi);

    IrOp value = build.inst(IrCmd::UINT_TO_NUM, shift);
    build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);

    if (ra != arg)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));

    return {BuiltinImplType::UsesFallback, 1};
}

static BuiltinImplResult translateBuiltinBit32Extract(
    IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 2 || nresults > 1)
        return {BuiltinImplType::None, -1};

    builtinCheckDouble(build, build.vmReg(arg), fallback);
    builtinCheckDouble(build, args, fallback);

    IrOp va = builtinLoadDouble(build, build.vmReg(arg));
    IrOp vb = builtinLoadDouble(build, args);

    IrOp n = build.inst(IrCmd::NUM_TO_UINT, va);
    IrOp f = build.inst(IrCmd::NUM_TO_INT, vb);

    IrOp value;
    if (nparams == 2)
    {
        IrOp block = build.block(IrBlockKind::Internal);
        build.inst(IrCmd::JUMP_GE_UINT, f, build.constInt(32), fallback, block);
        build.beginBlock(block);

        // TODO: this can be optimized using a bit-select instruction (bt on x86)
        IrOp shift = build.inst(IrCmd::BITRSHIFT_UINT, n, f);
        value = build.inst(IrCmd::BITAND_UINT, shift, build.constInt(1));
    }
    else
    {
        builtinCheckDouble(build, build.vmReg(args.index + 1), fallback);
        IrOp vc = builtinLoadDouble(build, build.vmReg(args.index + 1));
        IrOp w = build.inst(IrCmd::NUM_TO_INT, vc);

        IrOp block1 = build.block(IrBlockKind::Internal);
        build.inst(IrCmd::JUMP_LT_INT, f, build.constInt(0), fallback, block1);
        build.beginBlock(block1);

        IrOp block2 = build.block(IrBlockKind::Internal);
        build.inst(IrCmd::JUMP_LT_INT, w, build.constInt(1), fallback, block2);
        build.beginBlock(block2);

        IrOp block3 = build.block(IrBlockKind::Internal);
        IrOp fw = build.inst(IrCmd::ADD_INT, f, w);
        build.inst(IrCmd::JUMP_LT_INT, fw, build.constInt(33), block3, fallback);
        build.beginBlock(block3);

        IrOp shift = build.inst(IrCmd::BITLSHIFT_UINT, build.constInt(0xfffffffe), build.inst(IrCmd::SUB_INT, w, build.constInt(1)));
        IrOp m = build.inst(IrCmd::BITNOT_UINT, shift);

        IrOp nf = build.inst(IrCmd::BITRSHIFT_UINT, n, f);
        value = build.inst(IrCmd::BITAND_UINT, nf, m);
    }

    build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), build.inst(IrCmd::UINT_TO_NUM, value));

    if (ra != arg)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));

    return {BuiltinImplType::UsesFallback, 1};
}
static BuiltinImplResult translateBuiltinBit32ExtractK(
    IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 2 || nresults > 1)
        return {BuiltinImplType::None, -1};

    builtinCheckDouble(build, build.vmReg(arg), fallback);

    IrOp va = builtinLoadDouble(build, build.vmReg(arg));
    IrOp n = build.inst(IrCmd::NUM_TO_UINT, va);

    double a2 = build.function.doubleOp(args);
    int fw = int(a2);

    int f = fw & 31;
    int w1 = fw >> 5;

    uint32_t m = ~(0xfffffffeu << w1);

    IrOp nf = build.inst(IrCmd::BITRSHIFT_UINT, n, build.constInt(f));
    IrOp and_ = build.inst(IrCmd::BITAND_UINT, nf, build.constInt(m));

    IrOp value = build.inst(IrCmd::UINT_TO_NUM, and_);
    build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);

    if (ra != arg)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));

    return {BuiltinImplType::UsesFallback, 1};
}
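// The constant second argument appears to pack both field and width, presumably encoded by the
// bytecode compiler as f | ((w - 1) << 5): the low 5 bits hold the field offset and the higher
// bits hold width - 1, so m = ~(0xfffffffeu << w1) reconstructs a mask of w consecutive ones
// (e.g. w = 3 gives w1 = 2 and m = 0x7).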
static BuiltinImplResult translateBuiltinBit32Countz(
    IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 1 || nresults > 1)
        return {BuiltinImplType::None, -1};

    builtinCheckDouble(build, build.vmReg(arg), fallback);
    IrOp va = builtinLoadDouble(build, build.vmReg(arg));

    IrOp vaui = build.inst(IrCmd::NUM_TO_UINT, va);

    IrCmd cmd = (bfid == LBF_BIT32_COUNTLZ) ? IrCmd::BITCOUNTLZ_UINT : IrCmd::BITCOUNTRZ_UINT;
    IrOp bin = build.inst(cmd, vaui);

    IrOp value = build.inst(IrCmd::UINT_TO_NUM, bin);

    build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), value);

    if (ra != arg)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));

    return {BuiltinImplType::UsesFallback, 1};
}

static BuiltinImplResult translateBuiltinBit32Replace(
    IrBuilder& build, LuauBuiltinFunction bfid, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 3 || nresults > 1)
        return {BuiltinImplType::None, -1};

    builtinCheckDouble(build, build.vmReg(arg), fallback);
    builtinCheckDouble(build, args, fallback);
    builtinCheckDouble(build, build.vmReg(args.index + 1), fallback);

    IrOp va = builtinLoadDouble(build, build.vmReg(arg));
    IrOp vb = builtinLoadDouble(build, args);
    IrOp vc = builtinLoadDouble(build, build.vmReg(args.index + 1));

    IrOp n = build.inst(IrCmd::NUM_TO_UINT, va);
    IrOp v = build.inst(IrCmd::NUM_TO_UINT, vb);
    IrOp f = build.inst(IrCmd::NUM_TO_INT, vc);

    IrOp value;
    if (nparams == 3)
    {
        IrOp block = build.block(IrBlockKind::Internal);
        build.inst(IrCmd::JUMP_GE_UINT, f, build.constInt(32), fallback, block);
        build.beginBlock(block);

        // TODO: this can be optimized using a bit-select instruction (btr on x86)
        IrOp m = build.constInt(1);
        IrOp shift = build.inst(IrCmd::BITLSHIFT_UINT, m, f);
        IrOp not_ = build.inst(IrCmd::BITNOT_UINT, shift);
        IrOp lhs = build.inst(IrCmd::BITAND_UINT, n, not_);

        IrOp vm = build.inst(IrCmd::BITAND_UINT, v, m);
        IrOp rhs = build.inst(IrCmd::BITLSHIFT_UINT, vm, f);

        value = build.inst(IrCmd::BITOR_UINT, lhs, rhs);
    }
    else
    {
        builtinCheckDouble(build, build.vmReg(args.index + 2), fallback);
        IrOp vd = builtinLoadDouble(build, build.vmReg(args.index + 2));
        IrOp w = build.inst(IrCmd::NUM_TO_INT, vd);

        IrOp block1 = build.block(IrBlockKind::Internal);
        build.inst(IrCmd::JUMP_LT_INT, f, build.constInt(0), fallback, block1);
        build.beginBlock(block1);

        IrOp block2 = build.block(IrBlockKind::Internal);
        build.inst(IrCmd::JUMP_LT_INT, w, build.constInt(1), fallback, block2);
        build.beginBlock(block2);

        IrOp block3 = build.block(IrBlockKind::Internal);
        IrOp fw = build.inst(IrCmd::ADD_INT, f, w);
        build.inst(IrCmd::JUMP_LT_INT, fw, build.constInt(33), block3, fallback);
        build.beginBlock(block3);

        IrOp shift1 = build.inst(IrCmd::BITLSHIFT_UINT, build.constInt(0xfffffffe), build.inst(IrCmd::SUB_INT, w, build.constInt(1)));
        IrOp m = build.inst(IrCmd::BITNOT_UINT, shift1);

        IrOp shift2 = build.inst(IrCmd::BITLSHIFT_UINT, m, f);
        IrOp not_ = build.inst(IrCmd::BITNOT_UINT, shift2);
        IrOp lhs = build.inst(IrCmd::BITAND_UINT, n, not_);

        IrOp vm = build.inst(IrCmd::BITAND_UINT, v, m);
        IrOp rhs = build.inst(IrCmd::BITLSHIFT_UINT, vm, f);

        value = build.inst(IrCmd::BITOR_UINT, lhs, rhs);
    }

    build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), build.inst(IrCmd::UINT_TO_NUM, value));

    if (ra != arg)
        build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));

    return {BuiltinImplType::UsesFallback, 1};
}

static BuiltinImplResult translateBuiltinVector(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
    if (nparams < 3 || nresults > 1)
        return {BuiltinImplType::None, -1};

    LUAU_ASSERT(LUA_VECTOR_SIZE == 3);

    builtinCheckDouble(build, build.vmReg(arg), fallback);
    builtinCheckDouble(build, args, fallback);
    builtinCheckDouble(build, build.vmReg(vmRegOp(args) + 1), fallback);

    IrOp x = builtinLoadDouble(build, build.vmReg(arg));
    IrOp y = builtinLoadDouble(build, args);
    IrOp z = builtinLoadDouble(build, build.vmReg(vmRegOp(args) + 1));

    build.inst(IrCmd::STORE_VECTOR, build.vmReg(ra), x, y, z);
    build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TVECTOR));

    return {BuiltinImplType::UsesFallback, 1};
}

BuiltinImplResult translateBuiltin(IrBuilder& build, int bfid, int ra, int arg, IrOp args, int nparams, int nresults, IrOp fallback)
{
    // Builtins are not allowed to handle variadic arguments
    if (nparams == LUA_MULTRET)
        return {BuiltinImplType::None, -1};

    switch (bfid)
    {
    case LBF_ASSERT:
        return translateBuiltinAssert(build, nparams, ra, arg, args, nresults, fallback);
    case LBF_MATH_DEG:
        return translateBuiltinMathDeg(build, nparams, ra, arg, args, nresults, fallback);
    case LBF_MATH_RAD:
        return translateBuiltinMathRad(build, nparams, ra, arg, args, nresults, fallback);
    case LBF_MATH_LOG:
        return translateBuiltinMathLog(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
    case LBF_MATH_MIN:
        return translateBuiltinMathMin(build, nparams, ra, arg, args, nresults, fallback);
    case LBF_MATH_MAX:
        return translateBuiltinMathMax(build, nparams, ra, arg, args, nresults, fallback);
    case LBF_MATH_CLAMP:
        return translateBuiltinMathClamp(build, nparams, ra, arg, args, nresults, fallback);
    case LBF_MATH_FLOOR:
        return translateBuiltinMathUnary(build, IrCmd::FLOOR_NUM, nparams, ra, arg, nresults, fallback);
    case LBF_MATH_CEIL:
        return translateBuiltinMathUnary(build, IrCmd::CEIL_NUM, nparams, ra, arg, nresults, fallback);
    case LBF_MATH_SQRT:
        return translateBuiltinMathUnary(build, IrCmd::SQRT_NUM, nparams, ra, arg, nresults, fallback);
    case LBF_MATH_ABS:
        return translateBuiltinMathUnary(build, IrCmd::ABS_NUM, nparams, ra, arg, nresults, fallback);
    case LBF_MATH_ROUND:
        return translateBuiltinMathUnary(build, IrCmd::ROUND_NUM, nparams, ra, arg, nresults, fallback);
    case LBF_MATH_EXP:
    case LBF_MATH_ASIN:
    case LBF_MATH_SIN:
    case LBF_MATH_SINH:
    case LBF_MATH_ACOS:
    case LBF_MATH_COS:
    case LBF_MATH_COSH:
    case LBF_MATH_ATAN:
    case LBF_MATH_TAN:
    case LBF_MATH_TANH:
    case LBF_MATH_LOG10:
        return translateBuiltinNumberToNumberLibm(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
    case LBF_MATH_SIGN:
        return translateBuiltinNumberToNumber(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
    case LBF_MATH_POW:
    case LBF_MATH_FMOD:
    case LBF_MATH_ATAN2:
        return translateBuiltin2NumberToNumberLibm(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
    case LBF_MATH_LDEXP:
        return translateBuiltinMathLdexp(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
    case LBF_MATH_FREXP:
    case LBF_MATH_MODF:
        return translateBuiltinNumberTo2Number(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
    case LBF_BIT32_BAND:
    case LBF_BIT32_BOR:
    case LBF_BIT32_BXOR:
    case LBF_BIT32_BTEST:
        return translateBuiltinBit32BinaryOp(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
    case LBF_BIT32_BNOT:
        return translateBuiltinBit32Bnot(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
    case LBF_BIT32_LSHIFT:
    case LBF_BIT32_RSHIFT:
    case LBF_BIT32_ARSHIFT:
        return translateBuiltinBit32Shift(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
    case LBF_BIT32_LROTATE:
    case LBF_BIT32_RROTATE:
        return translateBuiltinBit32Rotate(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
    case LBF_BIT32_EXTRACT:
        return translateBuiltinBit32Extract(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
    case LBF_BIT32_EXTRACTK:
        return translateBuiltinBit32ExtractK(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
    case LBF_BIT32_COUNTLZ:
    case LBF_BIT32_COUNTRZ:
        return translateBuiltinBit32Countz(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
    case LBF_BIT32_REPLACE:
        return translateBuiltinBit32Replace(build, LuauBuiltinFunction(bfid), nparams, ra, arg, args, nresults, fallback);
    case LBF_TYPE:
        return translateBuiltinType(build, nparams, ra, arg, args, nresults, fallback);
    case LBF_TYPEOF:
        return translateBuiltinTypeof(build, nparams, ra, arg, args, nresults, fallback);
    case LBF_VECTOR:
        return translateBuiltinVector(build, nparams, ra, arg, args, nresults, fallback);
    default:
        return {BuiltinImplType::None, -1};
    }
}

} // namespace CodeGen
} // namespace Luau
@@ -0,0 +1,27 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

namespace Luau
{
namespace CodeGen
{

struct IrBuilder;
struct IrOp;

enum class BuiltinImplType
{
    None,
    UsesFallback, // Uses fallback for unsupported cases
};

struct BuiltinImplResult
{
    BuiltinImplType type;
    int actualResultCount;
};

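// actualResultCount is the number of results the translated builtin produces; it is -1
// whenever type is BuiltinImplType::None and no translation was performed.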
BuiltinImplResult translateBuiltin(IrBuilder& build, int bfid, int ra, int arg, IrOp args, int nparams, int nresults, IrOp fallback);

} // namespace CodeGen
} // namespace Luau
File diff suppressed because it is too large
@@ -0,0 +1,68 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include <stdint.h>

#include "ltm.h"

typedef uint32_t Instruction;

namespace Luau
{
namespace CodeGen
{

enum class IrCondition : uint8_t;
struct IrOp;
struct IrBuilder;
enum class IrCmd : uint8_t;

void translateInstLoadNil(IrBuilder& build, const Instruction* pc);
void translateInstLoadB(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstLoadN(IrBuilder& build, const Instruction* pc);
void translateInstLoadK(IrBuilder& build, const Instruction* pc);
void translateInstLoadKX(IrBuilder& build, const Instruction* pc);
void translateInstMove(IrBuilder& build, const Instruction* pc);
void translateInstJump(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstJumpBack(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstJumpIf(IrBuilder& build, const Instruction* pc, int pcpos, bool not_);
void translateInstJumpIfEq(IrBuilder& build, const Instruction* pc, int pcpos, bool not_);
void translateInstJumpIfCond(IrBuilder& build, const Instruction* pc, int pcpos, IrCondition cond);
void translateInstJumpX(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstJumpxEqNil(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstJumpxEqB(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstJumpxEqN(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstJumpxEqS(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstBinary(IrBuilder& build, const Instruction* pc, int pcpos, TMS tm);
void translateInstBinaryK(IrBuilder& build, const Instruction* pc, int pcpos, TMS tm);
void translateInstNot(IrBuilder& build, const Instruction* pc);
void translateInstMinus(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstLength(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstNewTable(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstDupTable(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstGetUpval(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstSetUpval(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstCloseUpvals(IrBuilder& build, const Instruction* pc);
void translateFastCallN(IrBuilder& build, const Instruction* pc, int pcpos, bool customParams, int customParamCount, IrOp customArgs, IrOp next);
void translateInstForNPrep(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstForNLoop(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstForGPrepNext(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstForGPrepInext(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstForGLoopIpairs(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstGetTableN(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstSetTableN(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstGetTable(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstSetTable(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstGetImport(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstGetTableKS(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstSetTableKS(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstGetGlobal(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstSetGlobal(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstConcat(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstCapture(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstNamecall(IrBuilder& build, const Instruction* pc, int pcpos);
void translateInstAndX(IrBuilder& build, const Instruction* pc, int pcpos, IrOp c);
void translateInstOrX(IrBuilder& build, const Instruction* pc, int pcpos, IrOp c);

} // namespace CodeGen
} // namespace Luau
@@ -0,0 +1,791 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/IrUtils.h"

#include "Luau/IrBuilder.h"

#include "BitUtils.h"
#include "NativeState.h"

#include "lua.h"
#include "lnumutils.h"

#include <limits.h>
#include <math.h>

namespace Luau
{
namespace CodeGen
{

IrValueKind getCmdValueKind(IrCmd cmd)
{
    switch (cmd)
    {
    case IrCmd::NOP:
        return IrValueKind::None;
    case IrCmd::LOAD_TAG:
        return IrValueKind::Tag;
    case IrCmd::LOAD_POINTER:
        return IrValueKind::Pointer;
    case IrCmd::LOAD_DOUBLE:
        return IrValueKind::Double;
    case IrCmd::LOAD_INT:
        return IrValueKind::Int;
    case IrCmd::LOAD_TVALUE:
    case IrCmd::LOAD_NODE_VALUE_TV:
        return IrValueKind::Tvalue;
    case IrCmd::LOAD_ENV:
    case IrCmd::GET_ARR_ADDR:
    case IrCmd::GET_SLOT_NODE_ADDR:
    case IrCmd::GET_HASH_NODE_ADDR:
        return IrValueKind::Pointer;
    case IrCmd::STORE_TAG:
    case IrCmd::STORE_POINTER:
    case IrCmd::STORE_DOUBLE:
    case IrCmd::STORE_INT:
    case IrCmd::STORE_VECTOR:
    case IrCmd::STORE_TVALUE:
    case IrCmd::STORE_NODE_VALUE_TV:
        return IrValueKind::None;
    case IrCmd::ADD_INT:
    case IrCmd::SUB_INT:
        return IrValueKind::Int;
    case IrCmd::ADD_NUM:
    case IrCmd::SUB_NUM:
    case IrCmd::MUL_NUM:
    case IrCmd::DIV_NUM:
    case IrCmd::MOD_NUM:
    case IrCmd::MIN_NUM:
    case IrCmd::MAX_NUM:
    case IrCmd::UNM_NUM:
    case IrCmd::FLOOR_NUM:
    case IrCmd::CEIL_NUM:
    case IrCmd::ROUND_NUM:
    case IrCmd::SQRT_NUM:
    case IrCmd::ABS_NUM:
        return IrValueKind::Double;
    case IrCmd::NOT_ANY:
        return IrValueKind::Int;
    case IrCmd::JUMP:
    case IrCmd::JUMP_IF_TRUTHY:
    case IrCmd::JUMP_IF_FALSY:
    case IrCmd::JUMP_EQ_TAG:
    case IrCmd::JUMP_EQ_INT:
    case IrCmd::JUMP_LT_INT:
    case IrCmd::JUMP_GE_UINT:
    case IrCmd::JUMP_EQ_POINTER:
    case IrCmd::JUMP_CMP_NUM:
    case IrCmd::JUMP_CMP_ANY:
    case IrCmd::JUMP_SLOT_MATCH:
        return IrValueKind::None;
    case IrCmd::TABLE_LEN:
        return IrValueKind::Double;
    case IrCmd::NEW_TABLE:
    case IrCmd::DUP_TABLE:
        return IrValueKind::Pointer;
    case IrCmd::TRY_NUM_TO_INDEX:
        return IrValueKind::Int;
    case IrCmd::TRY_CALL_FASTGETTM:
        return IrValueKind::Pointer;
    case IrCmd::INT_TO_NUM:
    case IrCmd::UINT_TO_NUM:
        return IrValueKind::Double;
    case IrCmd::NUM_TO_INT:
    case IrCmd::NUM_TO_UINT:
        return IrValueKind::Int;
    case IrCmd::ADJUST_STACK_TO_REG:
    case IrCmd::ADJUST_STACK_TO_TOP:
        return IrValueKind::None;
    case IrCmd::FASTCALL:
        return IrValueKind::None;
    case IrCmd::INVOKE_FASTCALL:
        return IrValueKind::Int;
    case IrCmd::CHECK_FASTCALL_RES:
    case IrCmd::DO_ARITH:
    case IrCmd::DO_LEN:
    case IrCmd::GET_TABLE:
    case IrCmd::SET_TABLE:
    case IrCmd::GET_IMPORT:
    case IrCmd::CONCAT:
    case IrCmd::GET_UPVALUE:
    case IrCmd::SET_UPVALUE:
    case IrCmd::PREPARE_FORN:
    case IrCmd::CHECK_TAG:
    case IrCmd::CHECK_READONLY:
    case IrCmd::CHECK_NO_METATABLE:
    case IrCmd::CHECK_SAFE_ENV:
    case IrCmd::CHECK_ARRAY_SIZE:
    case IrCmd::CHECK_SLOT_MATCH:
    case IrCmd::CHECK_NODE_NO_NEXT:
    case IrCmd::INTERRUPT:
    case IrCmd::CHECK_GC:
    case IrCmd::BARRIER_OBJ:
    case IrCmd::BARRIER_TABLE_BACK:
    case IrCmd::BARRIER_TABLE_FORWARD:
    case IrCmd::SET_SAVEDPC:
    case IrCmd::CLOSE_UPVALS:
    case IrCmd::CAPTURE:
    case IrCmd::SETLIST:
    case IrCmd::CALL:
    case IrCmd::RETURN:
    case IrCmd::FORGLOOP:
    case IrCmd::FORGLOOP_FALLBACK:
    case IrCmd::FORGPREP_XNEXT_FALLBACK:
    case IrCmd::COVERAGE:
    case IrCmd::FALLBACK_GETGLOBAL:
    case IrCmd::FALLBACK_SETGLOBAL:
    case IrCmd::FALLBACK_GETTABLEKS:
    case IrCmd::FALLBACK_SETTABLEKS:
    case IrCmd::FALLBACK_NAMECALL:
    case IrCmd::FALLBACK_PREPVARARGS:
    case IrCmd::FALLBACK_GETVARARGS:
    case IrCmd::FALLBACK_NEWCLOSURE:
    case IrCmd::FALLBACK_DUPCLOSURE:
    case IrCmd::FALLBACK_FORGPREP:
        return IrValueKind::None;
    case IrCmd::SUBSTITUTE:
        return IrValueKind::Unknown;
    case IrCmd::BITAND_UINT:
    case IrCmd::BITXOR_UINT:
    case IrCmd::BITOR_UINT:
    case IrCmd::BITNOT_UINT:
    case IrCmd::BITLSHIFT_UINT:
    case IrCmd::BITRSHIFT_UINT:
    case IrCmd::BITARSHIFT_UINT:
    case IrCmd::BITLROTATE_UINT:
    case IrCmd::BITRROTATE_UINT:
    case IrCmd::BITCOUNTLZ_UINT:
    case IrCmd::BITCOUNTRZ_UINT:
        return IrValueKind::Int;
    case IrCmd::INVOKE_LIBM:
        return IrValueKind::Double;
    }

    LUAU_UNREACHABLE();
}

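// The switch above deliberately has no default case, so adding a new IrCmd without classifying
// it here should surface as an unhandled-enumerator compiler warning; LUAU_UNREACHABLE() covers
// the nominal fallthrough.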
static void removeInstUse(IrFunction& function, uint32_t instIdx)
{
    IrInst& inst = function.instructions[instIdx];

    LUAU_ASSERT(inst.useCount);
    inst.useCount--;

    if (inst.useCount == 0)
        kill(function, inst);
}

static void removeBlockUse(IrFunction& function, uint32_t blockIdx)
{
    IrBlock& block = function.blocks[blockIdx];

    LUAU_ASSERT(block.useCount);
    block.useCount--;

    // Entry block is never removed because it has an implicit use
    if (block.useCount == 0 && blockIdx != 0)
        kill(function, block);
}

void addUse(IrFunction& function, IrOp op)
{
    if (op.kind == IrOpKind::Inst)
        function.instructions[op.index].useCount++;
    else if (op.kind == IrOpKind::Block)
        function.blocks[op.index].useCount++;
}

void removeUse(IrFunction& function, IrOp op)
{
    if (op.kind == IrOpKind::Inst)
        removeInstUse(function, op.index);
    else if (op.kind == IrOpKind::Block)
        removeBlockUse(function, op.index);
}

bool isGCO(uint8_t tag)
{
    // mirrors iscollectable(o) from VM/lobject.h
    return tag >= LUA_TSTRING;
}

void kill(IrFunction& function, IrInst& inst)
{
    LUAU_ASSERT(inst.useCount == 0);

    inst.cmd = IrCmd::NOP;

    removeUse(function, inst.a);
    removeUse(function, inst.b);
    removeUse(function, inst.c);
    removeUse(function, inst.d);
    removeUse(function, inst.e);
    removeUse(function, inst.f);

    inst.a = {};
    inst.b = {};
    inst.c = {};
    inst.d = {};
    inst.e = {};
    inst.f = {};
}

void kill(IrFunction& function, uint32_t start, uint32_t end)
{
    // Kill instructions in reverse order to avoid killing instructions that are still marked as used
    for (int i = int(end); i >= int(start); i--)
    {
        LUAU_ASSERT(unsigned(i) < function.instructions.size());
        IrInst& curr = function.instructions[i];

        if (curr.cmd == IrCmd::NOP)
            continue;

        kill(function, curr);
    }
}

void kill(IrFunction& function, IrBlock& block)
{
    LUAU_ASSERT(block.useCount == 0);

    block.kind = IrBlockKind::Dead;

    kill(function, block.start, block.finish);
    block.start = ~0u;
    block.finish = ~0u;
}

void replace(IrFunction& function, IrOp& original, IrOp replacement)
{
    // Add the new use before removing the old one, in case the old use is the last one keeping the target operand alive
    addUse(function, replacement);
    removeUse(function, original);

    original = replacement;
}

void replace(IrFunction& function, IrBlock& block, uint32_t instIdx, IrInst replacement)
{
    IrInst& inst = function.instructions[instIdx];

    // Add the new uses before removing the old ones, in case the old uses are the last ones keeping the target operands alive
    addUse(function, replacement.a);
    addUse(function, replacement.b);
    addUse(function, replacement.c);
    addUse(function, replacement.d);
    addUse(function, replacement.e);
    addUse(function, replacement.f);

    // An extra reference is added so the block will not remove itself
    block.useCount++;

    // If we introduced an earlier terminating instruction, all following instructions become dead
    if (!isBlockTerminator(inst.cmd) && isBlockTerminator(replacement.cmd))
    {
        // Block has to be fully constructed before the replacement is performed
        LUAU_ASSERT(block.finish != ~0u);
        LUAU_ASSERT(instIdx + 1 <= block.finish);

        kill(function, instIdx + 1, block.finish);

        block.finish = instIdx;
    }

    removeUse(function, inst.a);
    removeUse(function, inst.b);
    removeUse(function, inst.c);
    removeUse(function, inst.d);
    removeUse(function, inst.e);
    removeUse(function, inst.f);

    // Inherit existing use count (last use is skipped as it will be defined later)
    replacement.useCount = inst.useCount;

    inst = replacement;

    // Remove the extra reference added earlier; this can leave the block without users while not yet marked as dead,
    // which has to be handled by a separate dead code elimination pass
    block.useCount--;
}
void substitute(IrFunction& function, IrInst& inst, IrOp replacement)
{
    LUAU_ASSERT(!isBlockTerminator(inst.cmd));

    inst.cmd = IrCmd::SUBSTITUTE;

    addUse(function, replacement);

    removeUse(function, inst.a);
    removeUse(function, inst.b);
    removeUse(function, inst.c);
    removeUse(function, inst.d);
    removeUse(function, inst.e);
    removeUse(function, inst.f);

    inst.a = replacement;
    inst.b = {};
    inst.c = {};
    inst.d = {};
    inst.e = {};
    inst.f = {};
}

void applySubstitutions(IrFunction& function, IrOp& op)
{
    if (op.kind == IrOpKind::Inst)
    {
        IrInst& src = function.instructions[op.index];

        if (src.cmd == IrCmd::SUBSTITUTE)
        {
            op.kind = src.a.kind;
            op.index = src.a.index;

            // If we substitute with the result of a different instruction, update the use count
            if (op.kind == IrOpKind::Inst)
            {
                IrInst& dst = function.instructions[op.index];
                LUAU_ASSERT(dst.cmd != IrCmd::SUBSTITUTE && "chained substitutions are not allowed");

                dst.useCount++;
            }

            LUAU_ASSERT(src.useCount > 0);
            src.useCount--;

            if (src.useCount == 0)
                removeUse(function, src.a);
        }
    }
}

void applySubstitutions(IrFunction& function, IrInst& inst)
{
    applySubstitutions(function, inst.a);
    applySubstitutions(function, inst.b);
    applySubstitutions(function, inst.c);
    applySubstitutions(function, inst.d);
    applySubstitutions(function, inst.e);
    applySubstitutions(function, inst.f);
}

bool compare(double a, double b, IrCondition cond)
{
    switch (cond)
    {
    case IrCondition::Equal:
        return a == b;
    case IrCondition::NotEqual:
        return a != b;
    case IrCondition::Less:
        return a < b;
    case IrCondition::NotLess:
        return !(a < b);
    case IrCondition::LessEqual:
        return a <= b;
    case IrCondition::NotLessEqual:
        return !(a <= b);
    case IrCondition::Greater:
        return a > b;
    case IrCondition::NotGreater:
        return !(a > b);
    case IrCondition::GreaterEqual:
        return a >= b;
    case IrCondition::NotGreaterEqual:
        return !(a >= b);
    default:
        LUAU_ASSERT(!"unsupported condition");
    }

    return false;
}
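// The Not* conditions are not redundant: with IEEE-754 doubles, !(a < b) differs from a >= b
// whenever either operand is NaN, so NotLess and GreaterEqual are distinct predicates.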
void foldConstants(IrBuilder& build, IrFunction& function, IrBlock& block, uint32_t index)
{
    IrInst& inst = function.instructions[index];

    switch (inst.cmd)
    {
    case IrCmd::ADD_INT:
        if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
        {
            // We need to avoid signed integer overflow, but we also have to produce a result
            // So we add numbers as unsigned and use fixed-width integer types to force a two's complement evaluation
            int32_t lhs = function.intOp(inst.a);
            int32_t rhs = function.intOp(inst.b);
            int sum = int32_t(uint32_t(lhs) + uint32_t(rhs));

            substitute(function, inst, build.constInt(sum));
        }
        break;
    case IrCmd::SUB_INT:
        if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
        {
            // We need to avoid signed integer overflow, but we also have to produce a result
            // So we subtract numbers as unsigned and use fixed-width integer types to force a two's complement evaluation
            int32_t lhs = function.intOp(inst.a);
            int32_t rhs = function.intOp(inst.b);
            int diff = int32_t(uint32_t(lhs) - uint32_t(rhs));

            substitute(function, inst, build.constInt(diff));
        }
        break;
    case IrCmd::ADD_NUM:
        if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
            substitute(function, inst, build.constDouble(function.doubleOp(inst.a) + function.doubleOp(inst.b)));
        break;
    case IrCmd::SUB_NUM:
        if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
            substitute(function, inst, build.constDouble(function.doubleOp(inst.a) - function.doubleOp(inst.b)));
        break;
    case IrCmd::MUL_NUM:
        if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
            substitute(function, inst, build.constDouble(function.doubleOp(inst.a) * function.doubleOp(inst.b)));
        break;
    case IrCmd::DIV_NUM:
        if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
            substitute(function, inst, build.constDouble(function.doubleOp(inst.a) / function.doubleOp(inst.b)));
        break;
    case IrCmd::MOD_NUM:
        if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
            substitute(function, inst, build.constDouble(luai_nummod(function.doubleOp(inst.a), function.doubleOp(inst.b))));
        break;
    case IrCmd::MIN_NUM:
        if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
        {
            double a1 = function.doubleOp(inst.a);
            double a2 = function.doubleOp(inst.b);

            substitute(function, inst, build.constDouble(a1 < a2 ? a1 : a2));
        }
        break;
    case IrCmd::MAX_NUM:
        if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
        {
            double a1 = function.doubleOp(inst.a);
            double a2 = function.doubleOp(inst.b);

            substitute(function, inst, build.constDouble(a1 > a2 ? a1 : a2));
        }
        break;
    case IrCmd::UNM_NUM:
        if (inst.a.kind == IrOpKind::Constant)
            substitute(function, inst, build.constDouble(-function.doubleOp(inst.a)));
        break;
    case IrCmd::FLOOR_NUM:
        if (inst.a.kind == IrOpKind::Constant)
            substitute(function, inst, build.constDouble(floor(function.doubleOp(inst.a))));
        break;
    case IrCmd::CEIL_NUM:
        if (inst.a.kind == IrOpKind::Constant)
            substitute(function, inst, build.constDouble(ceil(function.doubleOp(inst.a))));
        break;
    case IrCmd::ROUND_NUM:
        if (inst.a.kind == IrOpKind::Constant)
            substitute(function, inst, build.constDouble(round(function.doubleOp(inst.a))));
        break;
    case IrCmd::SQRT_NUM:
        if (inst.a.kind == IrOpKind::Constant)
            substitute(function, inst, build.constDouble(sqrt(function.doubleOp(inst.a))));
        break;
    case IrCmd::ABS_NUM:
        if (inst.a.kind == IrOpKind::Constant)
            substitute(function, inst, build.constDouble(fabs(function.doubleOp(inst.a))));
        break;
    case IrCmd::NOT_ANY:
        if (inst.a.kind == IrOpKind::Constant)
        {
            uint8_t a = function.tagOp(inst.a);

            if (a == LUA_TNIL)
                substitute(function, inst, build.constInt(1));
            else if (a != LUA_TBOOLEAN)
                substitute(function, inst, build.constInt(0));
            else if (inst.b.kind == IrOpKind::Constant)
                substitute(function, inst, build.constInt(function.intOp(inst.b) == 1 ? 0 : 1));
        }
        break;
    case IrCmd::JUMP_EQ_TAG:
        if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
        {
            if (function.tagOp(inst.a) == function.tagOp(inst.b))
                replace(function, block, index, {IrCmd::JUMP, inst.c});
            else
                replace(function, block, index, {IrCmd::JUMP, inst.d});
        }
        break;
    case IrCmd::JUMP_EQ_INT:
        if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
        {
            if (function.intOp(inst.a) == function.intOp(inst.b))
                replace(function, block, index, {IrCmd::JUMP, inst.c});
            else
                replace(function, block, index, {IrCmd::JUMP, inst.d});
        }
        break;
    case IrCmd::JUMP_LT_INT:
        if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
        {
            if (function.intOp(inst.a) < function.intOp(inst.b))
                replace(function, block, index, {IrCmd::JUMP, inst.c});
            else
                replace(function, block, index, {IrCmd::JUMP, inst.d});
        }
        break;
    case IrCmd::JUMP_GE_UINT:
        if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
        {
            if (unsigned(function.intOp(inst.a)) >= unsigned(function.intOp(inst.b)))
                replace(function, block, index, {IrCmd::JUMP, inst.c});
            else
                replace(function, block, index, {IrCmd::JUMP, inst.d});
        }
        break;
    case IrCmd::JUMP_CMP_NUM:
        if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
        {
            if (compare(function.doubleOp(inst.a), function.doubleOp(inst.b), conditionOp(inst.c)))
                replace(function, block, index, {IrCmd::JUMP, inst.d});
            else
                replace(function, block, index, {IrCmd::JUMP, inst.e});
        }
        break;
    case IrCmd::TRY_NUM_TO_INDEX:
        if (inst.a.kind == IrOpKind::Constant)
        {
            double value = function.doubleOp(inst.a);

            // To avoid undefined behavior of casting a value not representable in the target type, we check the range
            if (value >= INT_MIN && value <= INT_MAX)
            {
                int arrIndex = int(value);

                if (double(arrIndex) == value)
                    substitute(function, inst, build.constInt(arrIndex));
                else
                    replace(function, block, index, {IrCmd::JUMP, inst.b});
            }
            else
            {
                replace(function, block, index, {IrCmd::JUMP, inst.b});
            }
        }
        break;
    case IrCmd::INT_TO_NUM:
        if (inst.a.kind == IrOpKind::Constant)
            substitute(function, inst, build.constDouble(double(function.intOp(inst.a))));
        break;
    case IrCmd::UINT_TO_NUM:
        if (inst.a.kind == IrOpKind::Constant)
            substitute(function, inst, build.constDouble(double(unsigned(function.intOp(inst.a)))));
        break;
    case IrCmd::NUM_TO_INT:
        if (inst.a.kind == IrOpKind::Constant)
        {
            double value = function.doubleOp(inst.a);

            // To avoid undefined behavior of casting a value not representable in the target type, we check the range
            if (value >= INT_MIN && value <= INT_MAX)
                substitute(function, inst, build.constInt(int(value)));
        }
        break;
    case IrCmd::NUM_TO_UINT:
        if (inst.a.kind == IrOpKind::Constant)
        {
            double value = function.doubleOp(inst.a);

            // To avoid undefined behavior of casting a value not representable in the target type, we check the range
            if (value >= 0 && value <= UINT_MAX)
                substitute(function, inst, build.constInt(unsigned(function.doubleOp(inst.a))));
        }
        break;
    case IrCmd::CHECK_TAG:
        if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
        {
            if (function.tagOp(inst.a) == function.tagOp(inst.b))
                kill(function, inst);
            else
                replace(function, block, index, {IrCmd::JUMP, inst.c}); // Shows a conflict in assumptions on this path
        }
        break;
case IrCmd::BITAND_UINT:
|
||||
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
|
||||
{
|
||||
unsigned op1 = unsigned(function.intOp(inst.a));
|
||||
unsigned op2 = unsigned(function.intOp(inst.b));
|
||||
substitute(function, inst, build.constInt(op1 & op2));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (inst.a.kind == IrOpKind::Constant && function.intOp(inst.a) == 0) // (0 & b) -> 0
|
||||
substitute(function, inst, build.constInt(0));
|
||||
else if (inst.a.kind == IrOpKind::Constant && function.intOp(inst.a) == -1) // (-1 & b) -> b
|
||||
substitute(function, inst, inst.b);
|
||||
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0) // (a & 0) -> 0
|
||||
substitute(function, inst, build.constInt(0));
|
||||
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == -1) // (a & -1) -> a
|
||||
substitute(function, inst, inst.a);
|
||||
}
|
||||
break;
|
||||
case IrCmd::BITXOR_UINT:
|
||||
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
|
||||
{
|
||||
unsigned op1 = unsigned(function.intOp(inst.a));
|
||||
unsigned op2 = unsigned(function.intOp(inst.b));
|
||||
substitute(function, inst, build.constInt(op1 ^ op2));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (inst.a.kind == IrOpKind::Constant && function.intOp(inst.a) == 0) // (0 ^ b) -> b
|
||||
substitute(function, inst, inst.b);
|
||||
else if (inst.a.kind == IrOpKind::Constant && function.intOp(inst.a) == -1) // (-1 ^ b) -> ~b
|
||||
replace(function, block, index, {IrCmd::BITNOT_UINT, inst.b});
|
||||
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0) // (a ^ 0) -> a
|
||||
substitute(function, inst, inst.a);
|
||||
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == -1) // (a ^ -1) -> ~a
|
||||
replace(function, block, index, {IrCmd::BITNOT_UINT, inst.a});
|
||||
}
|
||||
break;
|
||||
case IrCmd::BITOR_UINT:
|
||||
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
|
||||
{
|
||||
unsigned op1 = unsigned(function.intOp(inst.a));
|
||||
unsigned op2 = unsigned(function.intOp(inst.b));
|
||||
substitute(function, inst, build.constInt(op1 | op2));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (inst.a.kind == IrOpKind::Constant && function.intOp(inst.a) == 0) // (0 | b) -> b
|
||||
substitute(function, inst, inst.b);
|
||||
else if (inst.a.kind == IrOpKind::Constant && function.intOp(inst.a) == -1) // (-1 | b) -> -1
|
||||
substitute(function, inst, build.constInt(-1));
|
||||
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0) // (a | 0) -> a
|
||||
substitute(function, inst, inst.a);
|
||||
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == -1) // (a | -1) -> -1
|
||||
substitute(function, inst, build.constInt(-1));
|
||||
}
|
||||
break;
|
||||
case IrCmd::BITNOT_UINT:
|
||||
if (inst.a.kind == IrOpKind::Constant)
|
||||
substitute(function, inst, build.constInt(~unsigned(function.intOp(inst.a))));
|
||||
break;
|
||||
case IrCmd::BITLSHIFT_UINT:
|
||||
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
|
||||
{
|
||||
unsigned op1 = unsigned(function.intOp(inst.a));
|
||||
int op2 = function.intOp(inst.b);
|
||||
|
||||
if (unsigned(op2) < 32)
|
||||
substitute(function, inst, build.constInt(op1 << op2));
|
||||
}
|
||||
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0)
|
||||
{
|
||||
substitute(function, inst, inst.a);
|
||||
}
|
||||
break;
|
||||
case IrCmd::BITRSHIFT_UINT:
|
||||
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
|
||||
{
|
||||
unsigned op1 = unsigned(function.intOp(inst.a));
|
||||
int op2 = function.intOp(inst.b);
|
||||
|
||||
if (unsigned(op2) < 32)
|
||||
substitute(function, inst, build.constInt(op1 >> op2));
|
||||
}
|
||||
else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0)
|
||||
{
|
||||
substitute(function, inst, inst.a);
|
||||
}
|
||||
break;
|
||||
case IrCmd::BITARSHIFT_UINT:
|
||||
if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
|
||||
{
|
||||
int op1 = function.intOp(inst.a);
|
||||
int op2 = function.intOp(inst.b);
|
||||
|
||||
if (unsigned(op2) < 32)
|
||||
{
|
||||
                // note: right shift of negative values is implementation-defined rather than UB; this behavior is getting
                // defined (as arithmetic shift) in C++20, and all compilers already do the right (shift) thing.
                substitute(function, inst, build.constInt(op1 >> op2));
            }
        }
        else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0)
        {
            substitute(function, inst, inst.a);
        }
        break;
    case IrCmd::BITLROTATE_UINT:
        if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
            substitute(function, inst, build.constInt(lrotate(unsigned(function.intOp(inst.a)), function.intOp(inst.b))));
        else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0)
            substitute(function, inst, inst.a);
        break;
    case IrCmd::BITRROTATE_UINT:
        if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Constant)
            substitute(function, inst, build.constInt(rrotate(unsigned(function.intOp(inst.a)), function.intOp(inst.b))));
        else if (inst.b.kind == IrOpKind::Constant && function.intOp(inst.b) == 0)
            substitute(function, inst, inst.a);
        break;
    case IrCmd::BITCOUNTLZ_UINT:
        if (inst.a.kind == IrOpKind::Constant)
            substitute(function, inst, build.constInt(countlz(unsigned(function.intOp(inst.a)))));
        break;
    case IrCmd::BITCOUNTRZ_UINT:
        if (inst.a.kind == IrOpKind::Constant)
            substitute(function, inst, build.constInt(countrz(unsigned(function.intOp(inst.a)))));
        break;
    default:
        break;
    }
}

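// For reference: the bit helpers used by the rotate/count folds above (lrotate, rrotate,
// countlz, countrz) are defined elsewhere in the Luau sources and are not part of this diff.
// A minimal sketch of their assumed semantics on 32-bit unsigned operands:
//
//     inline uint32_t lrotate(uint32_t u, int s)
//     {
//         return (u << (s & 31)) | (u >> ((32 - s) & 31)); // masking avoids UB at s == 0 or 32
//     }
//
//     inline uint32_t rrotate(uint32_t u, int s)
//     {
//         return (u >> (s & 31)) | (u << ((32 - s) & 31));
//     }
//
//     inline int countlz(uint32_t v) // 32 when v == 0
//     {
//         int n = 0;
//         while (n < 32 && (v & (0x80000000u >> n)) == 0)
//             n++;
//         return n;
//     }
//
//     inline int countrz(uint32_t v) // 32 when v == 0
//     {
//         int n = 0;
//         while (n < 32 && (v & (1u << n)) == 0)
//             n++;
//         return n;
//     }
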
uint32_t getNativeContextOffset(int bfid)
{
    switch (bfid)
    {
    case LBF_MATH_ACOS:
        return offsetof(NativeContext, libm_acos);
    case LBF_MATH_ASIN:
        return offsetof(NativeContext, libm_asin);
    case LBF_MATH_ATAN2:
        return offsetof(NativeContext, libm_atan2);
    case LBF_MATH_ATAN:
        return offsetof(NativeContext, libm_atan);
    case LBF_MATH_COSH:
        return offsetof(NativeContext, libm_cosh);
    case LBF_MATH_COS:
        return offsetof(NativeContext, libm_cos);
    case LBF_MATH_EXP:
        return offsetof(NativeContext, libm_exp);
    case LBF_MATH_LOG10:
        return offsetof(NativeContext, libm_log10);
    case LBF_MATH_LOG:
        return offsetof(NativeContext, libm_log);
    case LBF_MATH_SINH:
        return offsetof(NativeContext, libm_sinh);
    case LBF_MATH_SIN:
        return offsetof(NativeContext, libm_sin);
    case LBF_MATH_TANH:
        return offsetof(NativeContext, libm_tanh);
    case LBF_MATH_TAN:
        return offsetof(NativeContext, libm_tan);
    case LBF_MATH_FMOD:
        return offsetof(NativeContext, libm_fmod);
    case LBF_MATH_POW:
        return offsetof(NativeContext, libm_pow);
    case LBF_IR_MATH_LOG2:
        return offsetof(NativeContext, libm_log2);
    case LBF_MATH_LDEXP:
        return offsetof(NativeContext, libm_ldexp);
    default:
        LUAU_ASSERT(!"Unsupported bfid");
    }

    return 0;
}

} // namespace CodeGen
} // namespace Luau

@ -0,0 +1,222 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "IrValueLocationTracking.h"

namespace Luau
{
namespace CodeGen
{

IrValueLocationTracking::IrValueLocationTracking(IrFunction& function)
    : function(function)
{
    vmRegValue.fill(kInvalidInstIdx);
}

void IrValueLocationTracking::setRestoreCallback(void* context, void (*callback)(void* context, IrInst& inst))
{
    restoreCallbackCtx = context;
    restoreCallback = callback;
}

void IrValueLocationTracking::beforeInstLowering(IrInst& inst)
{
    switch (inst.cmd)
    {
    case IrCmd::STORE_TAG:
    case IrCmd::STORE_POINTER:
    case IrCmd::STORE_DOUBLE:
    case IrCmd::STORE_INT:
    case IrCmd::STORE_VECTOR:
    case IrCmd::STORE_TVALUE:
        invalidateRestoreOp(inst.a);
        break;
    case IrCmd::ADJUST_STACK_TO_REG:
        invalidateRestoreVmRegs(vmRegOp(inst.a), -1);
        break;
    case IrCmd::FASTCALL:
        invalidateRestoreVmRegs(vmRegOp(inst.b), function.intOp(inst.f));
        break;
    case IrCmd::INVOKE_FASTCALL:
        // Multiple return sequences (count == -1) are defined by ADJUST_STACK_TO_REG
        if (int count = function.intOp(inst.f); count != -1)
            invalidateRestoreVmRegs(vmRegOp(inst.b), count);
        break;
    case IrCmd::DO_ARITH:
    case IrCmd::DO_LEN:
    case IrCmd::GET_TABLE:
    case IrCmd::GET_IMPORT:
        invalidateRestoreOp(inst.a);
        break;
    case IrCmd::CONCAT:
        invalidateRestoreVmRegs(vmRegOp(inst.a), function.uintOp(inst.b));
        break;
    case IrCmd::GET_UPVALUE:
        invalidateRestoreOp(inst.a);
        break;
    case IrCmd::PREPARE_FORN:
        invalidateRestoreOp(inst.a);
        invalidateRestoreOp(inst.b);
        invalidateRestoreOp(inst.c);
        break;
    case IrCmd::CALL:
        // Even if result count is limited, all registers starting from function (ra) might be modified
        invalidateRestoreVmRegs(vmRegOp(inst.a), -1);
        break;
    case IrCmd::FORGLOOP:
    case IrCmd::FORGLOOP_FALLBACK:
        // Even if result count is limited, all registers starting from iteration index (ra+2) might be modified
        invalidateRestoreVmRegs(vmRegOp(inst.a) + 2, -1);
        break;
    case IrCmd::FALLBACK_GETGLOBAL:
    case IrCmd::FALLBACK_GETTABLEKS:
        invalidateRestoreOp(inst.b);
        break;
    case IrCmd::FALLBACK_NAMECALL:
        invalidateRestoreVmRegs(vmRegOp(inst.b), 2);
        break;
    case IrCmd::FALLBACK_GETVARARGS:
        invalidateRestoreVmRegs(vmRegOp(inst.b), function.intOp(inst.c));
        break;
    case IrCmd::FALLBACK_NEWCLOSURE:
    case IrCmd::FALLBACK_DUPCLOSURE:
        invalidateRestoreOp(inst.b);
        break;
    case IrCmd::FALLBACK_FORGPREP:
        invalidateRestoreVmRegs(vmRegOp(inst.b), 3);
        break;

    // Make sure all VmReg referencing instructions are handled explicitly (only register reads here)
    case IrCmd::LOAD_TAG:
    case IrCmd::LOAD_POINTER:
    case IrCmd::LOAD_DOUBLE:
    case IrCmd::LOAD_INT:
    case IrCmd::LOAD_TVALUE:
    case IrCmd::JUMP_IF_TRUTHY:
    case IrCmd::JUMP_IF_FALSY:
    case IrCmd::JUMP_CMP_ANY:
    case IrCmd::SET_TABLE:
    case IrCmd::SET_UPVALUE:
    case IrCmd::INTERRUPT:
    case IrCmd::BARRIER_OBJ:
    case IrCmd::BARRIER_TABLE_FORWARD:
    case IrCmd::CLOSE_UPVALS:
    case IrCmd::CAPTURE:
    case IrCmd::SETLIST:
    case IrCmd::RETURN:
    case IrCmd::FORGPREP_XNEXT_FALLBACK:
    case IrCmd::FALLBACK_SETGLOBAL:
    case IrCmd::FALLBACK_SETTABLEKS:
    case IrCmd::FALLBACK_PREPVARARGS:
    case IrCmd::ADJUST_STACK_TO_TOP:
        break;

    // These instructions read VmReg only after optimizeMemoryOperandsX64
    case IrCmd::CHECK_TAG:
    case IrCmd::ADD_NUM:
    case IrCmd::SUB_NUM:
    case IrCmd::MUL_NUM:
    case IrCmd::DIV_NUM:
    case IrCmd::MOD_NUM:
    case IrCmd::MIN_NUM:
    case IrCmd::MAX_NUM:
    case IrCmd::JUMP_EQ_TAG:
    case IrCmd::JUMP_CMP_NUM:
        break;

    default:
        // All instructions which reference registers have to be handled explicitly
        LUAU_ASSERT(inst.a.kind != IrOpKind::VmReg);
        LUAU_ASSERT(inst.b.kind != IrOpKind::VmReg);
        LUAU_ASSERT(inst.c.kind != IrOpKind::VmReg);
        LUAU_ASSERT(inst.d.kind != IrOpKind::VmReg);
        LUAU_ASSERT(inst.e.kind != IrOpKind::VmReg);
        LUAU_ASSERT(inst.f.kind != IrOpKind::VmReg);
        break;
    }
}

void IrValueLocationTracking::afterInstLowering(IrInst& inst, uint32_t instIdx)
{
    switch (inst.cmd)
    {
    case IrCmd::LOAD_TAG:
    case IrCmd::LOAD_POINTER:
    case IrCmd::LOAD_DOUBLE:
    case IrCmd::LOAD_INT:
    case IrCmd::LOAD_TVALUE:
        recordRestoreOp(instIdx, inst.a);
        break;
    case IrCmd::STORE_POINTER:
    case IrCmd::STORE_DOUBLE:
    case IrCmd::STORE_INT:
    case IrCmd::STORE_TVALUE:
        // If this is not the last use of the stored value, we can restore it from this new location
        if (inst.b.kind == IrOpKind::Inst && function.instOp(inst.b).lastUse != instIdx)
            recordRestoreOp(inst.b.index, inst.a);
        break;
    default:
        break;
    }
}

void IrValueLocationTracking::recordRestoreOp(uint32_t instIdx, IrOp location)
{
    if (location.kind == IrOpKind::VmReg)
    {
        int reg = vmRegOp(location);

        if (reg > maxReg)
            maxReg = reg;

        // Record location in register memory only if register is not captured
        if (!function.cfg.captured.regs.test(reg))
            function.recordRestoreOp(instIdx, location);

        vmRegValue[reg] = instIdx;
    }
    else if (location.kind == IrOpKind::VmConst)
    {
        function.recordRestoreOp(instIdx, location);
    }
}

void IrValueLocationTracking::invalidateRestoreOp(IrOp location)
{
    if (location.kind == IrOpKind::VmReg)
    {
        uint32_t& instIdx = vmRegValue[vmRegOp(location)];

        if (instIdx != kInvalidInstIdx)
        {
            IrInst& inst = function.instructions[instIdx];

            // If instruction value is spilled and memory location is about to be lost, it has to be restored immediately
            if (inst.needsReload)
                restoreCallback(restoreCallbackCtx, inst);

            // Instruction loses its memory storage location
            function.recordRestoreOp(instIdx, IrOp());

            // Register loses link with instruction
            instIdx = kInvalidInstIdx;
        }
    }
    else if (location.kind == IrOpKind::VmConst)
    {
        LUAU_ASSERT(!"VM constants are immutable");
    }
}

void IrValueLocationTracking::invalidateRestoreVmRegs(int start, int count)
{
    int end = count == -1 ? 255 : start + count;

    if (end > maxReg)
        end = maxReg;

    for (int reg = start; reg <= end; reg++)
        invalidateRestoreOp(IrOp{IrOpKind::VmReg, uint8_t(reg)});
}

} // namespace CodeGen
} // namespace Luau

@ -0,0 +1,38 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include "Luau/IrData.h"

#include <array>

namespace Luau
{
namespace CodeGen
{

struct IrValueLocationTracking
{
    IrValueLocationTracking(IrFunction& function);

    void setRestoreCallback(void* context, void (*callback)(void* context, IrInst& inst));

    void beforeInstLowering(IrInst& inst);
    void afterInstLowering(IrInst& inst, uint32_t instIdx);

    void recordRestoreOp(uint32_t instIdx, IrOp location);
    void invalidateRestoreOp(IrOp location);
    void invalidateRestoreVmRegs(int start, int count);

    IrFunction& function;

    std::array<uint32_t, 256> vmRegValue;

    // For range/full invalidations, we only want to visit the registers that we have actually recorded so far
    int maxReg = 0;

    void* restoreCallbackCtx = nullptr;
    void (*restoreCallback)(void* context, IrInst& inst) = nullptr;
};

} // namespace CodeGen
} // namespace Luau

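A rough usage sketch for the tracker above (the lowering entry point, block variable, and register-allocator context here are hypothetical, not part of this commit): lowering brackets every IR instruction with beforeInstLowering/afterInstLowering so that a spilled value can always be reloaded from the VM memory location that still holds it.

    IrValueLocationTracking tracking(function);

    // A captureless lambda converts to the plain function pointer the setter expects
    tracking.setRestoreCallback(&loweringState, [](void* context, IrInst& inst) {
        // hypothetical: have the register allocator reload the spilled value
        // before the VM stack slot that backs it is overwritten
    });

    for (uint32_t idx = block.start; idx <= block.finish; idx++)
    {
        IrInst& inst = function.instructions[idx];

        tracking.beforeInstLowering(inst);     // invalidate locations this inst clobbers
        lowerInst(inst, idx);                  // hypothetical lowering entry point
        tracking.afterInstLowering(inst, idx); // record freshly written locations
    }
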
@ -0,0 +1,111 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "NativeState.h"

#include "Luau/UnwindBuilder.h"

#include "CodeGenUtils.h"
#include "CustomExecUtils.h"

#include "lbuiltins.h"
#include "lgc.h"
#include "ltable.h"
#include "lfunc.h"
#include "lvm.h"

#include <math.h>
#include <string.h>

namespace Luau
{
namespace CodeGen
{

constexpr unsigned kBlockSize = 4 * 1024 * 1024;
constexpr unsigned kMaxTotalSize = 256 * 1024 * 1024;

NativeState::NativeState()
    : codeAllocator(kBlockSize, kMaxTotalSize)
{
}

NativeState::~NativeState() = default;

void initFunctions(NativeState& data)
{
    static_assert(sizeof(data.context.luauF_table) == sizeof(luauF_table), "fastcall tables are not of the same length");
    memcpy(data.context.luauF_table, luauF_table, sizeof(luauF_table));

    data.context.luaV_lessthan = luaV_lessthan;
    data.context.luaV_lessequal = luaV_lessequal;
    data.context.luaV_equalval = luaV_equalval;
    data.context.luaV_doarith = luaV_doarith;
    data.context.luaV_dolen = luaV_dolen;
    data.context.luaV_prepareFORN = luaV_prepareFORN;
    data.context.luaV_gettable = luaV_gettable;
    data.context.luaV_settable = luaV_settable;
    data.context.luaV_getimport = luaV_getimport;
    data.context.luaV_concat = luaV_concat;

    data.context.luaH_getn = luaH_getn;
    data.context.luaH_new = luaH_new;
    data.context.luaH_clone = luaH_clone;
    data.context.luaH_resizearray = luaH_resizearray;

    data.context.luaC_barriertable = luaC_barriertable;
    data.context.luaC_barrierf = luaC_barrierf;
    data.context.luaC_barrierback = luaC_barrierback;
    data.context.luaC_step = luaC_step;

    data.context.luaF_close = luaF_close;

    data.context.luaT_gettm = luaT_gettm;
    data.context.luaT_objtypenamestr = luaT_objtypenamestr;

    data.context.libm_exp = exp;
    data.context.libm_pow = pow;
    data.context.libm_fmod = fmod;
    data.context.libm_log = log;
    data.context.libm_log2 = log2;
    data.context.libm_log10 = log10;
    data.context.libm_ldexp = ldexp;
    data.context.libm_round = round;
    data.context.libm_frexp = frexp;
    data.context.libm_modf = modf;

    data.context.libm_asin = asin;
    data.context.libm_sin = sin;
    data.context.libm_sinh = sinh;
    data.context.libm_acos = acos;
    data.context.libm_cos = cos;
    data.context.libm_cosh = cosh;
    data.context.libm_atan = atan;
    data.context.libm_atan2 = atan2;
    data.context.libm_tan = tan;
    data.context.libm_tanh = tanh;

    data.context.forgLoopTableIter = forgLoopTableIter;
    data.context.forgLoopNodeIter = forgLoopNodeIter;
    data.context.forgLoopNonTableFallback = forgLoopNonTableFallback;
    data.context.forgPrepXnextFallback = forgPrepXnextFallback;
    data.context.callProlog = callProlog;
    data.context.callEpilogC = callEpilogC;

    data.context.callFallback = callFallback;
    data.context.returnFallback = returnFallback;

    data.context.executeGETGLOBAL = executeGETGLOBAL;
    data.context.executeSETGLOBAL = executeSETGLOBAL;
    data.context.executeGETTABLEKS = executeGETTABLEKS;
    data.context.executeSETTABLEKS = executeSETTABLEKS;

    data.context.executeNEWCLOSURE = executeNEWCLOSURE;
    data.context.executeNAMECALL = executeNAMECALL;
    data.context.executeFORGPREP = executeFORGPREP;
    data.context.executeGETVARARGS = executeGETVARARGS;
    data.context.executeDUPCLOSURE = executeDUPCLOSURE;
    data.context.executePREPVARARGS = executePREPVARARGS;
    data.context.executeSETLIST = executeSETLIST;
}

} // namespace CodeGen
} // namespace Luau

@ -0,0 +1,127 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once

#include "Luau/Bytecode.h"
#include "Luau/CodeAllocator.h"
#include "Luau/Label.h"

#include <memory>

#include <stdint.h>

#include "ldebug.h"
#include "lobject.h"
#include "ltm.h"
#include "lstate.h"

typedef int (*luau_FastFunction)(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams);

namespace Luau
{
namespace CodeGen
{

class UnwindBuilder;

struct NativeContext
{
    // Gateway (C => native transition) entry & exit, compiled at runtime
    uint8_t* gateEntry = nullptr;
    uint8_t* gateExit = nullptr;

    // Helper functions, implemented in C
    int (*luaV_lessthan)(lua_State* L, const TValue* l, const TValue* r) = nullptr;
    int (*luaV_lessequal)(lua_State* L, const TValue* l, const TValue* r) = nullptr;
    int (*luaV_equalval)(lua_State* L, const TValue* t1, const TValue* t2) = nullptr;
    void (*luaV_doarith)(lua_State* L, StkId ra, const TValue* rb, const TValue* rc, TMS op) = nullptr;
    void (*luaV_dolen)(lua_State* L, StkId ra, const TValue* rb) = nullptr;
    void (*luaV_prepareFORN)(lua_State* L, StkId plimit, StkId pstep, StkId pinit) = nullptr;
    void (*luaV_gettable)(lua_State* L, const TValue* t, TValue* key, StkId val) = nullptr;
    void (*luaV_settable)(lua_State* L, const TValue* t, TValue* key, StkId val) = nullptr;
    void (*luaV_getimport)(lua_State* L, Table* env, TValue* k, uint32_t id, bool propagatenil) = nullptr;
    void (*luaV_concat)(lua_State* L, int total, int last) = nullptr;

    int (*luaH_getn)(Table* t) = nullptr;
    Table* (*luaH_new)(lua_State* L, int narray, int lnhash) = nullptr;
    Table* (*luaH_clone)(lua_State* L, Table* tt) = nullptr;
    void (*luaH_resizearray)(lua_State* L, Table* t, int nasize) = nullptr;

    void (*luaC_barriertable)(lua_State* L, Table* t, GCObject* v) = nullptr;
    void (*luaC_barrierf)(lua_State* L, GCObject* o, GCObject* v) = nullptr;
    void (*luaC_barrierback)(lua_State* L, GCObject* o, GCObject** gclist) = nullptr;
    size_t (*luaC_step)(lua_State* L, bool assist) = nullptr;

    void (*luaF_close)(lua_State* L, StkId level) = nullptr;

    const TValue* (*luaT_gettm)(Table* events, TMS event, TString* ename) = nullptr;
    const TString* (*luaT_objtypenamestr)(lua_State* L, const TValue* o) = nullptr;

    double (*libm_exp)(double) = nullptr;
    double (*libm_pow)(double, double) = nullptr;
    double (*libm_fmod)(double, double) = nullptr;
    double (*libm_asin)(double) = nullptr;
    double (*libm_sin)(double) = nullptr;
    double (*libm_sinh)(double) = nullptr;
    double (*libm_acos)(double) = nullptr;
    double (*libm_cos)(double) = nullptr;
    double (*libm_cosh)(double) = nullptr;
    double (*libm_atan)(double) = nullptr;
    double (*libm_atan2)(double, double) = nullptr;
    double (*libm_tan)(double) = nullptr;
    double (*libm_tanh)(double) = nullptr;
    double (*libm_log)(double) = nullptr;
    double (*libm_log2)(double) = nullptr;
    double (*libm_log10)(double) = nullptr;
    double (*libm_ldexp)(double, int) = nullptr;
    double (*libm_round)(double) = nullptr;
    double (*libm_frexp)(double, int*) = nullptr;
    double (*libm_modf)(double, double*) = nullptr;

    // Helper functions
    bool (*forgLoopTableIter)(lua_State* L, Table* h, int index, TValue* ra) = nullptr;
    bool (*forgLoopNodeIter)(lua_State* L, Table* h, int index, TValue* ra) = nullptr;
    bool (*forgLoopNonTableFallback)(lua_State* L, int insnA, int aux) = nullptr;
    void (*forgPrepXnextFallback)(lua_State* L, TValue* ra, int pc) = nullptr;
    Closure* (*callProlog)(lua_State* L, TValue* ra, StkId argtop, int nresults) = nullptr;
    void (*callEpilogC)(lua_State* L, int nresults, int n) = nullptr;

    Closure* (*callFallback)(lua_State* L, StkId ra, StkId argtop, int nresults) = nullptr;
    Closure* (*returnFallback)(lua_State* L, StkId ra, StkId valend) = nullptr;

    // Opcode fallbacks, implemented in C
    const Instruction* (*executeGETGLOBAL)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
    const Instruction* (*executeSETGLOBAL)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
    const Instruction* (*executeGETTABLEKS)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
    const Instruction* (*executeSETTABLEKS)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
    const Instruction* (*executeNEWCLOSURE)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
    const Instruction* (*executeNAMECALL)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
    const Instruction* (*executeSETLIST)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
    const Instruction* (*executeFORGPREP)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
    const Instruction* (*executeGETVARARGS)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
    const Instruction* (*executeDUPCLOSURE)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;
    const Instruction* (*executePREPVARARGS)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;

    // Fast call methods, implemented in C
    luau_FastFunction luauF_table[256] = {};
};

using GateFn = int (*)(lua_State*, Proto*, uintptr_t, NativeContext*);

struct NativeState
{
    NativeState();
    ~NativeState();

    CodeAllocator codeAllocator;
    std::unique_ptr<UnwindBuilder> unwindBuilder;

    uint8_t* gateData = nullptr;
    size_t gateDataSize = 0;

    NativeContext context;
};

void initFunctions(NativeState& data);

} // namespace CodeGen
} // namespace Luau

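Tying NativeContext back to getNativeContextOffset in IrUtils above: generated code reaches the libm helpers indirectly through the context pointer, using the offsetof-based offsets. A hypothetical C++ equivalent of what a lowered call to a unary math builtin performs (this assumes bfid names a double(*)(double) entry such as LBF_MATH_SIN, which holds for the offsets the switch returns):

    double callUnaryLibm(NativeContext* ctx, int bfid, double arg)
    {
        // fetch the function pointer stored at the context offset for this builtin
        auto fn = *reinterpret_cast<double (**)(double)>(reinterpret_cast<char*>(ctx) + getNativeContextOffset(bfid));
        return fn(arg);
    }
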
File diff suppressed because it is too large

@ -0,0 +1,109 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/OptimizeFinalX64.h"

#include "Luau/IrUtils.h"

#include <utility>

namespace Luau
{
namespace CodeGen
{

// x64 assembly allows memory operands, but IR separates loads from uses
// To improve final x64 lowering, we try to 'inline' single-use register/constant loads into some of our instructions
// This pass might not be useful on different architectures
static void optimizeMemoryOperandsX64(IrFunction& function, IrBlock& block)
{
    LUAU_ASSERT(block.kind != IrBlockKind::Dead);

    for (uint32_t index = block.start; index <= block.finish; index++)
    {
        LUAU_ASSERT(index < function.instructions.size());
        IrInst& inst = function.instructions[index];

        switch (inst.cmd)
        {
        case IrCmd::CHECK_TAG:
        {
            if (inst.a.kind == IrOpKind::Inst)
            {
                IrInst& tag = function.instOp(inst.a);

                if (tag.useCount == 1 && tag.cmd == IrCmd::LOAD_TAG && (tag.a.kind == IrOpKind::VmReg || tag.a.kind == IrOpKind::VmConst))
                    replace(function, inst.a, tag.a);
            }
            break;
        }
        case IrCmd::ADD_NUM:
        case IrCmd::SUB_NUM:
        case IrCmd::MUL_NUM:
        case IrCmd::DIV_NUM:
        case IrCmd::MOD_NUM:
        case IrCmd::MIN_NUM:
        case IrCmd::MAX_NUM:
        {
            if (inst.b.kind == IrOpKind::Inst)
            {
                IrInst& rhs = function.instOp(inst.b);

                if (rhs.useCount == 1 && rhs.cmd == IrCmd::LOAD_DOUBLE && (rhs.a.kind == IrOpKind::VmReg || rhs.a.kind == IrOpKind::VmConst))
                    replace(function, inst.b, rhs.a);
            }
            break;
        }
        case IrCmd::JUMP_EQ_TAG:
        {
            if (inst.a.kind == IrOpKind::Inst)
            {
                IrInst& tagA = function.instOp(inst.a);

                if (tagA.useCount == 1 && tagA.cmd == IrCmd::LOAD_TAG && (tagA.a.kind == IrOpKind::VmReg || tagA.a.kind == IrOpKind::VmConst))
                {
                    replace(function, inst.a, tagA.a);
                    break;
                }
            }

            if (inst.b.kind == IrOpKind::Inst)
            {
                IrInst& tagB = function.instOp(inst.b);

                if (tagB.useCount == 1 && tagB.cmd == IrCmd::LOAD_TAG && (tagB.a.kind == IrOpKind::VmReg || tagB.a.kind == IrOpKind::VmConst))
                {
                    std::swap(inst.a, inst.b);
                    replace(function, inst.a, tagB.a);
                }
            }
            break;
        }
        case IrCmd::JUMP_CMP_NUM:
        {
            if (inst.a.kind == IrOpKind::Inst)
            {
                IrInst& num = function.instOp(inst.a);

                if (num.useCount == 1 && num.cmd == IrCmd::LOAD_DOUBLE)
                    replace(function, inst.a, num.a);
            }
            break;
        }
        default:
            break;
        }
    }
}

void optimizeMemoryOperandsX64(IrFunction& function)
{
    for (IrBlock& block : function.blocks)
    {
        if (block.kind == IrBlockKind::Dead)
            continue;

        optimizeMemoryOperandsX64(function, block);
    }
}

} // namespace CodeGen
} // namespace Luau

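To make the effect of this pass concrete, a hypothetical before/after in IR notation (IrCmd names are real, the value numbers and register are invented for illustration):

    // before:                      // after optimizeMemoryOperandsX64:
    //   %1 = LOAD_DOUBLE R2        //   %2 = ADD_NUM %0, R2
    //   %2 = ADD_NUM %0, %1
    //
    // The single-use LOAD_DOUBLE is folded into its consumer, so x64 lowering
    // can encode the VM register directly as a memory operand of the addition
    // instead of first loading it into a scratch register.
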
@ -0,0 +1,299 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/UnwindBuilderDwarf2.h"

#include "ByteUtils.h"

#include <string.h>

// General information about Dwarf2 format can be found at:
// https://dwarfstd.org/doc/dwarf-2.0.0.pdf [DWARF Debugging Information Format]
// Main part for async exception unwinding is in section '6.4 Call Frame Information'

// Information about System V ABI (AMD64) can be found at:
// https://refspecs.linuxbase.org/elf/x86_64-abi-0.99.pdf [System V Application Binary Interface (AMD64 Architecture Processor Supplement)]
// Interaction between Dwarf2 and System V ABI can be found in sections '3.6.2 DWARF Register Number Mapping' and '4.2.4 EH_FRAME sections'

// Call frame instruction opcodes (Dwarf2, page 78, ch. 7.23 figure 37)
#define DW_CFA_advance_loc 0x40
#define DW_CFA_offset 0x80
#define DW_CFA_restore 0xc0
#define DW_CFA_set_loc 0x01
#define DW_CFA_advance_loc1 0x02
#define DW_CFA_advance_loc2 0x03
#define DW_CFA_advance_loc4 0x04
#define DW_CFA_offset_extended 0x05
#define DW_CFA_restore_extended 0x06
#define DW_CFA_undefined 0x07
#define DW_CFA_same_value 0x08
#define DW_CFA_register 0x09
#define DW_CFA_remember_state 0x0a
#define DW_CFA_restore_state 0x0b
#define DW_CFA_def_cfa 0x0c
#define DW_CFA_def_cfa_register 0x0d
#define DW_CFA_def_cfa_offset 0x0e
#define DW_CFA_def_cfa_expression 0x0f
#define DW_CFA_nop 0x00
#define DW_CFA_lo_user 0x1c
#define DW_CFA_hi_user 0x3f

// Register numbers for X64 (System V ABI, page 57, ch. 3.7, figure 3.36)
#define DW_REG_X64_RAX 0
#define DW_REG_X64_RDX 1
#define DW_REG_X64_RCX 2
#define DW_REG_X64_RBX 3
#define DW_REG_X64_RSI 4
#define DW_REG_X64_RDI 5
#define DW_REG_X64_RBP 6
#define DW_REG_X64_RSP 7
#define DW_REG_X64_RA 16

// Register numbers for A64 (DWARF for the Arm 64-bit Architecture, ch. 4.1)
#define DW_REG_A64_FP 29
#define DW_REG_A64_LR 30
#define DW_REG_A64_SP 31

// X64 register mapping from real register index to DWARF2 (r8..r15 are mapped 1-1, but named registers aren't)
const int regIndexToDwRegX64[16] = {DW_REG_X64_RAX, DW_REG_X64_RCX, DW_REG_X64_RDX, DW_REG_X64_RBX, DW_REG_X64_RSP, DW_REG_X64_RBP, DW_REG_X64_RSI,
    DW_REG_X64_RDI, 8, 9, 10, 11, 12, 13, 14, 15};

const int kCodeAlignFactor = 1;
const int kDataAlignFactor = 8;
const int kDwarfAlign = 8;
const int kFdeInitialLocationOffset = 8;
const int kFdeAddressRangeOffset = 16;

// Define canonical frame address expression as [reg + offset]
static uint8_t* defineCfaExpression(uint8_t* pos, int dwReg, uint32_t stackOffset)
{
    pos = writeu8(pos, DW_CFA_def_cfa);
    pos = writeuleb128(pos, dwReg);
    pos = writeuleb128(pos, stackOffset);
    return pos;
}

// Update offset value in canonical frame address expression
static uint8_t* defineCfaExpressionOffset(uint8_t* pos, uint32_t stackOffset)
{
    pos = writeu8(pos, DW_CFA_def_cfa_offset);
    pos = writeuleb128(pos, stackOffset);
    return pos;
}

static uint8_t* defineSavedRegisterLocation(uint8_t* pos, int dwReg, uint32_t stackOffset)
{
    LUAU_ASSERT(stackOffset % kDataAlignFactor == 0 && "stack offsets have to be measured in kDataAlignFactor units");

    if (dwReg <= 0x3f)
    {
        pos = writeu8(pos, DW_CFA_offset + dwReg);
    }
    else
    {
        pos = writeu8(pos, DW_CFA_offset_extended);
        pos = writeuleb128(pos, dwReg);
    }

    pos = writeuleb128(pos, stackOffset / kDataAlignFactor);
    return pos;
}

static uint8_t* advanceLocation(uint8_t* pos, unsigned int offset)
{
    LUAU_ASSERT(offset < 256);
    pos = writeu8(pos, DW_CFA_advance_loc1);
    pos = writeu8(pos, offset);
    return pos;
}

static uint8_t* alignPosition(uint8_t* start, uint8_t* pos)
{
    size_t size = pos - start;
    size_t pad = ((size + kDwarfAlign - 1) & ~(kDwarfAlign - 1)) - size;

    for (size_t i = 0; i < pad; i++)
        pos = writeu8(pos, DW_CFA_nop);

    return pos;
}

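// For reference: writeu8/writeu32/writeu64/writeuleb128 come from ByteUtils.h, which is
// not part of this excerpt. Assuming standard unsigned LEB128, the encoder is presumably
// equivalent to this sketch:
//
//     inline uint8_t* writeuleb128(uint8_t* pos, uint32_t value)
//     {
//         do
//         {
//             uint8_t byte = value & 0x7f;
//             value >>= 7;
//
//             if (value != 0)
//                 byte |= 0x80; // high bit set means more bytes follow
//
//             *pos++ = byte;
//         } while (value != 0);
//
//         return pos;
//     }
//
// startInfo below leans on this when it encodes the *signed* data align factor:
// -8 fits in a single signed-LEB128 byte, so it can pass -kDataAlignFactor & 0x7f
// through the unsigned writer.
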
namespace Luau
{
namespace CodeGen
{

void UnwindBuilderDwarf2::setBeginOffset(size_t beginOffset)
{
    this->beginOffset = beginOffset;
}

size_t UnwindBuilderDwarf2::getBeginOffset() const
{
    return beginOffset;
}

void UnwindBuilderDwarf2::startInfo(Arch arch)
{
    LUAU_ASSERT(arch == A64 || arch == X64);

    uint8_t* cieLength = pos;
    pos = writeu32(pos, 0); // Length (to be filled later)

    pos = writeu32(pos, 0); // CIE id. 0 -- .eh_frame
    pos = writeu8(pos, 1);  // Version

    pos = writeu8(pos, 0); // CIE augmentation String ""

    int ra = arch == A64 ? DW_REG_A64_LR : DW_REG_X64_RA;

    pos = writeuleb128(pos, kCodeAlignFactor); // Code align factor
    pos = writeuleb128(pos, -kDataAlignFactor & 0x7f); // Data align factor (as signed LEB128)
    pos = writeu8(pos, ra); // Return address register

    // Optional CIE augmentation section (not present)

    // Call frame instructions (common for all FDEs)
    if (arch == A64)
    {
        pos = defineCfaExpression(pos, DW_REG_A64_SP, 0); // Define CFA to be the sp
    }
    else
    {
        pos = defineCfaExpression(pos, DW_REG_X64_RSP, 8);        // Define CFA to be the rsp + 8
        pos = defineSavedRegisterLocation(pos, DW_REG_X64_RA, 8); // Define return address register (RA) to be located at CFA - 8
    }

    pos = alignPosition(cieLength, pos);
    writeu32(cieLength, unsigned(pos - cieLength - 4)); // Length field itself is excluded from length
}

void UnwindBuilderDwarf2::startFunction()
{
    // End offset is filled in later and everything gets adjusted at the end
    UnwindFunctionDwarf2 func;
    func.beginOffset = 0;
    func.endOffset = 0;
    func.fdeEntryStartPos = uint32_t(pos - rawData);
    unwindFunctions.push_back(func);

    fdeEntryStart = pos;                          // Will be written at the end
    pos = writeu32(pos, 0);                       // Length (to be filled later)
    pos = writeu32(pos, unsigned(pos - rawData)); // CIE pointer
    pos = writeu64(pos, 0);                       // Initial location (to be filled later)
    pos = writeu64(pos, 0);                       // Address range (to be filled later)

    // Optional CIE augmentation section (not present)

    // Function call frame instructions to follow
}

void UnwindBuilderDwarf2::finishFunction(uint32_t beginOffset, uint32_t endOffset)
{
    unwindFunctions.back().beginOffset = beginOffset;
    unwindFunctions.back().endOffset = endOffset;

    LUAU_ASSERT(fdeEntryStart != nullptr);

    pos = alignPosition(fdeEntryStart, pos);
    writeu32(fdeEntryStart, unsigned(pos - fdeEntryStart - 4)); // Length field itself is excluded from length
}

void UnwindBuilderDwarf2::finishInfo()
{
    // Terminate section
    pos = writeu32(pos, 0);

    LUAU_ASSERT(getSize() <= kRawDataLimit);
}

void UnwindBuilderDwarf2::prologueA64(uint32_t prologueSize, uint32_t stackSize, std::initializer_list<A64::RegisterA64> regs)
{
    LUAU_ASSERT(stackSize % 16 == 0);
    LUAU_ASSERT(regs.size() >= 2 && regs.begin()[0] == A64::x29 && regs.begin()[1] == A64::x30);
    LUAU_ASSERT(regs.size() * 8 <= stackSize);

    // sub sp, sp, stackSize
    pos = advanceLocation(pos, 4);
    pos = defineCfaExpressionOffset(pos, stackSize);

    // stp/str to store each register to stack in order
    pos = advanceLocation(pos, prologueSize - 4);

    for (size_t i = 0; i < regs.size(); ++i)
    {
        LUAU_ASSERT(regs.begin()[i].kind == A64::KindA64::x);
        pos = defineSavedRegisterLocation(pos, regs.begin()[i].index, stackSize - unsigned(i * 8));
    }
}

void UnwindBuilderDwarf2::prologueX64(uint32_t prologueSize, uint32_t stackSize, bool setupFrame, std::initializer_list<X64::RegisterX64> regs)
{
    LUAU_ASSERT(stackSize > 0 && stackSize <= 128 && stackSize % 8 == 0);

    unsigned int stackOffset = 8; // Return address was pushed by calling the function
    unsigned int prologueOffset = 0;

    if (setupFrame)
    {
        // push rbp
        stackOffset += 8;
        prologueOffset += 2;
        pos = advanceLocation(pos, 2);
        pos = defineCfaExpressionOffset(pos, stackOffset);
        pos = defineSavedRegisterLocation(pos, DW_REG_X64_RBP, stackOffset);

        // mov rbp, rsp
        prologueOffset += 3;
        pos = advanceLocation(pos, 3);
    }

    // push reg
    for (X64::RegisterX64 reg : regs)
    {
        LUAU_ASSERT(reg.size == X64::SizeX64::qword);

        stackOffset += 8;
        prologueOffset += 2;
        pos = advanceLocation(pos, 2);
        pos = defineCfaExpressionOffset(pos, stackOffset);
        pos = defineSavedRegisterLocation(pos, regIndexToDwRegX64[reg.index], stackOffset);
    }

    // sub rsp, stackSize
    stackOffset += stackSize;
    prologueOffset += 4;
    pos = advanceLocation(pos, 4);
    pos = defineCfaExpressionOffset(pos, stackOffset);

    LUAU_ASSERT(stackOffset % 16 == 0);
    LUAU_ASSERT(prologueOffset == prologueSize);
}

size_t UnwindBuilderDwarf2::getSize() const
{
    return size_t(pos - rawData);
}

size_t UnwindBuilderDwarf2::getFunctionCount() const
{
    return unwindFunctions.size();
}

void UnwindBuilderDwarf2::finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const
{
    memcpy(target, rawData, getSize());

    for (const UnwindFunctionDwarf2& func : unwindFunctions)
    {
        uint8_t* fdeEntry = (uint8_t*)target + func.fdeEntryStartPos;

        writeu64(fdeEntry + kFdeInitialLocationOffset, uintptr_t(funcAddress) + offset + func.beginOffset);

        if (func.endOffset == kFullBlockFuncton)
            writeu64(fdeEntry + kFdeAddressRangeOffset, funcSize - offset);
        else
            writeu64(fdeEntry + kFdeAddressRangeOffset, func.endOffset - func.beginOffset);
    }
}

} // namespace CodeGen
} // namespace Luau

@ -0,0 +1,190 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/UnwindBuilderWin.h"

#include <string.h>

// Information about the Windows x64 unwinding data setup can be found at:
// https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64 [x64 exception handling]

#define UWOP_PUSH_NONVOL 0
#define UWOP_ALLOC_LARGE 1
#define UWOP_ALLOC_SMALL 2
#define UWOP_SET_FPREG 3
#define UWOP_SAVE_NONVOL 4
#define UWOP_SAVE_NONVOL_FAR 5
#define UWOP_SAVE_XMM128 8
#define UWOP_SAVE_XMM128_FAR 9
#define UWOP_PUSH_MACHFRAME 10

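// For reference: UnwindInfoWin and UnwindCodeWin are declared in UnwindBuilderWin.h
// (not shown in this diff). Going by the UNWIND_INFO/UNWIND_CODE layout documented for
// Windows x64, a code entry presumably looks like:
//
//     struct UnwindCodeWin
//     {
//         uint8_t offset;     // offset of the end of the instruction in the prolog
//         uint8_t opcode : 4; // UWOP_* operation code
//         uint8_t opinfo : 4; // operation info, e.g. register number or scaled allocation size
//     };
//
// which matches the {offset, opcode, info} aggregates pushed in prologueX64 below.
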
namespace Luau
{
namespace CodeGen
{

void UnwindBuilderWin::setBeginOffset(size_t beginOffset)
{
    this->beginOffset = beginOffset;
}

size_t UnwindBuilderWin::getBeginOffset() const
{
    return beginOffset;
}

void UnwindBuilderWin::startInfo(Arch arch)
{
    LUAU_ASSERT(arch == X64);
}

void UnwindBuilderWin::startFunction()
{
    // End offset is filled in later and everything gets adjusted at the end
    UnwindFunctionWin func;
    func.beginOffset = 0;
    func.endOffset = 0;
    func.unwindInfoOffset = uint32_t(rawDataPos - rawData);
    unwindFunctions.push_back(func);

    unwindCodes.clear();
    unwindCodes.reserve(16);

    prologSize = 0;

    // rax has register index 0, which in Windows unwind info means that frame register is not used
    frameReg = X64::rax;
    frameRegOffset = 0;
}

void UnwindBuilderWin::finishFunction(uint32_t beginOffset, uint32_t endOffset)
{
    unwindFunctions.back().beginOffset = beginOffset;
    unwindFunctions.back().endOffset = endOffset;

    // Windows unwind code count is stored in uint8_t, so we can't have more
    LUAU_ASSERT(unwindCodes.size() < 256);

    UnwindInfoWin info;
    info.version = 1;
    info.flags = 0; // No EH
    info.prologsize = prologSize;
    info.unwindcodecount = uint8_t(unwindCodes.size());

    LUAU_ASSERT(frameReg.index < 16);
    info.framereg = frameReg.index;

    LUAU_ASSERT(frameRegOffset < 16);
    info.frameregoff = frameRegOffset;

    LUAU_ASSERT(rawDataPos + sizeof(info) <= rawData + kRawDataLimit);
    memcpy(rawDataPos, &info, sizeof(info));
    rawDataPos += sizeof(info);

    if (!unwindCodes.empty())
    {
        // Copy unwind codes in reverse order
        // Some unwind codes take up two array slots, but we don't use those atm
        uint8_t* unwindCodePos = rawDataPos + sizeof(UnwindCodeWin) * (unwindCodes.size() - 1);
        LUAU_ASSERT(unwindCodePos <= rawData + kRawDataLimit);

        for (size_t i = 0; i < unwindCodes.size(); i++)
        {
            memcpy(unwindCodePos, &unwindCodes[i], sizeof(UnwindCodeWin));
            unwindCodePos -= sizeof(UnwindCodeWin);
        }
    }

    rawDataPos += sizeof(UnwindCodeWin) * unwindCodes.size();

    // Size has to be even, but unwind code count doesn't have to
    if (unwindCodes.size() % 2 != 0)
        rawDataPos += sizeof(UnwindCodeWin);

    LUAU_ASSERT(rawDataPos <= rawData + kRawDataLimit);
}

void UnwindBuilderWin::finishInfo() {}

void UnwindBuilderWin::prologueA64(uint32_t prologueSize, uint32_t stackSize, std::initializer_list<A64::RegisterA64> regs)
{
    LUAU_ASSERT(!"Not implemented");
}

void UnwindBuilderWin::prologueX64(uint32_t prologueSize, uint32_t stackSize, bool setupFrame, std::initializer_list<X64::RegisterX64> regs)
{
    LUAU_ASSERT(stackSize > 0 && stackSize <= 128 && stackSize % 8 == 0);
    LUAU_ASSERT(prologueSize < 256);

    unsigned int stackOffset = 8; // Return address was pushed by calling the function
    unsigned int prologueOffset = 0;

    if (setupFrame)
    {
        // push rbp
        stackOffset += 8;
        prologueOffset += 2;
        unwindCodes.push_back({uint8_t(prologueOffset), UWOP_PUSH_NONVOL, X64::rbp.index});

        // mov rbp, rsp
        prologueOffset += 3;
        frameReg = X64::rbp;
        frameRegOffset = 0;
        unwindCodes.push_back({uint8_t(prologueOffset), UWOP_SET_FPREG, frameRegOffset});
    }

    // push reg
    for (X64::RegisterX64 reg : regs)
    {
        LUAU_ASSERT(reg.size == X64::SizeX64::qword);

        stackOffset += 8;
        prologueOffset += 2;
        unwindCodes.push_back({uint8_t(prologueOffset), UWOP_PUSH_NONVOL, reg.index});
    }

    // sub rsp, stackSize
    stackOffset += stackSize;
    prologueOffset += 4;
    unwindCodes.push_back({uint8_t(prologueOffset), UWOP_ALLOC_SMALL, uint8_t((stackSize - 8) / 8)});

    LUAU_ASSERT(stackOffset % 16 == 0);
    LUAU_ASSERT(prologueOffset == prologueSize);

    this->prologSize = prologueSize;
}

size_t UnwindBuilderWin::getSize() const
{
    return sizeof(UnwindFunctionWin) * unwindFunctions.size() + size_t(rawDataPos - rawData);
}

size_t UnwindBuilderWin::getFunctionCount() const
{
    return unwindFunctions.size();
}

void UnwindBuilderWin::finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const
{
    // Copy adjusted function information
    for (UnwindFunctionWin func : unwindFunctions)
    {
        // Code will start after the unwind info
        func.beginOffset += uint32_t(offset);

        // Whole block is a part of a 'single function'
        if (func.endOffset == kFullBlockFuncton)
            func.endOffset = uint32_t(funcSize);
        else
            func.endOffset += uint32_t(offset);

        // Unwind data is placed right after the RUNTIME_FUNCTION data
        func.unwindInfoOffset += uint32_t(sizeof(UnwindFunctionWin) * unwindFunctions.size());
        memcpy(target, &func, sizeof(func));
        target += sizeof(func);
    }

    // Copy unwind codes
    memcpy(target, rawData, size_t(rawDataPos - rawData));
}

} // namespace CodeGen
} // namespace Luau

@ -0,0 +1,21 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "luacodegen.h"

#include "Luau/CodeGen.h"

#include "lapi.h"

int luau_codegen_supported()
{
    return Luau::CodeGen::isSupported();
}

void luau_codegen_create(lua_State* L)
{
    Luau::CodeGen::create(L);
}

void luau_codegen_compile(lua_State* L, int idx)
{
    Luau::CodeGen::compile(L, idx);
}

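A minimal embedding sketch for the C API above, mirroring the Rust test further down (error handling omitted; luau_compile/luau_load usage assumed from the existing Luau C API):

    lua_State* L = luaL_newstate();

    if (luau_codegen_supported())
        luau_codegen_create(L); // must be set up before compiling any closures natively

    // ... compile a chunk with luau_compile and push it with luau_load ...

    luau_codegen_compile(L, -1); // natively compile the closure on top of the stack
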
src/lib.rs
@ -6,6 +6,8 @@ pub struct Build {
    out_dir: Option<PathBuf>,
    target: Option<String>,
    host: Option<String>,
    // Enable code generator (jit)
    enable_codegen: bool,
}

pub struct Artifacts {

@ -22,6 +24,7 @@ impl Build {
            out_dir: env::var_os("OUT_DIR").map(|s| PathBuf::from(s).join("luau-build")),
            target: env::var("TARGET").ok(),
            host: env::var("HOST").ok(),
            enable_codegen: false,
        }
    }

@ -40,6 +43,11 @@ impl Build {
        self
    }

    pub fn enable_codegen(&mut self, enable: bool) -> &mut Build {
        self.enable_codegen = enable;
        self
    }

    pub fn build(&mut self) -> Artifacts {
        let target = &self.target.as_ref().expect("TARGET not set")[..];
        let host = &self.host.as_ref().expect("HOST not set")[..];

@ -51,6 +59,8 @@ impl Build {
        let common_include_dir = source_dir_base.join("luau").join("Common").join("include");
        let ast_source_dir = source_dir_base.join("luau").join("Ast").join("src");
        let ast_include_dir = source_dir_base.join("luau").join("Ast").join("include");
        let codegen_source_dir = source_dir_base.join("luau").join("CodeGen").join("src");
        let codegen_include_dir = source_dir_base.join("luau").join("CodeGen").join("include");
        let compiler_source_dir = source_dir_base.join("luau").join("Compiler").join("src");
        let compiler_include_dir = source_dir_base
            .join("luau")

@ -81,6 +91,10 @@ impl Build {
            .flag_if_supported("/std:c++17") // MSVC
            .cpp(true);

        if self.enable_codegen {
            config.define("LUA_CUSTOM_EXECUTION", None);
        }

        if cfg!(not(debug_assertions)) {
            config.define("NDEBUG", None);
            config.opt_level(2);

@ -98,6 +112,24 @@ impl Build {
            .out_dir(&lib_dir)
            .compile(ast_lib_name);

        // Build CodeGen
        let codegen_lib_name = "luaucodegen";
        if self.enable_codegen {
            config
                .clone()
                .include(&codegen_include_dir)
                .include(&common_include_dir)
                .include(&vm_include_dir)
                .include(&vm_source_dir)
                .define("LUACODEGEN_API", "extern \"C\"")
                // Code generator uses lua VM internals, so we need to provide the same defines used to build VM
                .define("LUA_API", "extern \"C\"")
                .define("LUAI_MAXCSTACK", "100000")
                .add_files_by_ext(&codegen_source_dir, "cpp")
                .out_dir(&lib_dir)
                .compile(codegen_lib_name);
        }

        // Build Compiler
        let compiler_lib_name = "luaucompiler";
        config

@ -130,7 +162,7 @@ impl Build {
            fs::copy(compiler_include_dir.join(f), include_dir.join(f)).unwrap();
        }

        Artifacts {
        let mut artifacts = Artifacts {
            lib_dir,
            include_dir,
            libs: vec![

@ -139,7 +171,13 @@ impl Build {
                vm_lib_name.to_string(),
            ],
            cpp_stdlib: Self::get_cpp_link_stdlib(target),
        };

        if self.enable_codegen {
            artifacts.libs.push(codegen_lib_name.to_string());
        }

        artifacts
    }

    fn get_cpp_link_stdlib(target: &str) -> Option<String> {

@ -1,5 +1,5 @@
fn main() {
    println!("cargo:rerun-if-changed=build.rs");
    let artifacts = luau0_src::Build::new().build();
    let artifacts = luau0_src::Build::new().enable_codegen(true).build();
    artifacts.print_cargo_metadata();
}

@ -15,9 +15,14 @@ extern "C" {
    pub fn free(ptr: *mut c_void);

    pub fn luaL_newstate() -> *mut c_void;
    pub fn lua_close(state: *mut c_void);
    pub fn luaL_openlibs(state: *mut c_void);
    pub fn lua_getfield(state: *mut c_void, index: c_int, k: *const c_char) -> c_int;
    pub fn lua_tolstring(state: *mut c_void, index: c_int, len: *mut c_long) -> *const c_char;
    pub fn lua_call(state: *mut c_void, nargs: c_int, nresults: c_int);

    pub fn lua_pushinteger(state: *mut c_void, n: c_int);
    pub fn lua_tointegerx(state: *mut c_void, index: c_int, isnum: *mut c_int) -> c_int;

    pub fn luau_compile(
        source: *const c_char,

@ -32,6 +37,10 @@ extern "C" {
        size: usize,
        env: c_int,
    ) -> c_int;

    pub fn luau_codegen_supported() -> c_int;
    pub fn luau_codegen_create(state: *mut c_void);
    pub fn luau_codegen_compile(state: *mut c_void, idx: c_int);
}

pub unsafe fn lua_getglobal(state: *mut c_void, k: *const c_char) {

@ -45,6 +54,11 @@ fn luau_works() {
        let state = luaL_newstate();
        assert!(state != ptr::null_mut());

        // Enable JIT if supported
        if luau_codegen_supported() != 0 {
            luau_codegen_create(state);
        }

        luaL_openlibs(state);

        let version = {

@ -56,7 +70,7 @@ fn luau_works() {

        assert_eq!(version, "Luau".as_bytes());

        let code = "function sum(a, b) return a + b end\0";
        let code = "local a, b = ... return a + b\0";
        let mut bytecode_size = 0;
        let bytecode = luau_compile(
            code.as_ptr().cast(),

@ -64,8 +78,21 @@ fn luau_works() {
            ptr::null_mut(),
            &mut bytecode_size,
        );
        let result = luau_load(state, "test\0".as_ptr().cast(), bytecode, bytecode_size, 0);
        let result = luau_load(state, "sum\0".as_ptr().cast(), bytecode, bytecode_size, 0);
        assert_eq!(result, 0);
        free(bytecode.cast());

        // Compile the function (JIT, if supported)
        if luau_codegen_supported() != 0 {
            luau_codegen_compile(state, -1);
        }

        // Call the loaded function
        lua_pushinteger(state, 123);
        lua_pushinteger(state, 321);
        lua_call(state, 2, 1);
        assert_eq!(lua_tointegerx(state, -1, ptr::null_mut()), 444);

        lua_close(state);
    }
}