v0.5.10+luau581

Alex Orlenko 2023-06-16 21:56:50 +01:00
parent ab7f49f168
commit 63f535e684
41 changed files with 1254 additions and 452 deletions

View File

@ -1,6 +1,6 @@
[package]
name = "luau0-src"
version = "0.5.9+luau579"
version = "0.5.10+luau581"
authors = ["Aleksandr Orlenko <zxteam@protonmail.com>"]
edition = "2021"
repository = "https://github.com/khvzak/luau-src-rs"

View File

@ -801,12 +801,20 @@ struct AstDeclaredClassProp
bool isMethod = false;
};
struct AstTableIndexer
{
AstType* indexType;
AstType* resultType;
Location location;
};
class AstStatDeclareClass : public AstStat
{
public:
LUAU_RTTI(AstStatDeclareClass)
AstStatDeclareClass(const Location& location, const AstName& name, std::optional<AstName> superName, const AstArray<AstDeclaredClassProp>& props);
AstStatDeclareClass(const Location& location, const AstName& name, std::optional<AstName> superName, const AstArray<AstDeclaredClassProp>& props,
AstTableIndexer* indexer = nullptr);
void visit(AstVisitor* visitor) override;
@ -814,6 +822,7 @@ public:
std::optional<AstName> superName;
AstArray<AstDeclaredClassProp> props;
AstTableIndexer* indexer;
};
class AstType : public AstNode
@ -862,13 +871,6 @@ struct AstTableProp
AstType* type;
};
struct AstTableIndexer
{
AstType* indexType;
AstType* resultType;
Location location;
};
class AstTypeTable : public AstType
{
public:

View File

@ -714,12 +714,13 @@ void AstStatDeclareFunction::visit(AstVisitor* visitor)
}
}
AstStatDeclareClass::AstStatDeclareClass(
const Location& location, const AstName& name, std::optional<AstName> superName, const AstArray<AstDeclaredClassProp>& props)
AstStatDeclareClass::AstStatDeclareClass(const Location& location, const AstName& name, std::optional<AstName> superName,
const AstArray<AstDeclaredClassProp>& props, AstTableIndexer* indexer)
: AstStat(ClassIndex(), location)
, name(name)
, superName(superName)
, props(props)
, indexer(indexer)
{
}

View File

@ -13,6 +13,7 @@
// See docs/SyntaxChanges.md for an explanation.
LUAU_FASTINTVARIABLE(LuauRecursionLimit, 1000)
LUAU_FASTINTVARIABLE(LuauParseErrorLimit, 100)
LUAU_FASTFLAGVARIABLE(LuauParseDeclareClassIndexer, false)
#define ERROR_INVALID_INTERP_DOUBLE_BRACE "Double braces are not permitted within interpolated strings. Did you mean '\\{'?"
@ -877,6 +878,7 @@ AstStat* Parser::parseDeclaration(const Location& start)
}
TempVector<AstDeclaredClassProp> props(scratchDeclaredClassProps);
AstTableIndexer* indexer = nullptr;
while (lexer.current().type != Lexeme::ReservedEnd)
{
@ -885,7 +887,8 @@ AstStat* Parser::parseDeclaration(const Location& start)
{
props.push_back(parseDeclaredClassMethod());
}
else if (lexer.current().type == '[')
else if (lexer.current().type == '[' && (!FFlag::LuauParseDeclareClassIndexer || lexer.lookahead().type == Lexeme::RawString ||
lexer.lookahead().type == Lexeme::QuotedString))
{
const Lexeme begin = lexer.current();
nextLexeme(); // [
@ -904,6 +907,22 @@ AstStat* Parser::parseDeclaration(const Location& start)
else
report(begin.location, "String literal contains malformed escape sequence");
}
else if (lexer.current().type == '[' && FFlag::LuauParseDeclareClassIndexer)
{
if (indexer)
{
// maybe we don't need to parse the entire badIndexer...
// however, we either have { or [ to lint, not the entire table type or the bad indexer.
AstTableIndexer* badIndexer = parseTableIndexer();
// we lose all additional indexer expressions from the AST after error recovery here
report(badIndexer->location, "Cannot have more than one class indexer");
}
else
{
indexer = parseTableIndexer();
}
}
else
{
Name propName = parseName("property name");
@ -916,7 +935,7 @@ AstStat* Parser::parseDeclaration(const Location& start)
Location classEnd = lexer.current().location;
nextLexeme(); // skip past `end`
return allocator.alloc<AstStatDeclareClass>(Location(classStart, classEnd), className.name, superName, copy(props));
return allocator.alloc<AstStatDeclareClass>(Location(classStart, classEnd), className.name, superName, copy(props), indexer);
}
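// Illustrative input (hypothetical, not from this diff): with LuauParseDeclareClassIndexer
// enabled, a declaration such as
//
//     declare class Container
//         ["legacy-name"]: string -- '[' + string literal: still parsed as a property
//         [number]: string        -- '[' + type: parsed by parseTableIndexer above
//     end
//
// yields an AstStatDeclareClass with a non-null 'indexer'; a second indexer is
// rejected with "Cannot have more than one class indexer"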
else if (std::optional<Name> globalName = parseNameOpt("global variable name"))
{

View File

@ -14,13 +14,10 @@ namespace A64
enum class AddressKindA64 : uint8_t
{
imm, // reg + imm
reg, // reg + reg
// TODO:
// reg + reg << shift
// reg + sext(reg) << shift
// reg + uext(reg) << shift
reg, // reg + reg
imm, // reg + imm
pre, // reg + imm, reg += imm
post, // reg, reg += imm
};
struct AddressA64
@ -29,13 +26,14 @@ struct AddressA64
// For example, ldr x0, [reg+imm] is limited to 8 KB offsets assuming imm is divisible by 8, but loading into w0 reduces the range to 4 KB
static constexpr size_t kMaxOffset = 1023;
constexpr AddressA64(RegisterA64 base, int off = 0)
: kind(AddressKindA64::imm)
constexpr AddressA64(RegisterA64 base, int off = 0, AddressKindA64 kind = AddressKindA64::imm)
: kind(kind)
, base(base)
, offset(xzr)
, data(off)
{
LUAU_ASSERT(base.kind == KindA64::x || base == sp);
LUAU_ASSERT(kind != AddressKindA64::reg);
}
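// Illustrative encodings enabled by the new kinds (hypothetical registers):
//   mem(x1, 16, AddressKindA64::pre)  -> ldr x0, [x1, #16]!  (x1 += 16, then load)
//   mem(x1, 16, AddressKindA64::post) -> ldr x0, [x1], #16   (load, then x1 += 16)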
constexpr AddressA64(RegisterA64 base, RegisterA64 offset)

View File

@ -98,6 +98,8 @@ public:
void call(Label& label);
void call(OperandX64 op);
void lea(RegisterX64 lhs, Label& label);
void int3();
void ud2();
@ -243,6 +245,7 @@ private:
LUAU_NOINLINE void log(const char* opcode, OperandX64 op1, OperandX64 op2, OperandX64 op3, OperandX64 op4);
LUAU_NOINLINE void log(Label label);
LUAU_NOINLINE void log(const char* opcode, Label label);
LUAU_NOINLINE void log(const char* opcode, RegisterX64 reg, Label label);
void log(OperandX64 op);
const char* getSizeName(SizeX64 size) const;

View File

@ -1,6 +1,8 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Common.h"
#include <bitset>
#include <utility>
#include <vector>
@ -37,6 +39,16 @@ struct RegisterSet
void requireVariadicSequence(RegisterSet& sourceRs, const RegisterSet& defRs, uint8_t varargStart);
struct BlockOrdering
{
uint32_t depth = 0;
uint32_t preOrder = ~0u;
uint32_t postOrder = ~0u;
bool visited = false;
};
struct CfgInfo
{
std::vector<uint32_t> predecessors;
@ -45,6 +57,15 @@ struct CfgInfo
std::vector<uint32_t> successors;
std::vector<uint32_t> successorsOffsets;
// Immediate dominators (unique parent in the dominator tree)
std::vector<uint32_t> idoms;
// Children in the dominator tree
std::vector<uint32_t> domChildren;
std::vector<uint32_t> domChildrenOffsets;
std::vector<BlockOrdering> domOrdering;
// VM registers that are live when the block is entered
// Additionally, an active variadic sequence can exist at the entry of the block
std::vector<RegisterSet> in;
@ -64,6 +85,18 @@ struct CfgInfo
RegisterSet captured;
};
// A quick refresher on dominance and dominator trees:
// * If A is a dominator of B (A dom B), you can never execute B without executing A first
// * A is a strict dominator of B (A sdom B) - similar to the previous one, but A != B
// * Immediate dominator of node N (idom N) is the unique node T such that T sdom N,
// but T does not strictly dominate any other node that strictly dominates N.
// * Dominance frontier is a set of nodes where dominance of a node X ends.
// In practice this is where values established by node X might no longer hold because of join edges from other nodes coming in.
// This is also where PHI instructions in SSA are placed.
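// An illustrative (non-source) example: in the diamond CFG
//   b0 -> b1, b0 -> b2, b1 -> b3, b2 -> b3
// idom(b1) = idom(b2) = idom(b3) = b0; b3 is not dominated by b1 or b2 because
// either one can be bypassed, and the dominance frontier of b1 (and of b2) is {b3},
// the join point where facts established on only one path stop holding.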
void computeCfgImmediateDominators(IrFunction& function);
void computeCfgDominanceTreeChildren(IrFunction& function);
// Function used to update all CFG data
void computeCfgInfo(IrFunction& function);
struct BlockIteratorWrapper
@ -90,10 +123,17 @@ struct BlockIteratorWrapper
{
return itEnd;
}
uint32_t operator[](size_t pos) const
{
LUAU_ASSERT(pos < size_t(itEnd - itBegin));
return itBegin[pos];
}
};
BlockIteratorWrapper predecessors(const CfgInfo& cfg, uint32_t blockIdx);
BlockIteratorWrapper successors(const CfgInfo& cfg, uint32_t blockIdx);
BlockIteratorWrapper domChildren(const CfgInfo& cfg, uint32_t blockIdx);
} // namespace CodeGen
} // namespace Luau
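Taken together with the new operator[], these wrappers make dominator-tree walks straightforward; a minimal pre-order traversal sketch (illustrative code written against this header, assumed to be Luau/IrAnalysis.h, not part of the commit):

#include "Luau/IrAnalysis.h" // assumed header for CfgInfo/domChildren

#include <vector>

// Visit every block in dominator-tree pre-order: each block is processed only after
// all of its dominators, so facts proven at a block hold for everything below it.
void walkDomTree(const Luau::CodeGen::CfgInfo& cfg)
{
    std::vector<uint32_t> stack = {0}; // block 0 is the function entry

    while (!stack.empty())
    {
        uint32_t blockIdx = stack.back();
        stack.pop_back();

        // ... process blockIdx here ...

        for (uint32_t child : Luau::CodeGen::domChildren(cfg, blockIdx))
            stack.push_back(child);
    }
}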

View File

@ -801,6 +801,8 @@ struct IrBlock
uint32_t start = ~0u;
uint32_t finish = ~0u;
uint32_t sortkey = ~0u;
Label label;
};
@ -823,6 +825,7 @@ struct IrFunction
uint32_t validRestoreOpBlockIdx = 0;
Proto* proto = nullptr;
bool variadic = false;
CfgInfo cfg;

View File

@ -38,6 +38,8 @@ std::string toString(const IrFunction& function, bool includeUseInfo);
std::string dump(const IrFunction& function);
std::string toDot(const IrFunction& function, bool includeInst);
std::string toDotCfg(const IrFunction& function);
std::string toDotDjGraph(const IrFunction& function);
std::string dumpDot(const IrFunction& function, bool includeInst);

View File

@ -876,6 +876,9 @@ void AssemblyBuilderA64::placeA(const char* name, RegisterA64 dst, AddressA64 sr
switch (src.kind)
{
case AddressKindA64::reg:
place(dst.index | (src.base.index << 5) | (0b011'0'10 << 10) | (src.offset.index << 16) | (1 << 21) | (opsize << 22));
break;
case AddressKindA64::imm:
if (unsigned(src.data >> sizelog) < 1024 && (src.data & ((1 << sizelog) - 1)) == 0)
place(dst.index | (src.base.index << 5) | ((src.data >> sizelog) << 10) | (opsize << 22) | (1 << 24));
@ -884,8 +887,13 @@ void AssemblyBuilderA64::placeA(const char* name, RegisterA64 dst, AddressA64 sr
else
LUAU_ASSERT(!"Unable to encode large immediate offset");
break;
case AddressKindA64::reg:
place(dst.index | (src.base.index << 5) | (0b011'0'10 << 10) | (src.offset.index << 16) | (1 << 21) | (opsize << 22));
case AddressKindA64::pre:
LUAU_ASSERT(src.data >= -256 && src.data <= 255);
place(dst.index | (src.base.index << 5) | (0b11 << 10) | ((src.data & ((1 << 9) - 1)) << 12) | (opsize << 22));
break;
case AddressKindA64::post:
LUAU_ASSERT(src.data >= -256 && src.data <= 255);
place(dst.index | (src.base.index << 5) | (0b01 << 10) | ((src.data & ((1 << 9) - 1)) << 12) | (opsize << 22));
break;
}
@ -1312,23 +1320,37 @@ void AssemblyBuilderA64::log(RegisterA64 reg)
void AssemblyBuilderA64::log(AddressA64 addr)
{
text.append("[");
switch (addr.kind)
{
case AddressKindA64::imm:
log(addr.base);
if (addr.data != 0)
logAppend(",#%d", addr.data);
break;
case AddressKindA64::reg:
text.append("[");
log(addr.base);
text.append(",");
log(addr.offset);
text.append("]");
break;
case AddressKindA64::imm:
text.append("[");
log(addr.base);
if (addr.data != 0)
logAppend(" LSL #%d", addr.data);
logAppend(",#%d", addr.data);
text.append("]");
break;
case AddressKindA64::pre:
text.append("[");
log(addr.base);
if (addr.data != 0)
logAppend(",#%d", addr.data);
text.append("]!");
break;
case AddressKindA64::post:
text.append("[");
log(addr.base);
text.append("]!");
if (addr.data != 0)
logAppend(",#%d", addr.data);
break;
}
text.append("]");
}
} // namespace A64

View File

@ -463,6 +463,20 @@ void AssemblyBuilderX64::call(OperandX64 op)
commit();
}
void AssemblyBuilderX64::lea(RegisterX64 lhs, Label& label)
{
LUAU_ASSERT(lhs.size == SizeX64::qword);
placeBinaryRegAndRegMem(lhs, OperandX64(SizeX64::qword, noreg, 1, rip, 0), 0x8d, 0x8d);
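// Note: the rip-relative operand above was emitted with a zero 4-byte displacement;
// rewinding codePos lets placeLabel record a fixup in those bytes, to be patched
// once the label's address is known (an assumed reading of the builder internals)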
codePos -= 4;
placeLabel(label);
commit();
if (logText)
log("lea", lhs, label);
}
void AssemblyBuilderX64::int3()
{
if (logText)
@ -1501,6 +1515,14 @@ void AssemblyBuilderX64::log(const char* opcode, Label label)
logAppend(" %-12s.L%d\n", opcode, label.id);
}
void AssemblyBuilderX64::log(const char* opcode, RegisterX64 reg, Label label)
{
logAppend(" %-12s", opcode);
log(reg);
text.append(",");
logAppend(".L%d\n", label.id);
}
void AssemblyBuilderX64::log(OperandX64 op)
{
switch (op.cat)

View File

@ -125,7 +125,7 @@ static bool lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction&
return (a.kind == IrBlockKind::Fallback) < (b.kind == IrBlockKind::Fallback);
// Try to order by instruction order
return a.start < b.start;
return a.sortkey < b.sortkey;
});
// For each IR instruction that begins a bytecode instruction, which bytecode instruction is it?
@ -234,6 +234,8 @@ static bool lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction&
build.setLabel(abandoned.label);
}
lowering.finishFunction();
return false;
}
}
@ -244,7 +246,15 @@ static bool lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction&
build.logAppend("#\n");
}
if (outputEnabled && !options.includeOutlinedCode && seenFallback)
if (!seenFallback)
{
textSize = build.text.length();
codeSize = build.getCodeSize();
}
lowering.finishFunction();
if (outputEnabled && !options.includeOutlinedCode && textSize < build.text.size())
{
build.text.resize(textSize);
@ -268,7 +278,7 @@ static bool lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction&
[[maybe_unused]] static bool lowerIr(
A64::AssemblyBuilderA64& build, IrBuilder& ir, NativeState& data, ModuleHelpers& helpers, Proto* proto, AssemblyOptions options)
{
A64::IrLoweringA64 lowering(build, helpers, data, proto, ir.function);
A64::IrLoweringA64 lowering(build, helpers, data, ir.function);
return lowerImpl(build, lowering, ir.function, proto->bytecodeid, options);
}
@ -594,6 +604,12 @@ std::string getAssembly(lua_State* L, int idx, AssemblyOptions options)
X64::assembleHelpers(build, helpers);
#endif
if (!options.includeOutlinedCode && options.includeAssembly)
{
build.text.clear();
build.logAppend("; skipping %u bytes of outlined helpers\n", unsigned(build.getCodeSize() * sizeof(build.code[0])));
}
for (Proto* p : protos)
if (p)
if (std::optional<NativeProto> np = assembleFunction(build, data, helpers, p, options))

View File

@ -117,6 +117,81 @@ static void emitReentry(AssemblyBuilderA64& build, ModuleHelpers& helpers)
build.br(x4);
}
void emitReturn(AssemblyBuilderA64& build, ModuleHelpers& helpers)
{
// x1 = res
// w2 = number of written values
// x0 = ci
build.ldr(x0, mem(rState, offsetof(lua_State, ci)));
// w3 = ci->nresults
build.ldr(w3, mem(x0, offsetof(CallInfo, nresults)));
Label skipResultCopy;
// Fill the rest of the expected results (nresults - written) with 'nil'
build.cmp(w2, w3);
build.b(ConditionA64::GreaterEqual, skipResultCopy);
// TODO: cmp above could compute this and flags using subs
build.sub(w2, w3, w2); // counter = nresults - written
build.mov(w4, LUA_TNIL);
Label repeatNilLoop = build.setLabel();
build.str(w4, mem(x1, offsetof(TValue, tt)));
build.add(x1, x1, sizeof(TValue));
build.sub(w2, w2, 1);
build.cbnz(w2, repeatNilLoop);
build.setLabel(skipResultCopy);
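// e.g. (illustrative): for 'local a, b, c = f()' where f wrote a single value,
// nresults = 3 and written = 1, so the loop above stores two nils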
// x2 = cip = ci - 1
build.sub(x2, x0, sizeof(CallInfo));
// res = cip->top when nresults >= 0
Label skipFixedRetTop;
build.tbnz(w3, 31, skipFixedRetTop);
build.ldr(x1, mem(x2, offsetof(CallInfo, top))); // res = cip->top
build.setLabel(skipFixedRetTop);
// Update VM state (ci, base, top)
build.str(x2, mem(rState, offsetof(lua_State, ci))); // L->ci = cip
build.ldr(rBase, mem(x2, offsetof(CallInfo, base))); // sync base = L->base while we have a chance
build.str(rBase, mem(rState, offsetof(lua_State, base))); // L->base = cip->base
build.str(x1, mem(rState, offsetof(lua_State, top))); // L->top = res
// Unlikely, but this might be the last return from VM
build.ldr(w4, mem(x0, offsetof(CallInfo, flags)));
build.tbnz(w4, countrz(LUA_CALLINFO_RETURN), helpers.exitNoContinueVm);
// Continue in interpreter if function has no native data
build.ldr(w4, mem(x2, offsetof(CallInfo, flags)));
build.tbz(w4, countrz(LUA_CALLINFO_NATIVE), helpers.exitContinueVm);
// Need to update state of the current function before we jump away
build.ldr(rClosure, mem(x2, offsetof(CallInfo, func)));
build.ldr(rClosure, mem(rClosure, offsetof(TValue, value.gc)));
build.ldr(x1, mem(rClosure, offsetof(Closure, l.p))); // cl->l.p aka proto
LUAU_ASSERT(offsetof(Proto, code) == offsetof(Proto, k) + 8);
build.ldp(rConstants, rCode, mem(x1, offsetof(Proto, k))); // proto->k, proto->code
// Get instruction index from instruction pointer
// To get instruction index from instruction pointer, we need to divide byte offset by 4
// But we will actually need to scale instruction index by 4 back to byte offset later so it cancels out
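// (Concretely: execdata holds one 4-byte entry per instruction, so entry i lives at
// byte offset i * 4, which is exactly the byte offset savedpc - code computed below)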
build.ldr(x2, mem(x2, offsetof(CallInfo, savedpc))); // cip->savedpc
build.sub(x2, x2, rCode);
// Get new instruction location and jump to it
LUAU_ASSERT(offsetof(Proto, exectarget) == offsetof(Proto, execdata) + 8);
build.ldp(x3, x4, mem(x1, offsetof(Proto, execdata)));
build.ldr(w2, mem(x3, x2));
build.add(x4, x4, x2);
build.br(x4);
}
static EntryLocations buildEntryFunction(AssemblyBuilderA64& build, UnwindBuilder& unwind)
{
EntryLocations locations;
@ -213,23 +288,28 @@ void assembleHelpers(AssemblyBuilderA64& build, ModuleHelpers& helpers)
{
if (build.logText)
build.logAppend("; exitContinueVm\n");
helpers.exitContinueVm = build.setLabel();
build.setLabel(helpers.exitContinueVm);
emitExit(build, /* continueInVm */ true);
if (build.logText)
build.logAppend("; exitNoContinueVm\n");
helpers.exitNoContinueVm = build.setLabel();
build.setLabel(helpers.exitNoContinueVm);
emitExit(build, /* continueInVm */ false);
if (build.logText)
build.logAppend("; reentry\n");
helpers.reentry = build.setLabel();
build.setLabel(helpers.reentry);
emitReentry(build, helpers);
if (build.logText)
build.logAppend("; interrupt\n");
helpers.interrupt = build.setLabel();
build.setLabel(helpers.interrupt);
emitInterrupt(build);
if (build.logText)
build.logAppend("; return\n");
build.setLabel(helpers.return_);
emitReturn(build, helpers);
}
} // namespace A64

View File

@ -17,8 +17,6 @@
#include <string.h>
LUAU_FASTFLAG(LuauUniformTopHandling)
// All external function calls that can cause stack realloc or Lua calls have to be wrapped in VM_PROTECT
// This makes sure that we save the pc (in case the Lua call needs to generate a backtrace) before the call,
// and restores the stack pointer after in case stack gets reallocated
@ -306,44 +304,6 @@ Closure* callFallback(lua_State* L, StkId ra, StkId argtop, int nresults)
}
}
// Extracted as-is from lvmexecute.cpp with the exception of control flow (reentry) and removed interrupts
Closure* returnFallback(lua_State* L, StkId ra, StkId valend)
{
// ci is our callinfo, cip is our parent
CallInfo* ci = L->ci;
CallInfo* cip = ci - 1;
StkId res = ci->func; // note: we assume CALL always puts func+args and expects results to start at func
StkId vali = ra;
int nresults = ci->nresults;
// copy return values into parent stack (but only up to nresults!), fill the rest with nil
// note: in MULTRET context nresults starts as -1 so i != 0 condition never activates intentionally
int i;
for (i = nresults; i != 0 && vali < valend; i--)
setobj2s(L, res++, vali++);
while (i-- > 0)
setnilvalue(res++);
// pop the stack frame
L->ci = cip;
L->base = cip->base;
L->top = (nresults == LUA_MULTRET) ? res : cip->top;
// we're done!
if (LUAU_UNLIKELY(ci->flags & LUA_CALLINFO_RETURN))
{
if (!FFlag::LuauUniformTopHandling)
L->top = res;
return NULL;
}
// keep executing new function
LUAU_ASSERT(isLua(cip));
return clvalue(cip->func);
}
const Instruction* executeGETGLOBAL(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);

View File

@ -18,7 +18,6 @@ Closure* callProlog(lua_State* L, TValue* ra, StkId argtop, int nresults);
void callEpilogC(lua_State* L, int nresults, int n);
Closure* callFallback(lua_State* L, StkId ra, StkId argtop, int nresults);
Closure* returnFallback(lua_State* L, StkId ra, StkId valend);
const Instruction* executeGETGLOBAL(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* executeSETGLOBAL(lua_State* L, const Instruction* pc, StkId base, TValue* k);

View File

@ -56,6 +56,11 @@ static EntryLocations buildEntryFunction(AssemblyBuilderX64& build, UnwindBuilde
locations.start = build.setLabel();
unwind.startFunction();
RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi;
RegisterX64 rArg2 = (build.abi == ABIX64::Windows) ? rdx : rsi;
RegisterX64 rArg3 = (build.abi == ABIX64::Windows) ? r8 : rdx;
RegisterX64 rArg4 = (build.abi == ABIX64::Windows) ? r9 : rcx;
// Save common non-volatile registers
if (build.abi == ABIX64::SystemV)
{
@ -177,18 +182,28 @@ void assembleHelpers(X64::AssemblyBuilderX64& build, ModuleHelpers& helpers)
{
if (build.logText)
build.logAppend("; exitContinueVm\n");
helpers.exitContinueVm = build.setLabel();
build.setLabel(helpers.exitContinueVm);
emitExit(build, /* continueInVm */ true);
if (build.logText)
build.logAppend("; exitNoContinueVm\n");
helpers.exitNoContinueVm = build.setLabel();
build.setLabel(helpers.exitNoContinueVm);
emitExit(build, /* continueInVm */ false);
if (build.logText)
build.logAppend("; continueCallInVm\n");
helpers.continueCallInVm = build.setLabel();
build.setLabel(helpers.continueCallInVm);
emitContinueCallInVm(build);
if (build.logText)
build.logAppend("; interrupt\n");
build.setLabel(helpers.interrupt);
emitInterrupt(build);
if (build.logText)
build.logAppend("; return\n");
build.setLabel(helpers.return_);
emitReturn(build, helpers);
}
} // namespace X64

View File

@ -24,13 +24,14 @@ struct ModuleHelpers
// A64/X64
Label exitContinueVm;
Label exitNoContinueVm;
Label return_;
Label interrupt;
// X64
Label continueCallInVm;
// A64
Label reentry; // x0: closure
Label interrupt; // x0: pc offset, x1: return address, x2: interrupt
};
} // namespace CodeGen

View File

@ -278,39 +278,34 @@ void emitUpdateBase(AssemblyBuilderX64& build)
build.mov(rBase, qword[rState + offsetof(lua_State, base)]);
}
static void emitSetSavedPc(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos)
void emitInterrupt(AssemblyBuilderX64& build)
{
ScopedRegX64 tmp1{regs, SizeX64::qword};
ScopedRegX64 tmp2{regs, SizeX64::qword};
// rax = pcpos + 1
// rbx = return address in native code
build.mov(tmp1.reg, sCode);
build.add(tmp1.reg, pcpos * sizeof(Instruction));
build.mov(tmp2.reg, qword[rState + offsetof(lua_State, ci)]);
build.mov(qword[tmp2.reg + offsetof(CallInfo, savedpc)], tmp1.reg);
}
// note: rbx is non-volatile so it will be saved across interrupt call automatically
RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi;
RegisterX64 rArg2 = (build.abi == ABIX64::Windows) ? rdx : rsi;
void emitInterrupt(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos)
{
Label skip;
ScopedRegX64 tmp{regs, SizeX64::qword};
// Update L->ci->savedpc; required in case interrupt errors
build.mov(rcx, sCode);
build.lea(rcx, addr[rcx + rax * sizeof(Instruction)]);
build.mov(rax, qword[rState + offsetof(lua_State, ci)]);
build.mov(qword[rax + offsetof(CallInfo, savedpc)], rcx);
// Skip if there is no interrupt set
build.mov(tmp.reg, qword[rState + offsetof(lua_State, global)]);
build.mov(tmp.reg, qword[tmp.reg + offsetof(global_State, cb.interrupt)]);
build.test(tmp.reg, tmp.reg);
// Load interrupt handler; it may be nullptr in case the update raced with the check before we got here
build.mov(rax, qword[rState + offsetof(lua_State, global)]);
build.mov(rax, qword[rax + offsetof(global_State, cb.interrupt)]);
build.test(rax, rax);
build.jcc(ConditionX64::Zero, skip);
emitSetSavedPc(regs, build, pcpos + 1);
// Call interrupt
// TODO: This code should move to the end of the function, or even be outlined so that it can be shared by multiple interruptible instructions
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::dword, -1);
callWrap.call(tmp.release());
emitUpdateBase(build); // interrupt may have reallocated stack
build.mov(rArg1, rState);
build.mov(dwordReg(rArg2), -1);
build.call(rax);
// Check if we need to exit
build.mov(al, byte[rState + offsetof(lua_State, status)]);
@ -322,6 +317,10 @@ void emitInterrupt(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos)
emitExit(build, /* continueInVm */ false);
build.setLabel(skip);
emitUpdateBase(build); // interrupt may have reallocated stack
build.jmp(rbx);
}
void emitFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int offset, int pcpos)
@ -352,6 +351,90 @@ void emitContinueCallInVm(AssemblyBuilderX64& build)
emitExit(build, /* continueInVm */ true);
}
void emitReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers)
{
// input: res in rdi, number of written values in ecx
RegisterX64 res = rdi;
RegisterX64 written = ecx;
RegisterX64 ci = r8;
RegisterX64 cip = r9;
RegisterX64 nresults = esi;
build.mov(ci, qword[rState + offsetof(lua_State, ci)]);
build.lea(cip, addr[ci - sizeof(CallInfo)]);
// nresults = ci->nresults
build.mov(nresults, dword[ci + offsetof(CallInfo, nresults)]);
Label skipResultCopy;
// Fill the rest of the expected results (nresults - written) with 'nil'
RegisterX64 counter = written;
build.sub(counter, nresults); // counter = -(nresults - written)
build.jcc(ConditionX64::GreaterEqual, skipResultCopy);
Label repeatNilLoop = build.setLabel();
build.mov(dword[res + offsetof(TValue, tt)], LUA_TNIL);
build.add(res, sizeof(TValue));
build.inc(counter);
build.jcc(ConditionX64::NotZero, repeatNilLoop);
build.setLabel(skipResultCopy);
build.mov(qword[rState + offsetof(lua_State, ci)], cip); // L->ci = cip
build.mov(rBase, qword[cip + offsetof(CallInfo, base)]); // sync base = L->base while we have a chance
build.mov(qword[rState + offsetof(lua_State, base)], rBase); // L->base = cip->base
Label skipFixedRetTop;
build.test(nresults, nresults); // test here will set SF=1 for a negative number and it always sets OF to 0
build.jcc(ConditionX64::Less, skipFixedRetTop); // jl jumps if SF != OF
build.mov(res, qword[cip + offsetof(CallInfo, top)]); // res = cip->top
build.setLabel(skipFixedRetTop);
build.mov(qword[rState + offsetof(lua_State, top)], res); // L->top = res
// Unlikely, but this might be the last return from VM
build.test(byte[ci + offsetof(CallInfo, flags)], LUA_CALLINFO_RETURN);
build.jcc(ConditionX64::NotZero, helpers.exitNoContinueVm);
// Returning back to the previous function is a bit tricky
// Registers alive: r9 (cip)
RegisterX64 proto = rcx;
RegisterX64 execdata = rbx;
// Change closure
build.mov(rax, qword[cip + offsetof(CallInfo, func)]);
build.mov(rax, qword[rax + offsetof(TValue, value.gc)]);
build.mov(sClosure, rax);
build.mov(proto, qword[rax + offsetof(Closure, l.p)]);
build.mov(execdata, qword[proto + offsetof(Proto, execdata)]);
build.test(byte[cip + offsetof(CallInfo, flags)], LUA_CALLINFO_NATIVE);
build.jcc(ConditionX64::Zero, helpers.exitContinueVm); // Continue in interpreter if function has no native data
// Change constants
build.mov(rConstants, qword[proto + offsetof(Proto, k)]);
// Change code
build.mov(rdx, qword[proto + offsetof(Proto, code)]);
build.mov(sCode, rdx);
build.mov(rax, qword[cip + offsetof(CallInfo, savedpc)]);
// To get instruction index from instruction pointer, we need to divide byte offset by 4
// But we will actually need to scale instruction index by 4 back to byte offset later so it cancels out
build.sub(rax, rdx);
// Get new instruction location and jump to it
build.mov(edx, dword[execdata + rax]);
build.add(rdx, qword[proto + offsetof(Proto, exectarget)]);
build.jmp(rdx);
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -53,31 +53,6 @@ constexpr OperandX64 sCode = qword[rsp + kStackSize + 8]; // Instruction* cod
constexpr OperandX64 sTemporarySlot = addr[rsp + kStackSize + 16];
constexpr OperandX64 sSpillArea = addr[rsp + kStackSize + 24];
// TODO: These should be replaced with a portable call function that checks the ABI at runtime and reorders moves accordingly to avoid conflicts
#if defined(_WIN32)
constexpr RegisterX64 rArg1 = rcx;
constexpr RegisterX64 rArg2 = rdx;
constexpr RegisterX64 rArg3 = r8;
constexpr RegisterX64 rArg4 = r9;
constexpr RegisterX64 rArg5 = noreg;
constexpr RegisterX64 rArg6 = noreg;
constexpr OperandX64 sArg5 = qword[rsp + 32];
constexpr OperandX64 sArg6 = qword[rsp + 40];
#else
constexpr RegisterX64 rArg1 = rdi;
constexpr RegisterX64 rArg2 = rsi;
constexpr RegisterX64 rArg3 = rdx;
constexpr RegisterX64 rArg4 = rcx;
constexpr RegisterX64 rArg5 = r8;
constexpr RegisterX64 rArg6 = r9;
constexpr OperandX64 sArg5 = noreg;
constexpr OperandX64 sArg6 = noreg;
#endif
inline OperandX64 luauReg(int ri)
{
return xmmword[rBase + ri * sizeof(TValue)];
@ -202,11 +177,13 @@ void callStepGc(IrRegAllocX64& regs, AssemblyBuilderX64& build);
void emitExit(AssemblyBuilderX64& build, bool continueInVm);
void emitUpdateBase(AssemblyBuilderX64& build);
void emitInterrupt(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos);
void emitInterrupt(AssemblyBuilderX64& build);
void emitFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int offset, int pcpos);
void emitContinueCallInVm(AssemblyBuilderX64& build);
void emitReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers);
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -18,6 +18,12 @@ namespace X64
void emitInstCall(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults)
{
// TODO: This should use IrCallWrapperX64
RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi;
RegisterX64 rArg2 = (build.abi == ABIX64::Windows) ? rdx : rsi;
RegisterX64 rArg3 = (build.abi == ABIX64::Windows) ? r8 : rdx;
RegisterX64 rArg4 = (build.abi == ABIX64::Windows) ? r9 : rcx;
build.mov(rArg1, rState);
build.lea(rArg2, luauRegAddress(ra));
@ -163,167 +169,90 @@ void emitInstCall(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int
}
}
void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults)
void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults, bool functionVariadic)
{
RegisterX64 ci = r8;
RegisterX64 cip = r9;
RegisterX64 res = rdi;
RegisterX64 nresults = esi;
build.mov(ci, qword[rState + offsetof(lua_State, ci)]);
build.lea(cip, addr[ci - sizeof(CallInfo)]);
// res = ci->func; note: we assume CALL always puts func+args and expects results to start at func
build.mov(res, qword[ci + offsetof(CallInfo, func)]);
// nresults = ci->nresults
build.mov(nresults, dword[ci + offsetof(CallInfo, nresults)]);
RegisterX64 written = ecx;
if (functionVariadic)
{
Label skipResultCopy;
build.mov(res, qword[rState + offsetof(lua_State, ci)]);
build.mov(res, qword[res + offsetof(CallInfo, func)]);
}
else if (actualResults != 1)
build.lea(res, addr[rBase - sizeof(TValue)]); // invariant: ci->func + 1 == ci->base for non-variadic frames
RegisterX64 counter = ecx;
if (actualResults == 0)
if (actualResults == 0)
{
build.xor_(written, written);
build.jmp(helpers.return_);
}
else if (actualResults == 1 && !functionVariadic)
{
// fast path: minimizes res adjustments
// note that we skipped res computation for this specific case above
build.vmovups(xmm0, luauReg(ra));
build.vmovups(xmmword[rBase - sizeof(TValue)], xmm0);
build.mov(res, rBase);
build.mov(written, 1);
build.jmp(helpers.return_);
}
else if (actualResults >= 1 && actualResults <= 3)
{
for (int r = 0; r < actualResults; ++r)
{
// Our instruction doesn't have any results, so just fill results expected in parent with 'nil'
build.test(nresults, nresults); // test here will set SF=1 for a negative number, ZF=1 for zero and OF=0
build.jcc(ConditionX64::LessEqual, skipResultCopy); // jle jumps if SF != OF or ZF == 1
build.mov(counter, nresults);
Label repeatNilLoop = build.setLabel();
build.mov(dword[res + offsetof(TValue, tt)], LUA_TNIL);
build.add(res, sizeof(TValue));
build.dec(counter);
build.jcc(ConditionX64::NotZero, repeatNilLoop);
build.vmovups(xmm0, luauReg(ra + r));
build.vmovups(xmmword[res + r * sizeof(TValue)], xmm0);
}
else if (actualResults == 1)
{
// Try setting our 1 result
build.test(nresults, nresults);
build.jcc(ConditionX64::Zero, skipResultCopy);
build.add(res, actualResults * sizeof(TValue));
build.mov(written, actualResults);
build.jmp(helpers.return_);
}
else
{
RegisterX64 vali = rax;
RegisterX64 valend = rdx;
build.lea(counter, addr[nresults - 1]);
// vali = ra
build.lea(vali, luauRegAddress(ra));
build.vmovups(xmm0, luauReg(ra));
build.vmovups(xmmword[res], xmm0);
build.add(res, sizeof(TValue));
// Fill the rest of the expected results with 'nil'
build.test(counter, counter); // test here will set SF=1 for a negative number, ZF=1 for zero and OF=0
build.jcc(ConditionX64::LessEqual, skipResultCopy); // jle jumps if SF != OF or ZF == 1
Label repeatNilLoop = build.setLabel();
build.mov(dword[res + offsetof(TValue, tt)], LUA_TNIL);
build.add(res, sizeof(TValue));
build.dec(counter);
build.jcc(ConditionX64::NotZero, repeatNilLoop);
}
// Copy as much as possible for MULTRET calls, and only as much as needed otherwise
if (actualResults == LUA_MULTRET)
build.mov(valend, qword[rState + offsetof(lua_State, top)]); // valend = L->top
else
build.lea(valend, luauRegAddress(ra + actualResults)); // valend = ra + actualResults
build.xor_(written, written);
Label repeatValueLoop, exitValueLoop;
if (actualResults == LUA_MULTRET)
{
RegisterX64 vali = rax;
RegisterX64 valend = rdx;
// Copy return values into parent stack (but only up to nresults!)
build.test(nresults, nresults);
build.jcc(ConditionX64::Zero, skipResultCopy);
// vali = ra
build.lea(vali, luauRegAddress(ra));
// Copy as much as possible for MULTRET calls, and only as much as needed otherwise
if (actualResults == LUA_MULTRET)
build.mov(valend, qword[rState + offsetof(lua_State, top)]); // valend = L->top
else
build.lea(valend, luauRegAddress(ra + actualResults)); // valend = ra + actualResults
build.mov(counter, nresults);
Label repeatValueLoop, exitValueLoop;
build.setLabel(repeatValueLoop);
build.cmp(vali, valend);
build.jcc(ConditionX64::NotBelow, exitValueLoop);
build.vmovups(xmm0, xmmword[vali]);
build.vmovups(xmmword[res], xmm0);
build.add(vali, sizeof(TValue));
build.add(res, sizeof(TValue));
build.dec(counter);
build.jcc(ConditionX64::NotZero, repeatValueLoop);
build.setLabel(exitValueLoop);
// Fill the rest of the expected results with 'nil'
build.test(counter, counter); // test here will set SF=1 for a negative number, ZF=1 for zero and OF=0
build.jcc(ConditionX64::LessEqual, skipResultCopy); // jle jumps if SF != OF or ZF == 1
Label repeatNilLoop = build.setLabel();
build.mov(dword[res + offsetof(TValue, tt)], LUA_TNIL);
build.add(res, sizeof(TValue));
build.dec(counter);
build.jcc(ConditionX64::NotZero, repeatNilLoop);
}
build.setLabel(skipResultCopy);
build.setLabel(repeatValueLoop);
build.vmovups(xmm0, xmmword[vali]);
build.vmovups(xmmword[res], xmm0);
build.add(vali, sizeof(TValue));
build.add(res, sizeof(TValue));
build.inc(written);
build.cmp(vali, valend);
build.jcc(ConditionX64::Below, repeatValueLoop);
build.setLabel(exitValueLoop);
build.jmp(helpers.return_);
}
build.mov(qword[rState + offsetof(lua_State, ci)], cip); // L->ci = cip
build.mov(rBase, qword[cip + offsetof(CallInfo, base)]); // sync base = L->base while we have a chance
build.mov(qword[rState + offsetof(lua_State, base)], rBase); // L->base = cip->base
// Start with result for LUA_MULTRET/exit value
build.mov(qword[rState + offsetof(lua_State, top)], res); // L->top = res
// Unlikely, but this might be the last return from VM
build.test(byte[ci + offsetof(CallInfo, flags)], LUA_CALLINFO_RETURN);
build.jcc(ConditionX64::NotZero, helpers.exitNoContinueVm);
Label skipFixedRetTop;
build.test(nresults, nresults); // test here will set SF=1 for a negative number and it always sets OF to 0
build.jcc(ConditionX64::Less, skipFixedRetTop); // jl jumps if SF != OF
build.mov(rax, qword[cip + offsetof(CallInfo, top)]);
build.mov(qword[rState + offsetof(lua_State, top)], rax); // L->top = cip->top
build.setLabel(skipFixedRetTop);
// Returning back to the previous function is a bit tricky
// Registers alive: r9 (cip)
RegisterX64 proto = rcx;
RegisterX64 execdata = rbx;
// Change closure
build.mov(rax, qword[cip + offsetof(CallInfo, func)]);
build.mov(rax, qword[rax + offsetof(TValue, value.gc)]);
build.mov(sClosure, rax);
build.mov(proto, qword[rax + offsetof(Closure, l.p)]);
build.mov(execdata, qword[proto + offsetof(Proto, execdata)]);
build.test(byte[cip + offsetof(CallInfo, flags)], LUA_CALLINFO_NATIVE);
build.jcc(ConditionX64::Zero, helpers.exitContinueVm); // Continue in interpreter if function has no native data
// Change constants
build.mov(rConstants, qword[proto + offsetof(Proto, k)]);
// Change code
build.mov(rdx, qword[proto + offsetof(Proto, code)]);
build.mov(sCode, rdx);
build.mov(rax, qword[cip + offsetof(CallInfo, savedpc)]);
// To get instruction index from instruction pointer, we need to divide byte offset by 4
// But we will actually need to scale instruction index by 4 back to byte offset later so it cancels out
build.sub(rax, rdx);
// Get new instruction location and jump to it
build.mov(edx, dword[execdata + rax]);
build.add(rdx, qword[proto + offsetof(Proto, exectarget)]);
build.jmp(rdx);
}
void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, int count, uint32_t index)
{
// TODO: This should use IrCallWrapperX64
RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi;
RegisterX64 rArg2 = (build.abi == ABIX64::Windows) ? rdx : rsi;
RegisterX64 rArg3 = (build.abi == ABIX64::Windows) ? r8 : rdx;
OperandX64 last = index + count - 1;
// Using non-volatile 'rbx' for dynamic 'count' value (for LUA_MULTRET) to skip later recomputation
@ -426,6 +355,12 @@ void emitInstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRep
// ipairs-style traversal is handled in IR
LUAU_ASSERT(aux >= 0);
// TODO: This should use IrCallWrapperX64
RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi;
RegisterX64 rArg2 = (build.abi == ABIX64::Windows) ? rdx : rsi;
RegisterX64 rArg3 = (build.abi == ABIX64::Windows) ? r8 : rdx;
RegisterX64 rArg4 = (build.abi == ABIX64::Windows) ? r9 : rcx;
// This is a fast-path for builtin table iteration, tag check for 'ra' has to be performed before emitting this instruction
// Registers are chosen in this way to simplify fallback code for the node part

View File

@ -18,7 +18,7 @@ class AssemblyBuilderX64;
struct IrRegAllocX64;
void emitInstCall(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults);
void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults);
void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults, bool functionVariadic);
void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, int count, uint32_t index);
void emitInstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat);

View File

@ -661,9 +661,212 @@ static void computeCfgBlockEdges(IrFunction& function)
}
}
// Assign tree depth and pre- and post- DFS visit order of the tree/graph nodes
// Optionally, collect required node order into a vector
template<auto childIt>
void computeBlockOrdering(
IrFunction& function, std::vector<BlockOrdering>& ordering, std::vector<uint32_t>* preOrder, std::vector<uint32_t>* postOrder)
{
CfgInfo& info = function.cfg;
LUAU_ASSERT(info.idoms.size() == function.blocks.size());
ordering.clear();
ordering.resize(function.blocks.size());
// Get depth-first post-order using manual stack instead of recursion
struct StackItem
{
uint32_t blockIdx;
uint32_t itPos;
};
std::vector<StackItem> stack;
if (preOrder)
preOrder->reserve(function.blocks.size());
if (postOrder)
postOrder->reserve(function.blocks.size());
uint32_t nextPreOrder = 0;
uint32_t nextPostOrder = 0;
stack.push_back({0, 0});
ordering[0].visited = true;
ordering[0].preOrder = nextPreOrder++;
while (!stack.empty())
{
StackItem& item = stack.back();
BlockIteratorWrapper children = childIt(info, item.blockIdx);
if (item.itPos < children.size())
{
uint32_t childIdx = children[item.itPos++];
BlockOrdering& childOrdering = ordering[childIdx];
if (!childOrdering.visited)
{
childOrdering.visited = true;
childOrdering.depth = uint32_t(stack.size());
childOrdering.preOrder = nextPreOrder++;
if (preOrder)
preOrder->push_back(item.blockIdx);
stack.push_back({childIdx, 0});
}
}
else
{
ordering[item.blockIdx].postOrder = nextPostOrder++;
if (postOrder)
postOrder->push_back(item.blockIdx);
stack.pop_back();
}
}
}
// Dominance tree construction based on 'A Simple, Fast Dominance Algorithm' [Keith D. Cooper, et al]
// This solution has quadratic complexity in the worst case.
// It is possible to switch to SEMI-NCA algorithm (also quadratic) mentioned in 'Linear-Time Algorithms for Dominators and Related Problems' [Loukas
// Georgiadis]
// Find block that is common between blocks 'a' and 'b' on the path towards the entry
static uint32_t findCommonDominator(const std::vector<uint32_t>& idoms, const std::vector<BlockOrdering>& data, uint32_t a, uint32_t b)
{
while (a != b)
{
while (data[a].postOrder < data[b].postOrder)
{
a = idoms[a];
LUAU_ASSERT(a != ~0u);
}
while (data[b].postOrder < data[a].postOrder)
{
b = idoms[b];
LUAU_ASSERT(b != ~0u);
}
}
return a;
}
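// Illustrative trace (hypothetical post-orders): in the diamond b0 -> {b1, b2} -> b3
// with postOrder(b3, b1, b2, b0) = (0, 1, 2, 3) and idoms[b1] = idoms[b2] = b0,
// findCommonDominator(b1, b2) climbs a: b1 -> b0 (since 1 < 2), then b: b2 -> b0
// (since 2 < 3), and returns b0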
void computeCfgImmediateDominators(IrFunction& function)
{
CfgInfo& info = function.cfg;
// Clear existing data
info.idoms.clear();
info.idoms.resize(function.blocks.size(), ~0u);
std::vector<BlockOrdering> ordering;
std::vector<uint32_t> blocksInPostOrder;
computeBlockOrdering<successors>(function, ordering, /* preOrder */ nullptr, &blocksInPostOrder);
// Entry node is temporarily marked to be an idom of itself to make algorithm work
info.idoms[0] = 0;
// Iteratively compute immediate dominators
bool updated = true;
while (updated)
{
updated = false;
// Go over blocks in reverse post-order of CFG
// '- 2' skips the root node which is last in post-order traversal
for (int i = int(blocksInPostOrder.size() - 2); i >= 0; i--)
{
uint32_t blockIdx = blocksInPostOrder[i];
uint32_t newIdom = ~0u;
for (uint32_t predIdx : predecessors(info, blockIdx))
{
if (uint32_t predIdom = info.idoms[predIdx]; predIdom != ~0u)
{
if (newIdom == ~0u)
newIdom = predIdx;
else
newIdom = findCommonDominator(info.idoms, ordering, newIdom, predIdx);
}
}
if (newIdom != info.idoms[blockIdx])
{
info.idoms[blockIdx] = newIdom;
// Run until a fixed point is reached
updated = true;
}
}
}
// Entry node doesn't have an immediate dominator
info.idoms[0] = ~0u;
}
void computeCfgDominanceTreeChildren(IrFunction& function)
{
CfgInfo& info = function.cfg;
// Clear existing data
info.domChildren.clear();
info.domChildrenOffsets.clear();
info.domChildrenOffsets.resize(function.blocks.size());
// First we need to know the number of children for each node in the dominance tree
// We use the offset array to hold this data; counts will be readjusted to offsets later
for (size_t blockIdx = 0; blockIdx < function.blocks.size(); blockIdx++)
{
uint32_t domParent = info.idoms[blockIdx];
if (domParent != ~0u)
info.domChildrenOffsets[domParent]++;
}
// Convert counts to offsets using prefix sum
uint32_t total = 0;
for (size_t blockIdx = 0; blockIdx < function.blocks.size(); blockIdx++)
{
uint32_t& offset = info.domChildrenOffsets[blockIdx];
uint32_t count = offset;
offset = total;
total += count;
}
info.domChildren.resize(total);
for (size_t blockIdx = 0; blockIdx < function.blocks.size(); blockIdx++)
{
// We use a trick here, where we use the starting offset of the dominance children list as the position at which to write the next child
// The values will be adjusted back in a separate loop later
uint32_t domParent = info.idoms[blockIdx];
if (domParent != ~0u)
info.domChildren[info.domChildrenOffsets[domParent]++] = uint32_t(blockIdx);
}
// Offsets into the dominance children list were used as iterators in the previous loop
// That process basically moved the values in the array 1 step towards the start
// Here we move them one step towards the end and restore 0 for first offset
for (int blockIdx = int(function.blocks.size() - 1); blockIdx > 0; blockIdx--)
info.domChildrenOffsets[blockIdx] = info.domChildrenOffsets[blockIdx - 1];
info.domChildrenOffsets[0] = 0;
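// Illustrative trace (hypothetical counts): per-node child counts {2, 1, 0} become
// start offsets {0, 2, 3} after the prefix sum; the write loop above advances them
// to the region ends {2, 3, 3}; the shift here restores the starts {0, 2, 3}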
computeBlockOrdering<domChildren>(function, info.domOrdering, /* preOrder */ nullptr, /* postOrder */ nullptr);
}
void computeCfgInfo(IrFunction& function)
{
computeCfgBlockEdges(function);
computeCfgImmediateDominators(function);
computeCfgDominanceTreeChildren(function);
computeCfgLiveInOutRegSets(function);
}
@ -687,5 +890,15 @@ BlockIteratorWrapper successors(const CfgInfo& cfg, uint32_t blockIdx)
return BlockIteratorWrapper{cfg.successors.data() + start, cfg.successors.data() + end};
}
BlockIteratorWrapper domChildren(const CfgInfo& cfg, uint32_t blockIdx)
{
LUAU_ASSERT(blockIdx < cfg.domChildrenOffsets.size());
uint32_t start = cfg.domChildrenOffsets[blockIdx];
uint32_t end = blockIdx + 1 < cfg.domChildrenOffsets.size() ? cfg.domChildrenOffsets[blockIdx + 1] : uint32_t(cfg.domChildren.size());
return BlockIteratorWrapper{cfg.domChildren.data() + start, cfg.domChildren.data() + end};
}
} // namespace CodeGen
} // namespace Luau

View File

@ -25,6 +25,7 @@ IrBuilder::IrBuilder()
void IrBuilder::buildFunctionIr(Proto* proto)
{
function.proto = proto;
function.variadic = proto->is_vararg != 0;
// Rebuild original control flow blocks
rebuildBytecodeBasicBlocks(proto);
@ -428,6 +429,7 @@ void IrBuilder::beginBlock(IrOp block)
LUAU_ASSERT(target.start == ~0u || target.start == uint32_t(function.instructions.size()));
target.start = uint32_t(function.instructions.size());
target.sortkey = target.start;
inTerminatedBlock = false;
}

View File

@ -656,28 +656,23 @@ std::string dump(const IrFunction& function)
return result;
}
std::string toDot(const IrFunction& function, bool includeInst)
static void appendLabelRegset(IrToStringContext& ctx, const std::vector<RegisterSet>& regSets, size_t blockIdx, const char* name)
{
std::string result;
IrToStringContext ctx{result, function.blocks, function.constants, function.cfg};
if (blockIdx < regSets.size())
{
const RegisterSet& rs = regSets[blockIdx];
auto appendLabelRegset = [&ctx](const std::vector<RegisterSet>& regSets, size_t blockIdx, const char* name) {
if (blockIdx < regSets.size())
if (rs.regs.any() || rs.varargSeq)
{
const RegisterSet& rs = regSets[blockIdx];
if (rs.regs.any() || rs.varargSeq)
{
append(ctx.result, "|{%s|", name);
appendRegisterSet(ctx, rs, "|");
append(ctx.result, "}");
}
append(ctx.result, "|{%s|", name);
appendRegisterSet(ctx, rs, "|");
append(ctx.result, "}");
}
};
append(ctx.result, "digraph CFG {\n");
append(ctx.result, "node[shape=record]\n");
}
}
static void appendBlocks(IrToStringContext& ctx, const IrFunction& function, bool includeInst, bool includeIn, bool includeOut, bool includeDef)
{
for (size_t i = 0; i < function.blocks.size(); i++)
{
const IrBlock& block = function.blocks[i];
@ -692,7 +687,8 @@ std::string toDot(const IrFunction& function, bool includeInst)
append(ctx.result, "label=\"{");
toString(ctx, block, uint32_t(i));
appendLabelRegset(ctx.cfg.in, i, "in");
if (includeIn)
appendLabelRegset(ctx, ctx.cfg.in, i, "in");
if (includeInst && block.start != ~0u)
{
@ -709,11 +705,25 @@ std::string toDot(const IrFunction& function, bool includeInst)
}
}
appendLabelRegset(ctx.cfg.def, i, "def");
appendLabelRegset(ctx.cfg.out, i, "out");
if (includeDef)
appendLabelRegset(ctx, ctx.cfg.def, i, "def");
if (includeOut)
appendLabelRegset(ctx, ctx.cfg.out, i, "out");
append(ctx.result, "}\"];\n");
}
}
std::string toDot(const IrFunction& function, bool includeInst)
{
std::string result;
IrToStringContext ctx{result, function.blocks, function.constants, function.cfg};
append(ctx.result, "digraph CFG {\n");
append(ctx.result, "node[shape=record]\n");
appendBlocks(ctx, function, includeInst, /* includeIn */ true, /* includeOut */ true, /* includeDef */ true);
for (size_t i = 0; i < function.blocks.size(); i++)
{
@ -750,6 +760,107 @@ std::string toDot(const IrFunction& function, bool includeInst)
return result;
}
std::string toDotCfg(const IrFunction& function)
{
std::string result;
IrToStringContext ctx{result, function.blocks, function.constants, function.cfg};
append(ctx.result, "digraph CFG {\n");
append(ctx.result, "node[shape=record]\n");
appendBlocks(ctx, function, /* includeInst */ false, /* includeIn */ false, /* includeOut */ false, /* includeDef */ true);
for (size_t i = 0; i < function.blocks.size() && i < ctx.cfg.successorsOffsets.size(); i++)
{
BlockIteratorWrapper succ = successors(ctx.cfg, unsigned(i));
for (uint32_t target : succ)
append(ctx.result, "b%u -> b%u;\n", unsigned(i), target);
}
append(ctx.result, "}\n");
return result;
}
std::string toDotDjGraph(const IrFunction& function)
{
std::string result;
IrToStringContext ctx{result, function.blocks, function.constants, function.cfg};
append(ctx.result, "digraph CFG {\n");
for (size_t i = 0; i < ctx.blocks.size(); i++)
{
const IrBlock& block = ctx.blocks[i];
append(ctx.result, "b%u [", unsigned(i));
if (block.kind == IrBlockKind::Fallback)
append(ctx.result, "style=filled;fillcolor=salmon;");
else if (block.kind == IrBlockKind::Bytecode)
append(ctx.result, "style=filled;fillcolor=palegreen;");
append(ctx.result, "label=\"");
toString(ctx, block, uint32_t(i));
append(ctx.result, "\"];\n");
}
// Layer by depth in tree
uint32_t depth = 0;
bool found = true;
while (found)
{
found = false;
append(ctx.result, "{rank = same;");
for (size_t i = 0; i < ctx.cfg.domOrdering.size(); i++)
{
if (ctx.cfg.domOrdering[i].depth == depth)
{
append(ctx.result, "b%u;", unsigned(i));
found = true;
}
}
append(ctx.result, "}\n");
depth++;
}
for (size_t i = 0; i < ctx.cfg.domChildrenOffsets.size(); i++)
{
BlockIteratorWrapper dom = domChildren(ctx.cfg, unsigned(i));
for (uint32_t target : dom)
append(ctx.result, "b%u -> b%u;\n", unsigned(i), target);
// Join edges are all successor edges that do not strongly dominate
BlockIteratorWrapper succ = successors(ctx.cfg, unsigned(i));
for (uint32_t successor : succ)
{
bool found = false;
for (uint32_t target : dom)
{
if (target == successor)
{
found = true;
break;
}
}
if (!found)
append(ctx.result, "b%u -> b%u [style=dotted];\n", unsigned(i), successor);
}
}
append(ctx.result, "}\n");
return result;
}
std::string dumpDot(const IrFunction& function, bool includeInst)
{
std::string result = toDot(function, includeInst);

View File

@ -185,11 +185,10 @@ static bool emitBuiltin(
}
}
IrLoweringA64::IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, NativeState& data, Proto* proto, IrFunction& function)
IrLoweringA64::IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, NativeState& data, IrFunction& function)
: build(build)
, helpers(helpers)
, data(data)
, proto(proto)
, function(function)
, regs(function, {{x0, x15}, {x16, x17}, {q0, q7}, {q16, q31}})
, valueTracker(function)
@ -1166,25 +1165,17 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
}
case IrCmd::INTERRUPT:
{
RegisterA64 temp = regs.allocTemp(KindA64::x);
regs.spill(build, index);
Label skip, next;
build.ldr(temp, mem(rState, offsetof(lua_State, global)));
build.ldr(temp, mem(temp, offsetof(global_State, cb.interrupt)));
build.cbz(temp, skip);
Label self;
size_t spills = regs.spill(build, index);
build.ldr(x0, mem(rState, offsetof(lua_State, global)));
build.ldr(x0, mem(x0, offsetof(global_State, cb.interrupt)));
build.cbnz(x0, self);
// Jump to outlined interrupt handler, it will give back control to x1
build.mov(x0, (uintOp(inst.a) + 1) * sizeof(Instruction));
build.adr(x1, next);
build.b(helpers.interrupt);
Label next = build.setLabel();
build.setLabel(next);
regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state
build.setLabel(skip);
interruptHandlers.push_back({self, uintOp(inst.a), next});
break;
}
case IrCmd::CHECK_GC:
@ -1343,19 +1334,71 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
break;
case IrCmd::RETURN:
regs.spill(build, index);
// valend = (n == LUA_MULTRET) ? L->top : ra + n
if (intOp(inst.b) == LUA_MULTRET)
build.ldr(x2, mem(rState, offsetof(lua_State, top)));
else
build.add(x2, rBase, uint16_t((vmRegOp(inst.a) + intOp(inst.b)) * sizeof(TValue)));
// returnFallback(L, ra, valend)
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, returnFallback)));
build.blr(x3);
// reentry with x0=closure (NULL will trigger exit)
build.b(helpers.reentry);
if (function.variadic)
{
build.ldr(x1, mem(rState, offsetof(lua_State, ci)));
build.ldr(x1, mem(x1, offsetof(CallInfo, func)));
}
else if (intOp(inst.b) != 1)
build.sub(x1, rBase, sizeof(TValue)); // invariant: ci->func + 1 == ci->base for non-variadic frames
if (intOp(inst.b) == 0)
{
build.mov(w2, 0);
build.b(helpers.return_);
}
else if (intOp(inst.b) == 1 && !function.variadic)
{
// fast path: minimizes x1 adjustments
// note that we skipped x1 computation for this specific case above
build.ldr(q0, mem(rBase, vmRegOp(inst.a) * sizeof(TValue)));
build.str(q0, mem(rBase, -int(sizeof(TValue))));
build.mov(x1, rBase);
build.mov(w2, 1);
build.b(helpers.return_);
}
else if (intOp(inst.b) >= 1 && intOp(inst.b) <= 3)
{
for (int r = 0; r < intOp(inst.b); ++r)
{
build.ldr(q0, mem(rBase, (vmRegOp(inst.a) + r) * sizeof(TValue)));
build.str(q0, mem(x1, sizeof(TValue), AddressKindA64::post));
}
build.mov(w2, intOp(inst.b));
build.b(helpers.return_);
}
else
{
build.mov(w2, 0);
// vali = ra
build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
// valend = (n == LUA_MULTRET) ? L->top : ra + n
if (intOp(inst.b) == LUA_MULTRET)
build.ldr(x4, mem(rState, offsetof(lua_State, top)));
else
build.add(x4, rBase, uint16_t((vmRegOp(inst.a) + intOp(inst.b)) * sizeof(TValue)));
Label repeatValueLoop, exitValueLoop;
if (intOp(inst.b) == LUA_MULTRET)
{
build.cmp(x3, x4);
build.b(ConditionA64::CarrySet, exitValueLoop); // CarrySet == UnsignedGreaterEqual
}
build.setLabel(repeatValueLoop);
build.ldr(q0, mem(x3, sizeof(TValue), AddressKindA64::post));
build.str(q0, mem(x1, sizeof(TValue), AddressKindA64::post));
build.add(w2, w2, 1);
build.cmp(x3, x4);
build.b(ConditionA64::CarryClear, repeatValueLoop); // CarryClear == UnsignedLess
build.setLabel(exitValueLoop);
build.b(helpers.return_);
}
break;
case IrCmd::FORGLOOP:
// register layout: ra + 1 = table, ra + 2 = internal index, ra + 3 .. ra + aux = iteration variables
@ -1682,6 +1725,20 @@ void IrLoweringA64::finishBlock()
regs.assertNoSpills();
}
void IrLoweringA64::finishFunction()
{
if (build.logText)
build.logAppend("; interrupt handlers\n");
for (InterruptHandler& handler : interruptHandlers)
{
build.setLabel(handler.self);
build.mov(x0, (handler.pcpos + 1) * sizeof(Instruction));
build.adr(x1, handler.next);
build.b(helpers.interrupt);
}
}
bool IrLoweringA64::hasError() const
{
return error;

View File

@ -9,8 +9,6 @@
#include <vector>
struct Proto;
namespace Luau
{
namespace CodeGen
@ -25,10 +23,11 @@ namespace A64
struct IrLoweringA64
{
IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, NativeState& data, Proto* proto, IrFunction& function);
IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, NativeState& data, IrFunction& function);
void lowerInst(IrInst& inst, uint32_t index, IrBlock& next);
void finishBlock();
void finishFunction();
bool hasError() const;
@ -55,10 +54,16 @@ struct IrLoweringA64
IrBlock& blockOp(IrOp op) const;
Label& labelOp(IrOp op) const;
struct InterruptHandler
{
Label self;
unsigned int pcpos;
Label next;
};
AssemblyBuilderA64& build;
ModuleHelpers& helpers;
NativeState& data;
Proto* proto = nullptr; // Temporarily required to provide 'Instruction* pc' to old emitInst* methods
IrFunction& function;
@ -66,6 +71,8 @@ struct IrLoweringA64
IrValueLocationTracking valueTracker;
std::vector<InterruptHandler> interruptHandlers;
bool error = false;
};

View File

@ -958,8 +958,27 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
break;
}
case IrCmd::INTERRUPT:
emitInterrupt(regs, build, uintOp(inst.a));
{
unsigned pcpos = uintOp(inst.a);
// We unconditionally spill values here because that allows us to ignore register state when we synthesize interrupt handler
// This can be changed in the future if we can somehow record interrupt handler code separately
// Since interrupts are loop edges or call/ret, we don't have a significant opportunity for register reuse here anyway
regs.preserveAndFreeInstValues();
ScopedRegX64 tmp{regs, SizeX64::qword};
Label self;
build.mov(tmp.reg, qword[rState + offsetof(lua_State, global)]);
build.cmp(qword[tmp.reg + offsetof(global_State, cb.interrupt)], 0);
build.jcc(ConditionX64::NotEqual, self);
Label next = build.setLabel();
interruptHandlers.push_back({self, pcpos, next});
break;
}
case IrCmd::CHECK_GC:
callStepGc(regs, build);
break;
@ -991,7 +1010,6 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
}
case IrCmd::SET_SAVEDPC:
{
// This is like emitSetSavedPc, but using register allocation instead of relying on rax/rdx
ScopedRegX64 tmp1{regs, SizeX64::qword};
ScopedRegX64 tmp2{regs, SizeX64::qword};
@ -1048,7 +1066,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
case IrCmd::RETURN:
regs.assertAllFree();
regs.assertNoSpills();
emitInstReturn(build, helpers, vmRegOp(inst.a), intOp(inst.b));
emitInstReturn(build, helpers, vmRegOp(inst.a), intOp(inst.b), function.variadic);
break;
case IrCmd::FORGLOOP:
regs.assertAllFree();
@ -1350,6 +1368,20 @@ void IrLoweringX64::finishBlock()
regs.assertNoSpills();
}
void IrLoweringX64::finishFunction()
{
if (build.logText)
build.logAppend("; interrupt handlers\n");
for (InterruptHandler& handler : interruptHandlers)
{
build.setLabel(handler.self);
build.mov(rax, handler.pcpos + 1);
build.lea(rbx, handler.next);
build.jmp(helpers.interrupt);
}
}
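Both the A64 and X64 lowerings give IrCmd::INTERRUPT the same shape: a cheap inline null check on the interrupt callback at each interrupt point, with the handler bodies synthesized once per function in finishFunction so the hot path stays short. A hedged C++ analogue of that control flow (the type and member names here are illustrative, not the exact runtime types):

struct GlobalStateLike { void (*interrupt)(void*); };
struct LuaStateLike { GlobalStateLike* global; };

void atInterruptPoint(LuaStateLike* L, unsigned pcpos)
{
    // inline fast path: one load and one compare per interrupt point
    if (L->global->interrupt != nullptr)
    {
        // out-of-line slow path, emitted per handler in finishFunction:
        // the generated code passes pcpos + 1 (the resume position) to the
        // shared helpers.interrupt thunk, then resumes at the 'next' label.
        L->global->interrupt(L /* plus pcpos + 1 in the real calling convention */);
    }
    (void)pcpos;
}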
bool IrLoweringX64::hasError() const
{
// If the register allocator had to use more stack slots than we have available, this function can't run natively

View File

@ -29,6 +29,7 @@ struct IrLoweringX64
void lowerInst(IrInst& inst, uint32_t index, IrBlock& next);
void finishBlock();
void finishFunction();
bool hasError() const;
@ -53,6 +54,13 @@ struct IrLoweringX64
IrBlock& blockOp(IrOp op) const;
Label& labelOp(IrOp op) const;
struct InterruptHandler
{
Label self;
unsigned int pcpos;
Label next;
};
AssemblyBuilderX64& build;
ModuleHelpers& helpers;
NativeState& data;
@ -62,6 +70,8 @@ struct IrLoweringX64
IrRegAllocX64 regs;
IrValueLocationTracking valueTracker;
std::vector<InterruptHandler> interruptHandlers;
};
} // namespace X64

View File

@ -90,7 +90,6 @@ void initFunctions(NativeState& data)
data.context.callEpilogC = callEpilogC;
data.context.callFallback = callFallback;
data.context.returnFallback = returnFallback;
data.context.executeGETGLOBAL = executeGETGLOBAL;
data.context.executeSETGLOBAL = executeSETGLOBAL;

View File

@ -86,7 +86,6 @@ struct NativeContext
void (*callEpilogC)(lua_State* L, int nresults, int n) = nullptr;
Closure* (*callFallback)(lua_State* L, StkId ra, StkId argtop, int nresults) = nullptr;
Closure* (*returnFallback)(lua_State* L, StkId ra, StkId valend) = nullptr;
// Opcode fallbacks, implemented in C
const Instruction* (*executeGETGLOBAL)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr;

View File

@ -1059,16 +1059,21 @@ static void tryCreateLinearBlock(IrBuilder& build, std::vector<uint8_t>& visited
// TODO: using values from the first block can cause the 'live out' set of the linear block's predecessor to be missing required registers
constPropInBlock(build, startingBlock, state);
// Veryfy that target hasn't changed
// Verify that target hasn't changed
LUAU_ASSERT(function.instructions[startingBlock.finish].a.index == targetBlockIdx);
// Note: using startingBlock after this line is unsafe, as the reference may be invalidated by build.block() below
uint32_t startingInsn = startingBlock.start;
// Create new linearized block into which we are going to redirect starting block jump
IrOp newBlock = build.block(IrBlockKind::Linearized);
visited.push_back(false);
// TODO: placement of linear blocks in final lowering is sub-optimal; it should follow our predecessor
build.beginBlock(newBlock);
// By default, blocks are ordered according to their start instruction; we alter the sort order to make sure the linearized block is placed right after the starting block
function.blocks[newBlock.index].sortkey = startingInsn + 1;
replace(function, termInst.a, newBlock);
// Clone the collected path into our fresh block
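A worked example of the sortkey trick, assuming final lowering orders blocks by sortkey (which defaults to the start instruction): if the starting block begins at instruction 40, the freshly created linearized block receives sortkey 41, so it is emitted immediately behind its predecessor even though it was appended to the end of the block list.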

View File

@ -413,8 +413,10 @@ enum LuauBytecodeTag
{
// Bytecode version; runtime supports [MIN, MAX], compiler emits TARGET by default but may emit a higher version when flags are enabled
LBC_VERSION_MIN = 3,
LBC_VERSION_MAX = 3,
LBC_VERSION_MAX = 4,
LBC_VERSION_TARGET = 3,
// Type encoding version
LBC_TYPE_VERSION = 1,
// Types of constant table entries
LBC_CONSTANT_NIL = 0,
LBC_CONSTANT_BOOLEAN,
@ -425,6 +427,25 @@ enum LuauBytecodeTag
LBC_CONSTANT_CLOSURE,
};
// Type table tags
enum LuauBytecodeEncodedType
{
LBC_TYPE_NIL = 0,
LBC_TYPE_BOOLEAN,
LBC_TYPE_NUMBER,
LBC_TYPE_STRING,
LBC_TYPE_TABLE,
LBC_TYPE_FUNCTION,
LBC_TYPE_THREAD,
LBC_TYPE_USERDATA,
LBC_TYPE_VECTOR,
LBC_TYPE_ANY = 15,
LBC_TYPE_OPTIONAL_BIT = 1 << 7,
LBC_TYPE_INVALID = 256,
};
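Each encoded type fits in one byte: the low bits pick a base tag and LBC_TYPE_OPTIONAL_BIT marks optionality, which is why LBC_TYPE_INVALID is deliberately placed at 256, outside the 8-bit range, so it can serve as a sentinel. A small sketch of the encoding arithmetic, with values taken from the enum above:

#include <cstdint>

// `number?` folds into a single byte: base tag 2 plus the optional bit.
// 0x02 | 0x80 == 0x82
uint8_t encodeOptionalNumber()
{
    return uint8_t(2 /* LBC_TYPE_NUMBER */ | (1 << 7) /* LBC_TYPE_OPTIONAL_BIT */);
}

// decoding mirrors it: strip the bit to recover the base tag
bool isOptional(uint8_t t) { return (t & (1 << 7)) != 0; }
uint8_t baseTag(uint8_t t) { return uint8_t(t & ~(1 << 7)); }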
// Builtin function ids, used in LOP_FASTCALL
enum LuauBuiltinFunction
{

View File

@ -74,6 +74,8 @@ public:
void foldJumps();
void expandJumps();
void setFunctionTypeInfo(std::string value);
void setDebugFunctionName(StringRef name);
void setDebugFunctionLineDefined(int line);
void setDebugLine(int line);
@ -118,6 +120,7 @@ public:
std::string dumpFunction(uint32_t id) const;
std::string dumpEverything() const;
std::string dumpSourceRemarks() const;
std::string dumpTypeInfo() const;
void annotateInstruction(std::string& result, uint32_t fid, uint32_t instpos) const;
@ -132,6 +135,7 @@ public:
static std::string getError(const std::string& message);
static uint8_t getVersion();
static uint8_t getTypeEncodingVersion();
private:
struct Constant
@ -186,6 +190,7 @@ private:
std::string dump;
std::string dumpname;
std::vector<int> dumpinstoffs;
std::string typeinfo;
};
struct DebugLocal

View File

@ -6,6 +6,8 @@
#include <algorithm>
#include <string.h>
LUAU_FASTFLAGVARIABLE(BytecodeVersion4, false)
namespace Luau
{
@ -513,6 +515,11 @@ bool BytecodeBuilder::patchSkipC(size_t jumpLabel, size_t targetLabel)
return true;
}
void BytecodeBuilder::setFunctionTypeInfo(std::string value)
{
functions[currentFunction].typeinfo = std::move(value);
}
void BytecodeBuilder::setDebugFunctionName(StringRef name)
{
unsigned int index = addStringTableEntry(name);
@ -606,6 +613,13 @@ void BytecodeBuilder::finalize()
bytecode = char(version);
if (FFlag::BytecodeVersion4)
{
uint8_t typesversion = getTypeEncodingVersion();
LUAU_ASSERT(typesversion == 1);
writeByte(bytecode, typesversion);
}
writeStringTable(bytecode);
writeVarInt(bytecode, uint32_t(functions.size()));
@ -628,6 +642,14 @@ void BytecodeBuilder::writeFunction(std::string& ss, uint32_t id) const
writeByte(ss, func.numupvalues);
writeByte(ss, func.isvararg);
if (FFlag::BytecodeVersion4)
{
writeByte(ss, 0); // Reserved for cgflags
writeVarInt(ss, uint32_t(func.typeinfo.size()));
ss.append(func.typeinfo);
}
// instructions
writeVarInt(ss, uint32_t(insns.size()));
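Taken together, finalize and writeFunction imply the following version-4 layout. This is a hedged reconstruction from the hunks above; fields the diff does not show are elided:

// module header : [version u8 = 4] [typesversion u8 = 1] [string table] ...
// per function  : ... [numupvalues u8] [isvararg u8]
//                 [cgflags u8, reserved and currently always 0]
//                 [typesize varint] [typeinfo byte string]
//                 [sizecode varint] [instructions] ...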
@ -1092,9 +1114,18 @@ std::string BytecodeBuilder::getError(const std::string& message)
uint8_t BytecodeBuilder::getVersion()
{
// This function usually returns LBC_VERSION_TARGET but may sometimes return a higher number (within LBC_VERSION_MIN/MAX) under fast flags
if (FFlag::BytecodeVersion4)
return 4;
return LBC_VERSION_TARGET;
}
uint8_t BytecodeBuilder::getTypeEncodingVersion()
{
return LBC_TYPE_VERSION;
}
#ifdef LUAU_ASSERTENABLED
void BytecodeBuilder::validate() const
{
@ -2269,6 +2300,75 @@ std::string BytecodeBuilder::dumpSourceRemarks() const
return result;
}
static const char* getBaseTypeString(uint8_t type)
{
uint8_t tag = type & ~LBC_TYPE_OPTIONAL_BIT;
switch (tag)
{
case LBC_TYPE_NIL:
return "nil";
case LBC_TYPE_BOOLEAN:
return "boolean";
case LBC_TYPE_NUMBER:
return "number";
case LBC_TYPE_STRING:
return "string";
case LBC_TYPE_TABLE:
return "{ }";
case LBC_TYPE_FUNCTION:
return "function( )";
case LBC_TYPE_THREAD:
return "thread";
case LBC_TYPE_USERDATA:
return "userdata";
case LBC_TYPE_VECTOR:
return "vector";
case LBC_TYPE_ANY:
return "any";
}
LUAU_ASSERT(!"Unhandled type in getBaseTypeString");
return nullptr;
}
std::string BytecodeBuilder::dumpTypeInfo() const
{
std::string result;
for (size_t i = 0; i < functions.size(); ++i)
{
const std::string& typeinfo = functions[i].typeinfo;
if (typeinfo.empty())
continue;
uint8_t encodedType = typeinfo[0];
LUAU_ASSERT(encodedType == LBC_TYPE_FUNCTION);
formatAppend(result, "%zu: function(", i);
LUAU_ASSERT(typeinfo.size() >= 2);
uint8_t numparams = typeinfo[1];
LUAU_ASSERT(size_t(2 + numparams) <= typeinfo.size());
for (uint8_t j = 0; j < numparams; ++j)
{
uint8_t et = typeinfo[2 + j];
const char* optional = (et & LBC_TYPE_OPTIONAL_BIT) ? "?" : "";
formatAppend(result, "%s%s", getBaseTypeString(et), optional);
if (j + 1 != numparams)
formatAppend(result, ", ");
}
formatAppend(result, ")\n");
}
return result;
}
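As a worked example of the dump format: a first function whose typeinfo encodes two parameters, a number and a string with the optional bit set, prints as "0: function(number, string?)"; the ? suffix comes from LBC_TYPE_OPTIONAL_BIT and the base name from getBaseTypeString.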
void BytecodeBuilder::annotateInstruction(std::string& result, uint32_t fid, uint32_t instpos) const
{
if ((dumpFlags & Dump_Code) == 0)

View File

@ -10,6 +10,7 @@
#include "ConstantFolding.h"
#include "CostModel.h"
#include "TableShape.h"
#include "Types.h"
#include "ValueTracking.h"
#include <algorithm>
@ -25,7 +26,8 @@ LUAU_FASTINTVARIABLE(LuauCompileInlineThreshold, 25)
LUAU_FASTINTVARIABLE(LuauCompileInlineThresholdMaxBoost, 300)
LUAU_FASTINTVARIABLE(LuauCompileInlineDepth, 5)
LUAU_FASTFLAGVARIABLE(LuauCompileInlineDefer, false)
LUAU_FASTFLAGVARIABLE(CompileFunctionType, false)
LUAU_FASTFLAG(BytecodeVersion4)
namespace Luau
{
@ -202,6 +204,13 @@ struct Compiler
setDebugLine(func);
if (FFlag::BytecodeVersion4 && FFlag::CompileFunctionType)
{
std::string funcType = getFunctionType(func);
if (!funcType.empty())
bytecode.setFunctionTypeInfo(std::move(funcType));
}
if (func->vararg)
bytecode.emitABC(LOP_PREPVARARGS, uint8_t(self + func->args.size), 0, 0);
@ -560,15 +569,7 @@ struct Compiler
size_t oldLocals = localStack.size();
std::vector<InlineArg> args;
if (FFlag::LuauCompileInlineDefer)
{
args.reserve(func->args.size);
}
else
{
// note that we push the frame early; this is needed to block recursive inline attempts
inlineFrames.push_back({func, oldLocals, target, targetCount});
}
args.reserve(func->args.size);
// evaluate all arguments; note that we don't emit code for constant arguments (relying on constant folding)
// note that compiler state (variable registers/values) does not change here - we defer that to a separate loop below to handle nested calls
@ -590,16 +591,8 @@ struct Compiler
else
LUAU_ASSERT(!"Unexpected expression type");
if (FFlag::LuauCompileInlineDefer)
{
for (size_t j = i; j < func->args.size; ++j)
args.push_back({func->args.data[j], uint8_t(reg + (j - i))});
}
else
{
for (size_t j = i; j < func->args.size; ++j)
pushLocal(func->args.data[j], uint8_t(reg + (j - i)));
}
for (size_t j = i; j < func->args.size; ++j)
args.push_back({func->args.data[j], uint8_t(reg + (j - i))});
// all remaining function arguments have been allocated and assigned to
break;
@ -614,26 +607,17 @@ struct Compiler
else
bytecode.emitABC(LOP_LOADNIL, reg, 0, 0);
if (FFlag::LuauCompileInlineDefer)
args.push_back({var, reg});
else
pushLocal(var, reg);
args.push_back({var, reg});
}
else if (arg == nullptr)
{
// since the argument is not mutated, we can simply fold the value into the expressions that need it
if (FFlag::LuauCompileInlineDefer)
args.push_back({var, kInvalidReg, {Constant::Type_Nil}});
else
locstants[var] = {Constant::Type_Nil};
args.push_back({var, kInvalidReg, {Constant::Type_Nil}});
}
else if (const Constant* cv = constants.find(arg); cv && cv->type != Constant::Type_Unknown)
{
// since the argument is not mutated, we can simply fold the value into the expressions that need it
if (FFlag::LuauCompileInlineDefer)
args.push_back({var, kInvalidReg, *cv});
else
locstants[var] = *cv;
args.push_back({var, kInvalidReg, *cv});
}
else
{
@ -643,20 +627,14 @@ struct Compiler
// if the argument is a local that isn't mutated, we will simply reuse the existing register
if (int reg = le ? getExprLocalReg(le) : -1; reg >= 0 && (!lv || !lv->written))
{
if (FFlag::LuauCompileInlineDefer)
args.push_back({var, uint8_t(reg)});
else
pushLocal(var, uint8_t(reg));
args.push_back({var, uint8_t(reg)});
}
else
{
uint8_t temp = allocReg(arg, 1);
compileExprTemp(arg, temp);
if (FFlag::LuauCompileInlineDefer)
args.push_back({var, temp});
else
pushLocal(var, temp);
args.push_back({var, temp});
}
}
}
@ -668,19 +646,16 @@ struct Compiler
compileExprAuto(expr->args.data[i], rsi);
}
if (FFlag::LuauCompileInlineDefer)
{
// apply all evaluated arguments to the compiler state
// note: locals use current startpc for debug info, although some of them have been computed earlier; this is similar to compileStatLocal
for (InlineArg& arg : args)
if (arg.value.type == Constant::Type_Unknown)
pushLocal(arg.local, arg.reg);
else
locstants[arg.local] = arg.value;
// apply all evaluated arguments to the compiler state
// note: locals use current startpc for debug info, although some of them have been computed earlier; this is similar to compileStatLocal
for (InlineArg& arg : args)
if (arg.value.type == Constant::Type_Unknown)
pushLocal(arg.local, arg.reg);
else
locstants[arg.local] = arg.value;
// the inline frame will be used to compile return statements as well as to reject recursive inlining attempts
inlineFrames.push_back({func, oldLocals, target, targetCount});
}
// the inline frame will be used to compile return statements as well as to reject recursive inlining attempts
inlineFrames.push_back({func, oldLocals, target, targetCount});
// fold constant values updated above into expressions in the function body
foldConstants(constants, variables, locstants, builtinsFold, func->body);

luau/Compiler/src/Types.cpp (new file, 106 lines)
View File

@ -0,0 +1,106 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/BytecodeBuilder.h"
#include "Types.h"
namespace Luau
{
static LuauBytecodeEncodedType getType(AstType* ty)
{
if (AstTypeReference* ref = ty->as<AstTypeReference>())
{
if (ref->name == "nil")
return LBC_TYPE_NIL;
else if (ref->name == "boolean")
return LBC_TYPE_BOOLEAN;
else if (ref->name == "number")
return LBC_TYPE_NUMBER;
else if (ref->name == "string")
return LBC_TYPE_STRING;
else if (ref->name == "thread")
return LBC_TYPE_THREAD;
else if (ref->name == "any" || ref->name == "unknown")
return LBC_TYPE_ANY;
}
else if (AstTypeTable* table = ty->as<AstTypeTable>())
{
return LBC_TYPE_TABLE;
}
else if (AstTypeFunction* func = ty->as<AstTypeFunction>())
{
return LBC_TYPE_FUNCTION;
}
else if (AstTypeUnion* un = ty->as<AstTypeUnion>())
{
bool optional = false;
LuauBytecodeEncodedType type = LBC_TYPE_INVALID;
for (AstType* elem : un->types)
{
LuauBytecodeEncodedType et = getType(elem);
if (et == LBC_TYPE_NIL)
{
optional = true;
continue;
}
if (type == LBC_TYPE_INVALID)
{
type = et;
continue;
}
if (type != et)
return LBC_TYPE_ANY;
}
if (type == LBC_TYPE_INVALID)
return LBC_TYPE_ANY;
return LuauBytecodeEncodedType(type | (optional && (type != LBC_TYPE_ANY) ? LBC_TYPE_OPTIONAL_BIT : 0));
}
else if (AstTypeIntersection* inter = ty->as<AstTypeIntersection>())
{
return LBC_TYPE_ANY;
}
return LBC_TYPE_ANY;
}
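The union branch implements a simple flattening rule: nil members set the optional bit, a single non-nil tag is kept, and two distinct non-nil tags collapse to any. Some hedged input/output pairs under that rule:

// getType(number | nil)           -> LBC_TYPE_NUMBER | LBC_TYPE_OPTIONAL_BIT
// getType(number | string)        -> LBC_TYPE_ANY (mixed tags)
// getType(nil | nil)              -> LBC_TYPE_ANY (no non-nil member)
// getType(number | number | nil)  -> LBC_TYPE_NUMBER | LBC_TYPE_OPTIONAL_BIT
// getType(any | nil)              -> LBC_TYPE_ANY (optional bit suppressed for any)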
std::string getFunctionType(const AstExprFunction* func)
{
if (func->vararg || func->generics.size || func->genericPacks.size)
return {};
bool self = func->self != 0;
std::string typeInfo;
typeInfo.reserve(func->args.size + self + 2);
typeInfo.push_back(LBC_TYPE_FUNCTION);
typeInfo.push_back(uint8_t(self + func->args.size));
if (self)
typeInfo.push_back(LBC_TYPE_TABLE);
bool haveNonAnyParam = false;
for (AstLocal* arg : func->args)
{
LuauBytecodeEncodedType ty = arg->annotation ? getType(arg->annotation) : LBC_TYPE_ANY;
if (ty != LBC_TYPE_ANY)
haveNonAnyParam = true;
typeInfo.push_back(ty);
}
// If all parameters simplify to any, we can just omit type info for this function
if (!haveNonAnyParam)
return {};
return typeInfo;
}
} // namespace Luau
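Putting the pieces together, and assuming `string?` parses as a union with nil as the reference handling above suggests, a hypothetical method declared as function M:f(x: number, y: string?) would serialize to five bytes:

// typeInfo[0] = LBC_TYPE_FUNCTION                        (0x05)
// typeInfo[1] = 3                                        (implicit self + two parameters)
// typeInfo[2] = LBC_TYPE_TABLE                           (0x04, the self slot)
// typeInfo[3] = LBC_TYPE_NUMBER                          (0x02)
// typeInfo[4] = LBC_TYPE_STRING | LBC_TYPE_OPTIONAL_BIT  (0x83)
// at least one parameter is more specific than any, so the string is kept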

View File

@ -0,0 +1,9 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Ast.h"
namespace Luau
{
std::string getFunctionType(const AstExprFunction* func);
} // namespace Luau

View File

@ -17,8 +17,6 @@
#include <string.h>
LUAU_FASTFLAGVARIABLE(LuauUniformTopHandling, false)
/*
** {======================================================
** Error-recovery functions
@ -252,7 +250,7 @@ void luaD_call(lua_State* L, StkId func, int nresults)
L->isactive = false;
}
if (FFlag::LuauUniformTopHandling && nresults != LUA_MULTRET)
if (nresults != LUA_MULTRET)
L->top = restorestack(L, old_func) + nresults;
L->nCcalls--;

View File

@ -24,7 +24,6 @@ LUAI_FUNC void luaV_gettable(lua_State* L, const TValue* t, TValue* key, StkId v
LUAI_FUNC void luaV_settable(lua_State* L, const TValue* t, TValue* key, StkId val);
LUAI_FUNC void luaV_concat(lua_State* L, int total, int last);
LUAI_FUNC void luaV_getimport(lua_State* L, Table* env, TValue* k, StkId res, uint32_t id, bool propagatenil);
LUAI_FUNC void luaV_getimport_dep(lua_State* L, Table* env, TValue* k, uint32_t id, bool propagatenil);
LUAI_FUNC void luaV_prepareFORN(lua_State* L, StkId plimit, StkId pstep, StkId pinit);
LUAI_FUNC void luaV_callTM(lua_State* L, int nparams, int res);
LUAI_FUNC void luaV_tryfuncTM(lua_State* L, StkId func);

View File

@ -16,9 +16,6 @@
#include <string.h>
LUAU_FASTFLAG(LuauUniformTopHandling)
LUAU_FASTFLAG(LuauGetImportDirect)
// Disable c99-designator to avoid the warning in CGOTO dispatch table
#ifdef __clang__
#if __has_warning("-Wc99-designator")
@ -433,20 +430,8 @@ reentry:
{
uint32_t aux = *pc++;
if (FFlag::LuauGetImportDirect)
{
VM_PROTECT(luaV_getimport(L, cl->env, k, ra, aux, /* propagatenil= */ false));
VM_NEXT();
}
else
{
VM_PROTECT(luaV_getimport_dep(L, cl->env, k, aux, /* propagatenil= */ false));
ra = VM_REG(LUAU_INSN_A(insn)); // previous call may change the stack
setobj2s(L, ra, L->top - 1);
L->top--;
VM_NEXT();
}
VM_PROTECT(luaV_getimport(L, cl->env, k, ra, aux, /* propagatenil= */ false));
VM_NEXT();
}
}
@ -1043,8 +1028,6 @@ reentry:
// we're done!
if (LUAU_UNLIKELY(ci->flags & LUA_CALLINFO_RETURN))
{
if (!FFlag::LuauUniformTopHandling)
L->top = res;
goto exit;
}

View File

@ -13,8 +13,6 @@
#include <string.h>
LUAU_FASTFLAGVARIABLE(LuauGetImportDirect, false)
// TODO: RAII deallocation doesn't work for longjmp builds if a memory error happens
template<typename T>
struct TempBuffer
@ -77,34 +75,6 @@ void luaV_getimport(lua_State* L, Table* env, TValue* k, StkId res, uint32_t id,
luaV_gettable(L, res, &k[id2], res);
}
void luaV_getimport_dep(lua_State* L, Table* env, TValue* k, uint32_t id, bool propagatenil)
{
LUAU_ASSERT(!FFlag::LuauGetImportDirect);
int count = id >> 30;
int id0 = count > 0 ? int(id >> 20) & 1023 : -1;
int id1 = count > 1 ? int(id >> 10) & 1023 : -1;
int id2 = count > 2 ? int(id) & 1023 : -1;
// allocate a stack slot so that we can do table lookups
luaD_checkstack(L, 1);
setnilvalue(L->top);
L->top++;
// global lookup into L->top-1
TValue g;
sethvalue(L, &g, env);
luaV_gettable(L, &g, &k[id0], L->top - 1);
// table lookup for id1
if (id1 >= 0 && (!propagatenil || !ttisnil(L->top - 1)))
luaV_gettable(L, L->top - 1, &k[id1], L->top - 1);
// table lookup for id2
if (id2 >= 0 && (!propagatenil || !ttisnil(L->top - 1)))
luaV_gettable(L, L->top - 1, &k[id2], L->top - 1);
}
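Although luaV_getimport_dep is deleted, it remains the clearest documentation of the import id packing: two high bits for the path length, then up to three 10-bit constant-table indices. A self-contained decoder sketch of that layout (the struct and function names are mine, not from the source):

#include <cstdint>

struct ImportPath
{
    int count;          // number of path components (1..3)
    int id0, id1, id2;  // constant-table indices, -1 when absent
};

ImportPath decodeImportId(uint32_t id)
{
    int count = int(id >> 30);
    return {
        count,
        count > 0 ? int(id >> 20) & 1023 : -1,
        count > 1 ? int(id >> 10) & 1023 : -1,
        count > 2 ? int(id) & 1023 : -1,
    };
}
// e.g. the import A.B.C stores count = 3 plus the constant indices of "A", "B", "C"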
template<typename T>
static T read(const char* data, size_t size, size_t& offset)
{
@ -153,17 +123,12 @@ static void resolveImportSafe(lua_State* L, Table* env, TValue* k, uint32_t id)
// note: we call getimport with nil propagation which means that accesses to table chains like A.B.C will resolve to nil
// this is technically not necessary but it reduces the number of exceptions when loading scripts that rely on getfenv/setfenv for global
// injection
if (FFlag::LuauGetImportDirect)
{
// allocate a stack slot so that we can do table lookups
luaD_checkstack(L, 1);
setnilvalue(L->top);
L->top++;
// allocate a stack slot so that we can do table lookups
luaD_checkstack(L, 1);
setnilvalue(L->top);
L->top++;
luaV_getimport(L, L->gt, self->k, L->top - 1, self->id, /* propagatenil= */ true);
}
else
luaV_getimport_dep(L, L->gt, self->k, self->id, /* propagatenil= */ true);
luaV_getimport(L, L->gt, self->k, L->top - 1, self->id, /* propagatenil= */ true);
}
};
@ -194,6 +159,8 @@ int luau_load(lua_State* L, const char* chunkname, const char* data, size_t size
uint8_t version = read<uint8_t>(data, size, offset);
// 0 means the rest of the bytecode is the error message
if (version == 0)
{
@ -221,6 +188,13 @@ int luau_load(lua_State* L, const char* chunkname, const char* data, size_t size
TString* source = luaS_new(L, chunkname);
if (version >= 4)
{
uint8_t typesversion = read<uint8_t>(data, size, offset);
LUAU_ASSERT(typesversion == 1);
}
// string table
unsigned int stringCount = readVarInt(data, size, offset);
TempBuffer<TString*> strings(L, stringCount);
@ -248,6 +222,25 @@ int luau_load(lua_State* L, const char* chunkname, const char* data, size_t size
p->nups = read<uint8_t>(data, size, offset);
p->is_vararg = read<uint8_t>(data, size, offset);
if (version >= 4)
{
uint8_t cgflags = read<uint8_t>(data, size, offset);
LUAU_ASSERT(cgflags == 0);
uint32_t typesize = readVarInt(data, size, offset);
if (typesize)
{
uint8_t* types = (uint8_t*)data + offset;
LUAU_ASSERT(typesize == unsigned(2 + p->numparams));
LUAU_ASSERT(types[0] == LBC_TYPE_FUNCTION);
LUAU_ASSERT(types[1] == p->numparams);
offset += typesize;
}
}
p->sizecode = readVarInt(data, size, offset);
p->code = luaM_newarray(L, p->sizecode, Instruction, p->memcat);
for (int j = 0; j < p->sizecode; ++j)
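The loader above only validates the type record and skips past it; nothing consumes the bytes yet. For completeness, a hedged sketch of what a consumer of that blob would look like, mirroring the asserts in luau_load (the function name and spelled-out constants are assumptions, not runtime API):

#include <cstddef>
#include <cstdint>

// types[0] is LBC_TYPE_FUNCTION (5), types[1] the parameter count, and one
// byte per parameter follows, with the optional bit in the top bit.
void readParamTypes(const uint8_t* types, size_t typesize)
{
    if (typesize < 2 || types[0] != 5)
        return;

    uint8_t numparams = types[1];
    for (uint8_t i = 0; i < numparams && size_t(2 + i) < typesize; ++i)
    {
        uint8_t t = types[2 + i];
        bool optional = (t & 0x80) != 0; // LBC_TYPE_OPTIONAL_BIT
        uint8_t tag = t & 0x7f;          // base LBC_TYPE_* tag
        (void)optional;
        (void)tag; // a real consumer would map these onto runtime checks
    }
}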