diff --git a/Cargo.toml b/Cargo.toml index c6f4937..7caca60 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "luau0-src" -version = "0.5.9+luau579" +version = "0.5.10+luau581" authors = ["Aleksandr Orlenko "] edition = "2021" repository = "https://github.com/khvzak/luau-src-rs" diff --git a/luau/Ast/include/Luau/Ast.h b/luau/Ast/include/Luau/Ast.h index a486ad0..f9f9ab4 100644 --- a/luau/Ast/include/Luau/Ast.h +++ b/luau/Ast/include/Luau/Ast.h @@ -801,12 +801,20 @@ struct AstDeclaredClassProp bool isMethod = false; }; +struct AstTableIndexer +{ + AstType* indexType; + AstType* resultType; + Location location; +}; + class AstStatDeclareClass : public AstStat { public: LUAU_RTTI(AstStatDeclareClass) - AstStatDeclareClass(const Location& location, const AstName& name, std::optional superName, const AstArray& props); + AstStatDeclareClass(const Location& location, const AstName& name, std::optional superName, const AstArray& props, + AstTableIndexer* indexer = nullptr); void visit(AstVisitor* visitor) override; @@ -814,6 +822,7 @@ public: std::optional superName; AstArray props; + AstTableIndexer* indexer; }; class AstType : public AstNode @@ -862,13 +871,6 @@ struct AstTableProp AstType* type; }; -struct AstTableIndexer -{ - AstType* indexType; - AstType* resultType; - Location location; -}; - class AstTypeTable : public AstType { public: diff --git a/luau/Ast/src/Ast.cpp b/luau/Ast/src/Ast.cpp index d2c552a..3c87e36 100644 --- a/luau/Ast/src/Ast.cpp +++ b/luau/Ast/src/Ast.cpp @@ -714,12 +714,13 @@ void AstStatDeclareFunction::visit(AstVisitor* visitor) } } -AstStatDeclareClass::AstStatDeclareClass( - const Location& location, const AstName& name, std::optional superName, const AstArray& props) +AstStatDeclareClass::AstStatDeclareClass(const Location& location, const AstName& name, std::optional superName, + const AstArray& props, AstTableIndexer* indexer) : AstStat(ClassIndex(), location) , name(name) , superName(superName) , props(props) + , indexer(indexer) { } diff --git a/luau/Ast/src/Parser.cpp b/luau/Ast/src/Parser.cpp index 7cae609..cc5d7b3 100644 --- a/luau/Ast/src/Parser.cpp +++ b/luau/Ast/src/Parser.cpp @@ -13,6 +13,7 @@ // See docs/SyntaxChanges.md for an explanation. LUAU_FASTINTVARIABLE(LuauRecursionLimit, 1000) LUAU_FASTINTVARIABLE(LuauParseErrorLimit, 100) +LUAU_FASTFLAGVARIABLE(LuauParseDeclareClassIndexer, false) #define ERROR_INVALID_INTERP_DOUBLE_BRACE "Double braces are not permitted within interpolated strings. Did you mean '\\{'?" @@ -877,6 +878,7 @@ AstStat* Parser::parseDeclaration(const Location& start) } TempVector props(scratchDeclaredClassProps); + AstTableIndexer* indexer = nullptr; while (lexer.current().type != Lexeme::ReservedEnd) { @@ -885,7 +887,8 @@ AstStat* Parser::parseDeclaration(const Location& start) { props.push_back(parseDeclaredClassMethod()); } - else if (lexer.current().type == '[') + else if (lexer.current().type == '[' && (!FFlag::LuauParseDeclareClassIndexer || lexer.lookahead().type == Lexeme::RawString || + lexer.lookahead().type == Lexeme::QuotedString)) { const Lexeme begin = lexer.current(); nextLexeme(); // [ @@ -904,6 +907,22 @@ AstStat* Parser::parseDeclaration(const Location& start) else report(begin.location, "String literal contains malformed escape sequence"); } + else if (lexer.current().type == '[' && FFlag::LuauParseDeclareClassIndexer) + { + if (indexer) + { + // maybe we don't need to parse the entire badIndexer... + // however, we either have { or [ to lint, not the entire table type or the bad indexer. + AstTableIndexer* badIndexer = parseTableIndexer(); + + // we lose all additional indexer expressions from the AST after error recovery here + report(badIndexer->location, "Cannot have more than one class indexer"); + } + else + { + indexer = parseTableIndexer(); + } + } else { Name propName = parseName("property name"); @@ -916,7 +935,7 @@ AstStat* Parser::parseDeclaration(const Location& start) Location classEnd = lexer.current().location; nextLexeme(); // skip past `end` - return allocator.alloc(Location(classStart, classEnd), className.name, superName, copy(props)); + return allocator.alloc(Location(classStart, classEnd), className.name, superName, copy(props), indexer); } else if (std::optional globalName = parseNameOpt("global variable name")) { diff --git a/luau/CodeGen/include/Luau/AddressA64.h b/luau/CodeGen/include/Luau/AddressA64.h index acb64e3..097cc13 100644 --- a/luau/CodeGen/include/Luau/AddressA64.h +++ b/luau/CodeGen/include/Luau/AddressA64.h @@ -14,13 +14,10 @@ namespace A64 enum class AddressKindA64 : uint8_t { - imm, // reg + imm - reg, // reg + reg - - // TODO: - // reg + reg << shift - // reg + sext(reg) << shift - // reg + uext(reg) << shift + reg, // reg + reg + imm, // reg + imm + pre, // reg + imm, reg += imm + post, // reg, reg += imm }; struct AddressA64 @@ -29,13 +26,14 @@ struct AddressA64 // For example, ldr x0, [reg+imm] is limited to 8 KB offsets assuming imm is divisible by 8, but loading into w0 reduces the range to 4 KB static constexpr size_t kMaxOffset = 1023; - constexpr AddressA64(RegisterA64 base, int off = 0) - : kind(AddressKindA64::imm) + constexpr AddressA64(RegisterA64 base, int off = 0, AddressKindA64 kind = AddressKindA64::imm) + : kind(kind) , base(base) , offset(xzr) , data(off) { LUAU_ASSERT(base.kind == KindA64::x || base == sp); + LUAU_ASSERT(kind != AddressKindA64::reg); } constexpr AddressA64(RegisterA64 base, RegisterA64 offset) diff --git a/luau/CodeGen/include/Luau/AssemblyBuilderX64.h b/luau/CodeGen/include/Luau/AssemblyBuilderX64.h index 9e7d501..aea01ee 100644 --- a/luau/CodeGen/include/Luau/AssemblyBuilderX64.h +++ b/luau/CodeGen/include/Luau/AssemblyBuilderX64.h @@ -98,6 +98,8 @@ public: void call(Label& label); void call(OperandX64 op); + void lea(RegisterX64 lhs, Label& label); + void int3(); void ud2(); @@ -243,6 +245,7 @@ private: LUAU_NOINLINE void log(const char* opcode, OperandX64 op1, OperandX64 op2, OperandX64 op3, OperandX64 op4); LUAU_NOINLINE void log(Label label); LUAU_NOINLINE void log(const char* opcode, Label label); + LUAU_NOINLINE void log(const char* opcode, RegisterX64 reg, Label label); void log(OperandX64 op); const char* getSizeName(SizeX64 size) const; diff --git a/luau/CodeGen/include/Luau/IrAnalysis.h b/luau/CodeGen/include/Luau/IrAnalysis.h index 5418009..ca1eba6 100644 --- a/luau/CodeGen/include/Luau/IrAnalysis.h +++ b/luau/CodeGen/include/Luau/IrAnalysis.h @@ -1,6 +1,8 @@ // This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details #pragma once +#include "Luau/Common.h" + #include #include #include @@ -37,6 +39,16 @@ struct RegisterSet void requireVariadicSequence(RegisterSet& sourceRs, const RegisterSet& defRs, uint8_t varargStart); +struct BlockOrdering +{ + uint32_t depth = 0; + + uint32_t preOrder = ~0u; + uint32_t postOrder = ~0u; + + bool visited = false; +}; + struct CfgInfo { std::vector predecessors; @@ -45,6 +57,15 @@ struct CfgInfo std::vector successors; std::vector successorsOffsets; + // Immediate dominators (unique parent in the dominator tree) + std::vector idoms; + + // Children in the dominator tree + std::vector domChildren; + std::vector domChildrenOffsets; + + std::vector domOrdering; + // VM registers that are live when the block is entered // Additionally, an active variadic sequence can exist at the entry of the block std::vector in; @@ -64,6 +85,18 @@ struct CfgInfo RegisterSet captured; }; +// A quick refresher on dominance and dominator trees: +// * If A is a dominator of B (A dom B), you can never execute B without executing A first +// * A is a strict dominator of B (A sdom B) is similar to previous one but A != B +// * Immediate dominator node N (idom N) is a unique node T so that T sdom N, +// but T does not strictly dominate any other node that dominates N. +// * Dominance frontier is a set of nodes where dominance of a node X ends. +// In practice this is where values established by node X might no longer hold because of join edges from other nodes coming in. +// This is also where PHI instructions in SSA are placed. +void computeCfgImmediateDominators(IrFunction& function); +void computeCfgDominanceTreeChildren(IrFunction& function); + +// Function used to update all CFG data void computeCfgInfo(IrFunction& function); struct BlockIteratorWrapper @@ -90,10 +123,17 @@ struct BlockIteratorWrapper { return itEnd; } + + uint32_t operator[](size_t pos) const + { + LUAU_ASSERT(pos < size_t(itEnd - itBegin)); + return itBegin[pos]; + } }; BlockIteratorWrapper predecessors(const CfgInfo& cfg, uint32_t blockIdx); BlockIteratorWrapper successors(const CfgInfo& cfg, uint32_t blockIdx); +BlockIteratorWrapper domChildren(const CfgInfo& cfg, uint32_t blockIdx); } // namespace CodeGen } // namespace Luau diff --git a/luau/CodeGen/include/Luau/IrData.h b/luau/CodeGen/include/Luau/IrData.h index 0e17cba..1c79ccb 100644 --- a/luau/CodeGen/include/Luau/IrData.h +++ b/luau/CodeGen/include/Luau/IrData.h @@ -801,6 +801,8 @@ struct IrBlock uint32_t start = ~0u; uint32_t finish = ~0u; + uint32_t sortkey = ~0u; + Label label; }; @@ -823,6 +825,7 @@ struct IrFunction uint32_t validRestoreOpBlockIdx = 0; Proto* proto = nullptr; + bool variadic = false; CfgInfo cfg; diff --git a/luau/CodeGen/include/Luau/IrDump.h b/luau/CodeGen/include/Luau/IrDump.h index 179edd0..2f86ebf 100644 --- a/luau/CodeGen/include/Luau/IrDump.h +++ b/luau/CodeGen/include/Luau/IrDump.h @@ -38,6 +38,8 @@ std::string toString(const IrFunction& function, bool includeUseInfo); std::string dump(const IrFunction& function); std::string toDot(const IrFunction& function, bool includeInst); +std::string toDotCfg(const IrFunction& function); +std::string toDotDjGraph(const IrFunction& function); std::string dumpDot(const IrFunction& function, bool includeInst); diff --git a/luau/CodeGen/src/AssemblyBuilderA64.cpp b/luau/CodeGen/src/AssemblyBuilderA64.cpp index 99a6848..c62d797 100644 --- a/luau/CodeGen/src/AssemblyBuilderA64.cpp +++ b/luau/CodeGen/src/AssemblyBuilderA64.cpp @@ -876,6 +876,9 @@ void AssemblyBuilderA64::placeA(const char* name, RegisterA64 dst, AddressA64 sr switch (src.kind) { + case AddressKindA64::reg: + place(dst.index | (src.base.index << 5) | (0b011'0'10 << 10) | (src.offset.index << 16) | (1 << 21) | (opsize << 22)); + break; case AddressKindA64::imm: if (unsigned(src.data >> sizelog) < 1024 && (src.data & ((1 << sizelog) - 1)) == 0) place(dst.index | (src.base.index << 5) | ((src.data >> sizelog) << 10) | (opsize << 22) | (1 << 24)); @@ -884,8 +887,13 @@ void AssemblyBuilderA64::placeA(const char* name, RegisterA64 dst, AddressA64 sr else LUAU_ASSERT(!"Unable to encode large immediate offset"); break; - case AddressKindA64::reg: - place(dst.index | (src.base.index << 5) | (0b011'0'10 << 10) | (src.offset.index << 16) | (1 << 21) | (opsize << 22)); + case AddressKindA64::pre: + LUAU_ASSERT(src.data >= -256 && src.data <= 255); + place(dst.index | (src.base.index << 5) | (0b11 << 10) | ((src.data & ((1 << 9) - 1)) << 12) | (opsize << 22)); + break; + case AddressKindA64::post: + LUAU_ASSERT(src.data >= -256 && src.data <= 255); + place(dst.index | (src.base.index << 5) | (0b01 << 10) | ((src.data & ((1 << 9) - 1)) << 12) | (opsize << 22)); break; } @@ -1312,23 +1320,37 @@ void AssemblyBuilderA64::log(RegisterA64 reg) void AssemblyBuilderA64::log(AddressA64 addr) { - text.append("["); switch (addr.kind) { - case AddressKindA64::imm: - log(addr.base); - if (addr.data != 0) - logAppend(",#%d", addr.data); - break; case AddressKindA64::reg: + text.append("["); log(addr.base); text.append(","); log(addr.offset); + text.append("]"); + break; + case AddressKindA64::imm: + text.append("["); + log(addr.base); if (addr.data != 0) - logAppend(" LSL #%d", addr.data); + logAppend(",#%d", addr.data); + text.append("]"); + break; + case AddressKindA64::pre: + text.append("["); + log(addr.base); + if (addr.data != 0) + logAppend(",#%d", addr.data); + text.append("]!"); + break; + case AddressKindA64::post: + text.append("["); + log(addr.base); + text.append("]!"); + if (addr.data != 0) + logAppend(",#%d", addr.data); break; } - text.append("]"); } } // namespace A64 diff --git a/luau/CodeGen/src/AssemblyBuilderX64.cpp b/luau/CodeGen/src/AssemblyBuilderX64.cpp index 426a025..2a8bc92 100644 --- a/luau/CodeGen/src/AssemblyBuilderX64.cpp +++ b/luau/CodeGen/src/AssemblyBuilderX64.cpp @@ -463,6 +463,20 @@ void AssemblyBuilderX64::call(OperandX64 op) commit(); } +void AssemblyBuilderX64::lea(RegisterX64 lhs, Label& label) +{ + LUAU_ASSERT(lhs.size == SizeX64::qword); + + placeBinaryRegAndRegMem(lhs, OperandX64(SizeX64::qword, noreg, 1, rip, 0), 0x8d, 0x8d); + + codePos -= 4; + placeLabel(label); + commit(); + + if (logText) + log("lea", lhs, label); +} + void AssemblyBuilderX64::int3() { if (logText) @@ -1501,6 +1515,14 @@ void AssemblyBuilderX64::log(const char* opcode, Label label) logAppend(" %-12s.L%d\n", opcode, label.id); } +void AssemblyBuilderX64::log(const char* opcode, RegisterX64 reg, Label label) +{ + logAppend(" %-12s", opcode); + log(reg); + text.append(","); + logAppend(".L%d\n", label.id); +} + void AssemblyBuilderX64::log(OperandX64 op) { switch (op.cat) diff --git a/luau/CodeGen/src/CodeGen.cpp b/luau/CodeGen/src/CodeGen.cpp index 4ee8e44..d7283b4 100644 --- a/luau/CodeGen/src/CodeGen.cpp +++ b/luau/CodeGen/src/CodeGen.cpp @@ -125,7 +125,7 @@ static bool lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction& return (a.kind == IrBlockKind::Fallback) < (b.kind == IrBlockKind::Fallback); // Try to order by instruction order - return a.start < b.start; + return a.sortkey < b.sortkey; }); // For each IR instruction that begins a bytecode instruction, which bytecode instruction is it? @@ -234,6 +234,8 @@ static bool lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction& build.setLabel(abandoned.label); } + lowering.finishFunction(); + return false; } } @@ -244,7 +246,15 @@ static bool lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction& build.logAppend("#\n"); } - if (outputEnabled && !options.includeOutlinedCode && seenFallback) + if (!seenFallback) + { + textSize = build.text.length(); + codeSize = build.getCodeSize(); + } + + lowering.finishFunction(); + + if (outputEnabled && !options.includeOutlinedCode && textSize < build.text.size()) { build.text.resize(textSize); @@ -268,7 +278,7 @@ static bool lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction& [[maybe_unused]] static bool lowerIr( A64::AssemblyBuilderA64& build, IrBuilder& ir, NativeState& data, ModuleHelpers& helpers, Proto* proto, AssemblyOptions options) { - A64::IrLoweringA64 lowering(build, helpers, data, proto, ir.function); + A64::IrLoweringA64 lowering(build, helpers, data, ir.function); return lowerImpl(build, lowering, ir.function, proto->bytecodeid, options); } @@ -594,6 +604,12 @@ std::string getAssembly(lua_State* L, int idx, AssemblyOptions options) X64::assembleHelpers(build, helpers); #endif + if (!options.includeOutlinedCode && options.includeAssembly) + { + build.text.clear(); + build.logAppend("; skipping %u bytes of outlined helpers\n", unsigned(build.getCodeSize() * sizeof(build.code[0]))); + } + for (Proto* p : protos) if (p) if (std::optional np = assembleFunction(build, data, helpers, p, options)) diff --git a/luau/CodeGen/src/CodeGenA64.cpp b/luau/CodeGen/src/CodeGenA64.cpp index c5042fc..cc01318 100644 --- a/luau/CodeGen/src/CodeGenA64.cpp +++ b/luau/CodeGen/src/CodeGenA64.cpp @@ -117,6 +117,81 @@ static void emitReentry(AssemblyBuilderA64& build, ModuleHelpers& helpers) build.br(x4); } +void emitReturn(AssemblyBuilderA64& build, ModuleHelpers& helpers) +{ + // x1 = res + // w2 = number of written values + + // x0 = ci + build.ldr(x0, mem(rState, offsetof(lua_State, ci))); + // w3 = ci->nresults + build.ldr(w3, mem(x0, offsetof(CallInfo, nresults))); + + Label skipResultCopy; + + // Fill the rest of the expected results (nresults - written) with 'nil' + build.cmp(w2, w3); + build.b(ConditionA64::GreaterEqual, skipResultCopy); + + // TODO: cmp above could compute this and flags using subs + build.sub(w2, w3, w2); // counter = nresults - written + build.mov(w4, LUA_TNIL); + + Label repeatNilLoop = build.setLabel(); + build.str(w4, mem(x1, offsetof(TValue, tt))); + build.add(x1, x1, sizeof(TValue)); + build.sub(w2, w2, 1); + build.cbnz(w2, repeatNilLoop); + + build.setLabel(skipResultCopy); + + // x2 = cip = ci - 1 + build.sub(x2, x0, sizeof(CallInfo)); + + // res = cip->top when nresults >= 0 + Label skipFixedRetTop; + build.tbnz(w3, 31, skipFixedRetTop); + build.ldr(x1, mem(x2, offsetof(CallInfo, top))); // res = cip->top + build.setLabel(skipFixedRetTop); + + // Update VM state (ci, base, top) + build.str(x2, mem(rState, offsetof(lua_State, ci))); // L->ci = cip + build.ldr(rBase, mem(x2, offsetof(CallInfo, base))); // sync base = L->base while we have a chance + build.str(rBase, mem(rState, offsetof(lua_State, base))); // L->base = cip->base + + build.str(x1, mem(rState, offsetof(lua_State, top))); // L->top = res + + // Unlikely, but this might be the last return from VM + build.ldr(w4, mem(x0, offsetof(CallInfo, flags))); + build.tbnz(w4, countrz(LUA_CALLINFO_RETURN), helpers.exitNoContinueVm); + + // Continue in interpreter if function has no native data + build.ldr(w4, mem(x2, offsetof(CallInfo, flags))); + build.tbz(w4, countrz(LUA_CALLINFO_NATIVE), helpers.exitContinueVm); + + // Need to update state of the current function before we jump away + build.ldr(rClosure, mem(x2, offsetof(CallInfo, func))); + build.ldr(rClosure, mem(rClosure, offsetof(TValue, value.gc))); + + build.ldr(x1, mem(rClosure, offsetof(Closure, l.p))); // cl->l.p aka proto + + LUAU_ASSERT(offsetof(Proto, code) == offsetof(Proto, k) + 8); + build.ldp(rConstants, rCode, mem(x1, offsetof(Proto, k))); // proto->k, proto->code + + // Get instruction index from instruction pointer + // To get instruction index from instruction pointer, we need to divide byte offset by 4 + // But we will actually need to scale instruction index by 4 back to byte offset later so it cancels out + build.ldr(x2, mem(x2, offsetof(CallInfo, savedpc))); // cip->savedpc + build.sub(x2, x2, rCode); + + // Get new instruction location and jump to it + LUAU_ASSERT(offsetof(Proto, exectarget) == offsetof(Proto, execdata) + 8); + build.ldp(x3, x4, mem(x1, offsetof(Proto, execdata))); + build.ldr(w2, mem(x3, x2)); + build.add(x4, x4, x2); + build.br(x4); +} + static EntryLocations buildEntryFunction(AssemblyBuilderA64& build, UnwindBuilder& unwind) { EntryLocations locations; @@ -213,23 +288,28 @@ void assembleHelpers(AssemblyBuilderA64& build, ModuleHelpers& helpers) { if (build.logText) build.logAppend("; exitContinueVm\n"); - helpers.exitContinueVm = build.setLabel(); + build.setLabel(helpers.exitContinueVm); emitExit(build, /* continueInVm */ true); if (build.logText) build.logAppend("; exitNoContinueVm\n"); - helpers.exitNoContinueVm = build.setLabel(); + build.setLabel(helpers.exitNoContinueVm); emitExit(build, /* continueInVm */ false); if (build.logText) build.logAppend("; reentry\n"); - helpers.reentry = build.setLabel(); + build.setLabel(helpers.reentry); emitReentry(build, helpers); if (build.logText) build.logAppend("; interrupt\n"); - helpers.interrupt = build.setLabel(); + build.setLabel(helpers.interrupt); emitInterrupt(build); + + if (build.logText) + build.logAppend("; return\n"); + build.setLabel(helpers.return_); + emitReturn(build, helpers); } } // namespace A64 diff --git a/luau/CodeGen/src/CodeGenUtils.cpp b/luau/CodeGen/src/CodeGenUtils.cpp index a7131e1..20269cf 100644 --- a/luau/CodeGen/src/CodeGenUtils.cpp +++ b/luau/CodeGen/src/CodeGenUtils.cpp @@ -17,8 +17,6 @@ #include -LUAU_FASTFLAG(LuauUniformTopHandling) - // All external function calls that can cause stack realloc or Lua calls have to be wrapped in VM_PROTECT // This makes sure that we save the pc (in case the Lua call needs to generate a backtrace) before the call, // and restores the stack pointer after in case stack gets reallocated @@ -306,44 +304,6 @@ Closure* callFallback(lua_State* L, StkId ra, StkId argtop, int nresults) } } -// Extracted as-is from lvmexecute.cpp with the exception of control flow (reentry) and removed interrupts -Closure* returnFallback(lua_State* L, StkId ra, StkId valend) -{ - // ci is our callinfo, cip is our parent - CallInfo* ci = L->ci; - CallInfo* cip = ci - 1; - - StkId res = ci->func; // note: we assume CALL always puts func+args and expects results to start at func - StkId vali = ra; - - int nresults = ci->nresults; - - // copy return values into parent stack (but only up to nresults!), fill the rest with nil - // note: in MULTRET context nresults starts as -1 so i != 0 condition never activates intentionally - int i; - for (i = nresults; i != 0 && vali < valend; i--) - setobj2s(L, res++, vali++); - while (i-- > 0) - setnilvalue(res++); - - // pop the stack frame - L->ci = cip; - L->base = cip->base; - L->top = (nresults == LUA_MULTRET) ? res : cip->top; - - // we're done! - if (LUAU_UNLIKELY(ci->flags & LUA_CALLINFO_RETURN)) - { - if (!FFlag::LuauUniformTopHandling) - L->top = res; - return NULL; - } - - // keep executing new function - LUAU_ASSERT(isLua(cip)); - return clvalue(cip->func); -} - const Instruction* executeGETGLOBAL(lua_State* L, const Instruction* pc, StkId base, TValue* k) { [[maybe_unused]] Closure* cl = clvalue(L->ci->func); diff --git a/luau/CodeGen/src/CodeGenUtils.h b/luau/CodeGen/src/CodeGenUtils.h index 87b6ec4..a30d7e9 100644 --- a/luau/CodeGen/src/CodeGenUtils.h +++ b/luau/CodeGen/src/CodeGenUtils.h @@ -18,7 +18,6 @@ Closure* callProlog(lua_State* L, TValue* ra, StkId argtop, int nresults); void callEpilogC(lua_State* L, int nresults, int n); Closure* callFallback(lua_State* L, StkId ra, StkId argtop, int nresults); -Closure* returnFallback(lua_State* L, StkId ra, StkId valend); const Instruction* executeGETGLOBAL(lua_State* L, const Instruction* pc, StkId base, TValue* k); const Instruction* executeSETGLOBAL(lua_State* L, const Instruction* pc, StkId base, TValue* k); diff --git a/luau/CodeGen/src/CodeGenX64.cpp b/luau/CodeGen/src/CodeGenX64.cpp index ec032c0..41c3dbd 100644 --- a/luau/CodeGen/src/CodeGenX64.cpp +++ b/luau/CodeGen/src/CodeGenX64.cpp @@ -56,6 +56,11 @@ static EntryLocations buildEntryFunction(AssemblyBuilderX64& build, UnwindBuilde locations.start = build.setLabel(); unwind.startFunction(); + RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi; + RegisterX64 rArg2 = (build.abi == ABIX64::Windows) ? rdx : rsi; + RegisterX64 rArg3 = (build.abi == ABIX64::Windows) ? r8 : rdx; + RegisterX64 rArg4 = (build.abi == ABIX64::Windows) ? r9 : rcx; + // Save common non-volatile registers if (build.abi == ABIX64::SystemV) { @@ -177,18 +182,28 @@ void assembleHelpers(X64::AssemblyBuilderX64& build, ModuleHelpers& helpers) { if (build.logText) build.logAppend("; exitContinueVm\n"); - helpers.exitContinueVm = build.setLabel(); + build.setLabel(helpers.exitContinueVm); emitExit(build, /* continueInVm */ true); if (build.logText) build.logAppend("; exitNoContinueVm\n"); - helpers.exitNoContinueVm = build.setLabel(); + build.setLabel(helpers.exitNoContinueVm); emitExit(build, /* continueInVm */ false); if (build.logText) build.logAppend("; continueCallInVm\n"); - helpers.continueCallInVm = build.setLabel(); + build.setLabel(helpers.continueCallInVm); emitContinueCallInVm(build); + + if (build.logText) + build.logAppend("; interrupt\n"); + build.setLabel(helpers.interrupt); + emitInterrupt(build); + + if (build.logText) + build.logAppend("; return\n"); + build.setLabel(helpers.return_); + emitReturn(build, helpers); } } // namespace X64 diff --git a/luau/CodeGen/src/EmitCommon.h b/luau/CodeGen/src/EmitCommon.h index 6b19912..f912ffb 100644 --- a/luau/CodeGen/src/EmitCommon.h +++ b/luau/CodeGen/src/EmitCommon.h @@ -24,13 +24,14 @@ struct ModuleHelpers // A64/X64 Label exitContinueVm; Label exitNoContinueVm; + Label return_; + Label interrupt; // X64 Label continueCallInVm; // A64 Label reentry; // x0: closure - Label interrupt; // x0: pc offset, x1: return address, x2: interrupt }; } // namespace CodeGen diff --git a/luau/CodeGen/src/EmitCommonX64.cpp b/luau/CodeGen/src/EmitCommonX64.cpp index 0095f28..f240d26 100644 --- a/luau/CodeGen/src/EmitCommonX64.cpp +++ b/luau/CodeGen/src/EmitCommonX64.cpp @@ -278,39 +278,34 @@ void emitUpdateBase(AssemblyBuilderX64& build) build.mov(rBase, qword[rState + offsetof(lua_State, base)]); } -static void emitSetSavedPc(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos) +void emitInterrupt(AssemblyBuilderX64& build) { - ScopedRegX64 tmp1{regs, SizeX64::qword}; - ScopedRegX64 tmp2{regs, SizeX64::qword}; + // rax = pcpos + 1 + // rbx = return address in native code - build.mov(tmp1.reg, sCode); - build.add(tmp1.reg, pcpos * sizeof(Instruction)); - build.mov(tmp2.reg, qword[rState + offsetof(lua_State, ci)]); - build.mov(qword[tmp2.reg + offsetof(CallInfo, savedpc)], tmp1.reg); -} + // note: rbx is non-volatile so it will be saved across interrupt call automatically + + RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi; + RegisterX64 rArg2 = (build.abi == ABIX64::Windows) ? rdx : rsi; -void emitInterrupt(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos) -{ Label skip; - ScopedRegX64 tmp{regs, SizeX64::qword}; + // Update L->ci->savedpc; required in case interrupt errors + build.mov(rcx, sCode); + build.lea(rcx, addr[rcx + rax * sizeof(Instruction)]); + build.mov(rax, qword[rState + offsetof(lua_State, ci)]); + build.mov(qword[rax + offsetof(CallInfo, savedpc)], rcx); - // Skip if there is no interrupt set - build.mov(tmp.reg, qword[rState + offsetof(lua_State, global)]); - build.mov(tmp.reg, qword[tmp.reg + offsetof(global_State, cb.interrupt)]); - build.test(tmp.reg, tmp.reg); + // Load interrupt handler; it may be nullptr in case the update raced with the check before we got here + build.mov(rax, qword[rState + offsetof(lua_State, global)]); + build.mov(rax, qword[rax + offsetof(global_State, cb.interrupt)]); + build.test(rax, rax); build.jcc(ConditionX64::Zero, skip); - emitSetSavedPc(regs, build, pcpos + 1); - // Call interrupt - // TODO: This code should move to the end of the function, or even be outlined so that it can be shared by multiple interruptible instructions - IrCallWrapperX64 callWrap(regs, build); - callWrap.addArgument(SizeX64::qword, rState); - callWrap.addArgument(SizeX64::dword, -1); - callWrap.call(tmp.release()); - - emitUpdateBase(build); // interrupt may have reallocated stack + build.mov(rArg1, rState); + build.mov(dwordReg(rArg2), -1); + build.call(rax); // Check if we need to exit build.mov(al, byte[rState + offsetof(lua_State, status)]); @@ -322,6 +317,10 @@ void emitInterrupt(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos) emitExit(build, /* continueInVm */ false); build.setLabel(skip); + + emitUpdateBase(build); // interrupt may have reallocated stack + + build.jmp(rbx); } void emitFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int offset, int pcpos) @@ -352,6 +351,90 @@ void emitContinueCallInVm(AssemblyBuilderX64& build) emitExit(build, /* continueInVm */ true); } +void emitReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers) +{ + // input: res in rdi, number of written values in ecx + RegisterX64 res = rdi; + RegisterX64 written = ecx; + + RegisterX64 ci = r8; + RegisterX64 cip = r9; + RegisterX64 nresults = esi; + + build.mov(ci, qword[rState + offsetof(lua_State, ci)]); + build.lea(cip, addr[ci - sizeof(CallInfo)]); + + // nresults = ci->nresults + build.mov(nresults, dword[ci + offsetof(CallInfo, nresults)]); + + Label skipResultCopy; + + // Fill the rest of the expected results (nresults - written) with 'nil' + RegisterX64 counter = written; + build.sub(counter, nresults); // counter = -(nresults - written) + build.jcc(ConditionX64::GreaterEqual, skipResultCopy); + + Label repeatNilLoop = build.setLabel(); + build.mov(dword[res + offsetof(TValue, tt)], LUA_TNIL); + build.add(res, sizeof(TValue)); + build.inc(counter); + build.jcc(ConditionX64::NotZero, repeatNilLoop); + + build.setLabel(skipResultCopy); + + build.mov(qword[rState + offsetof(lua_State, ci)], cip); // L->ci = cip + build.mov(rBase, qword[cip + offsetof(CallInfo, base)]); // sync base = L->base while we have a chance + build.mov(qword[rState + offsetof(lua_State, base)], rBase); // L->base = cip->base + + Label skipFixedRetTop; + build.test(nresults, nresults); // test here will set SF=1 for a negative number and it always sets OF to 0 + build.jcc(ConditionX64::Less, skipFixedRetTop); // jl jumps if SF != OF + build.mov(res, qword[cip + offsetof(CallInfo, top)]); // res = cip->top + build.setLabel(skipFixedRetTop); + + build.mov(qword[rState + offsetof(lua_State, top)], res); // L->top = res + + // Unlikely, but this might be the last return from VM + build.test(byte[ci + offsetof(CallInfo, flags)], LUA_CALLINFO_RETURN); + build.jcc(ConditionX64::NotZero, helpers.exitNoContinueVm); + + // Returning back to the previous function is a bit tricky + // Registers alive: r9 (cip) + RegisterX64 proto = rcx; + RegisterX64 execdata = rbx; + + // Change closure + build.mov(rax, qword[cip + offsetof(CallInfo, func)]); + build.mov(rax, qword[rax + offsetof(TValue, value.gc)]); + build.mov(sClosure, rax); + + build.mov(proto, qword[rax + offsetof(Closure, l.p)]); + + build.mov(execdata, qword[proto + offsetof(Proto, execdata)]); + + build.test(byte[cip + offsetof(CallInfo, flags)], LUA_CALLINFO_NATIVE); + build.jcc(ConditionX64::Zero, helpers.exitContinueVm); // Continue in interpreter if function has no native data + + // Change constants + build.mov(rConstants, qword[proto + offsetof(Proto, k)]); + + // Change code + build.mov(rdx, qword[proto + offsetof(Proto, code)]); + build.mov(sCode, rdx); + + build.mov(rax, qword[cip + offsetof(CallInfo, savedpc)]); + + // To get instruction index from instruction pointer, we need to divide byte offset by 4 + // But we will actually need to scale instruction index by 4 back to byte offset later so it cancels out + build.sub(rax, rdx); + + // Get new instruction location and jump to it + build.mov(edx, dword[execdata + rax]); + build.add(rdx, qword[proto + offsetof(Proto, exectarget)]); + build.jmp(rdx); +} + + } // namespace X64 } // namespace CodeGen } // namespace Luau diff --git a/luau/CodeGen/src/EmitCommonX64.h b/luau/CodeGen/src/EmitCommonX64.h index 3f723f4..37be73f 100644 --- a/luau/CodeGen/src/EmitCommonX64.h +++ b/luau/CodeGen/src/EmitCommonX64.h @@ -53,31 +53,6 @@ constexpr OperandX64 sCode = qword[rsp + kStackSize + 8]; // Instruction* cod constexpr OperandX64 sTemporarySlot = addr[rsp + kStackSize + 16]; constexpr OperandX64 sSpillArea = addr[rsp + kStackSize + 24]; -// TODO: These should be replaced with a portable call function that checks the ABI at runtime and reorders moves accordingly to avoid conflicts -#if defined(_WIN32) - -constexpr RegisterX64 rArg1 = rcx; -constexpr RegisterX64 rArg2 = rdx; -constexpr RegisterX64 rArg3 = r8; -constexpr RegisterX64 rArg4 = r9; -constexpr RegisterX64 rArg5 = noreg; -constexpr RegisterX64 rArg6 = noreg; -constexpr OperandX64 sArg5 = qword[rsp + 32]; -constexpr OperandX64 sArg6 = qword[rsp + 40]; - -#else - -constexpr RegisterX64 rArg1 = rdi; -constexpr RegisterX64 rArg2 = rsi; -constexpr RegisterX64 rArg3 = rdx; -constexpr RegisterX64 rArg4 = rcx; -constexpr RegisterX64 rArg5 = r8; -constexpr RegisterX64 rArg6 = r9; -constexpr OperandX64 sArg5 = noreg; -constexpr OperandX64 sArg6 = noreg; - -#endif - inline OperandX64 luauReg(int ri) { return xmmword[rBase + ri * sizeof(TValue)]; @@ -202,11 +177,13 @@ void callStepGc(IrRegAllocX64& regs, AssemblyBuilderX64& build); void emitExit(AssemblyBuilderX64& build, bool continueInVm); void emitUpdateBase(AssemblyBuilderX64& build); -void emitInterrupt(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos); +void emitInterrupt(AssemblyBuilderX64& build); void emitFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int offset, int pcpos); void emitContinueCallInVm(AssemblyBuilderX64& build); +void emitReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers); + } // namespace X64 } // namespace CodeGen } // namespace Luau diff --git a/luau/CodeGen/src/EmitInstructionX64.cpp b/luau/CodeGen/src/EmitInstructionX64.cpp index f2012ca..61d5ac6 100644 --- a/luau/CodeGen/src/EmitInstructionX64.cpp +++ b/luau/CodeGen/src/EmitInstructionX64.cpp @@ -18,6 +18,12 @@ namespace X64 void emitInstCall(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults) { + // TODO: This should use IrCallWrapperX64 + RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi; + RegisterX64 rArg2 = (build.abi == ABIX64::Windows) ? rdx : rsi; + RegisterX64 rArg3 = (build.abi == ABIX64::Windows) ? r8 : rdx; + RegisterX64 rArg4 = (build.abi == ABIX64::Windows) ? r9 : rcx; + build.mov(rArg1, rState); build.lea(rArg2, luauRegAddress(ra)); @@ -163,167 +169,90 @@ void emitInstCall(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int } } -void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults) +void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults, bool functionVariadic) { - RegisterX64 ci = r8; - RegisterX64 cip = r9; RegisterX64 res = rdi; - RegisterX64 nresults = esi; - - build.mov(ci, qword[rState + offsetof(lua_State, ci)]); - build.lea(cip, addr[ci - sizeof(CallInfo)]); - - // res = ci->func; note: we assume CALL always puts func+args and expects results to start at func - build.mov(res, qword[ci + offsetof(CallInfo, func)]); - // nresults = ci->nresults - build.mov(nresults, dword[ci + offsetof(CallInfo, nresults)]); + RegisterX64 written = ecx; + if (functionVariadic) { - Label skipResultCopy; + build.mov(res, qword[rState + offsetof(lua_State, ci)]); + build.mov(res, qword[res + offsetof(CallInfo, func)]); + } + else if (actualResults != 1) + build.lea(res, addr[rBase - sizeof(TValue)]); // invariant: ci->func + 1 == ci->base for non-variadic frames - RegisterX64 counter = ecx; - - if (actualResults == 0) + if (actualResults == 0) + { + build.xor_(written, written); + build.jmp(helpers.return_); + } + else if (actualResults == 1 && !functionVariadic) + { + // fast path: minimizes res adjustments + // note that we skipped res computation for this specific case above + build.vmovups(xmm0, luauReg(ra)); + build.vmovups(xmmword[rBase - sizeof(TValue)], xmm0); + build.mov(res, rBase); + build.mov(written, 1); + build.jmp(helpers.return_); + } + else if (actualResults >= 1 && actualResults <= 3) + { + for (int r = 0; r < actualResults; ++r) { - // Our instruction doesn't have any results, so just fill results expected in parent with 'nil' - build.test(nresults, nresults); // test here will set SF=1 for a negative number, ZF=1 for zero and OF=0 - build.jcc(ConditionX64::LessEqual, skipResultCopy); // jle jumps if SF != OF or ZF == 1 - - build.mov(counter, nresults); - - Label repeatNilLoop = build.setLabel(); - build.mov(dword[res + offsetof(TValue, tt)], LUA_TNIL); - build.add(res, sizeof(TValue)); - build.dec(counter); - build.jcc(ConditionX64::NotZero, repeatNilLoop); + build.vmovups(xmm0, luauReg(ra + r)); + build.vmovups(xmmword[res + r * sizeof(TValue)], xmm0); } - else if (actualResults == 1) - { - // Try setting our 1 result - build.test(nresults, nresults); - build.jcc(ConditionX64::Zero, skipResultCopy); + build.add(res, actualResults * sizeof(TValue)); + build.mov(written, actualResults); + build.jmp(helpers.return_); + } + else + { + RegisterX64 vali = rax; + RegisterX64 valend = rdx; - build.lea(counter, addr[nresults - 1]); + // vali = ra + build.lea(vali, luauRegAddress(ra)); - build.vmovups(xmm0, luauReg(ra)); - build.vmovups(xmmword[res], xmm0); - build.add(res, sizeof(TValue)); - - // Fill the rest of the expected results with 'nil' - build.test(counter, counter); // test here will set SF=1 for a negative number, ZF=1 for zero and OF=0 - build.jcc(ConditionX64::LessEqual, skipResultCopy); // jle jumps if SF != OF or ZF == 1 - - Label repeatNilLoop = build.setLabel(); - build.mov(dword[res + offsetof(TValue, tt)], LUA_TNIL); - build.add(res, sizeof(TValue)); - build.dec(counter); - build.jcc(ConditionX64::NotZero, repeatNilLoop); - } + // Copy as much as possible for MULTRET calls, and only as much as needed otherwise + if (actualResults == LUA_MULTRET) + build.mov(valend, qword[rState + offsetof(lua_State, top)]); // valend = L->top else + build.lea(valend, luauRegAddress(ra + actualResults)); // valend = ra + actualResults + + build.xor_(written, written); + + Label repeatValueLoop, exitValueLoop; + + if (actualResults == LUA_MULTRET) { - RegisterX64 vali = rax; - RegisterX64 valend = rdx; - - // Copy return values into parent stack (but only up to nresults!) - build.test(nresults, nresults); - build.jcc(ConditionX64::Zero, skipResultCopy); - - // vali = ra - build.lea(vali, luauRegAddress(ra)); - - // Copy as much as possible for MULTRET calls, and only as much as needed otherwise - if (actualResults == LUA_MULTRET) - build.mov(valend, qword[rState + offsetof(lua_State, top)]); // valend = L->top - else - build.lea(valend, luauRegAddress(ra + actualResults)); // valend = ra + actualResults - - build.mov(counter, nresults); - - Label repeatValueLoop, exitValueLoop; - - build.setLabel(repeatValueLoop); build.cmp(vali, valend); build.jcc(ConditionX64::NotBelow, exitValueLoop); - - build.vmovups(xmm0, xmmword[vali]); - build.vmovups(xmmword[res], xmm0); - build.add(vali, sizeof(TValue)); - build.add(res, sizeof(TValue)); - build.dec(counter); - build.jcc(ConditionX64::NotZero, repeatValueLoop); - - build.setLabel(exitValueLoop); - - // Fill the rest of the expected results with 'nil' - build.test(counter, counter); // test here will set SF=1 for a negative number, ZF=1 for zero and OF=0 - build.jcc(ConditionX64::LessEqual, skipResultCopy); // jle jumps if SF != OF or ZF == 1 - - Label repeatNilLoop = build.setLabel(); - build.mov(dword[res + offsetof(TValue, tt)], LUA_TNIL); - build.add(res, sizeof(TValue)); - build.dec(counter); - build.jcc(ConditionX64::NotZero, repeatNilLoop); } - build.setLabel(skipResultCopy); + build.setLabel(repeatValueLoop); + build.vmovups(xmm0, xmmword[vali]); + build.vmovups(xmmword[res], xmm0); + build.add(vali, sizeof(TValue)); + build.add(res, sizeof(TValue)); + build.inc(written); + build.cmp(vali, valend); + build.jcc(ConditionX64::Below, repeatValueLoop); + + build.setLabel(exitValueLoop); + build.jmp(helpers.return_); } - - build.mov(qword[rState + offsetof(lua_State, ci)], cip); // L->ci = cip - build.mov(rBase, qword[cip + offsetof(CallInfo, base)]); // sync base = L->base while we have a chance - build.mov(qword[rState + offsetof(lua_State, base)], rBase); // L->base = cip->base - - // Start with result for LUA_MULTRET/exit value - build.mov(qword[rState + offsetof(lua_State, top)], res); // L->top = res - - // Unlikely, but this might be the last return from VM - build.test(byte[ci + offsetof(CallInfo, flags)], LUA_CALLINFO_RETURN); - build.jcc(ConditionX64::NotZero, helpers.exitNoContinueVm); - - Label skipFixedRetTop; - build.test(nresults, nresults); // test here will set SF=1 for a negative number and it always sets OF to 0 - build.jcc(ConditionX64::Less, skipFixedRetTop); // jl jumps if SF != OF - build.mov(rax, qword[cip + offsetof(CallInfo, top)]); - build.mov(qword[rState + offsetof(lua_State, top)], rax); // L->top = cip->top - build.setLabel(skipFixedRetTop); - - // Returning back to the previous function is a bit tricky - // Registers alive: r9 (cip) - RegisterX64 proto = rcx; - RegisterX64 execdata = rbx; - - // Change closure - build.mov(rax, qword[cip + offsetof(CallInfo, func)]); - build.mov(rax, qword[rax + offsetof(TValue, value.gc)]); - build.mov(sClosure, rax); - - build.mov(proto, qword[rax + offsetof(Closure, l.p)]); - - build.mov(execdata, qword[proto + offsetof(Proto, execdata)]); - - build.test(byte[cip + offsetof(CallInfo, flags)], LUA_CALLINFO_NATIVE); - build.jcc(ConditionX64::Zero, helpers.exitContinueVm); // Continue in interpreter if function has no native data - - // Change constants - build.mov(rConstants, qword[proto + offsetof(Proto, k)]); - - // Change code - build.mov(rdx, qword[proto + offsetof(Proto, code)]); - build.mov(sCode, rdx); - - build.mov(rax, qword[cip + offsetof(CallInfo, savedpc)]); - - // To get instruction index from instruction pointer, we need to divide byte offset by 4 - // But we will actually need to scale instruction index by 4 back to byte offset later so it cancels out - build.sub(rax, rdx); - - // Get new instruction location and jump to it - build.mov(edx, dword[execdata + rax]); - build.add(rdx, qword[proto + offsetof(Proto, exectarget)]); - build.jmp(rdx); } void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, int count, uint32_t index) { + // TODO: This should use IrCallWrapperX64 + RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi; + RegisterX64 rArg2 = (build.abi == ABIX64::Windows) ? rdx : rsi; + RegisterX64 rArg3 = (build.abi == ABIX64::Windows) ? r8 : rdx; + OperandX64 last = index + count - 1; // Using non-volatile 'rbx' for dynamic 'count' value (for LUA_MULTRET) to skip later recomputation @@ -426,6 +355,12 @@ void emitInstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRep // ipairs-style traversal is handled in IR LUAU_ASSERT(aux >= 0); + // TODO: This should use IrCallWrapperX64 + RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi; + RegisterX64 rArg2 = (build.abi == ABIX64::Windows) ? rdx : rsi; + RegisterX64 rArg3 = (build.abi == ABIX64::Windows) ? r8 : rdx; + RegisterX64 rArg4 = (build.abi == ABIX64::Windows) ? r9 : rcx; + // This is a fast-path for builtin table iteration, tag check for 'ra' has to be performed before emitting this instruction // Registers are chosen in this way to simplify fallback code for the node part diff --git a/luau/CodeGen/src/EmitInstructionX64.h b/luau/CodeGen/src/EmitInstructionX64.h index 84fe113..b248b7e 100644 --- a/luau/CodeGen/src/EmitInstructionX64.h +++ b/luau/CodeGen/src/EmitInstructionX64.h @@ -18,7 +18,7 @@ class AssemblyBuilderX64; struct IrRegAllocX64; void emitInstCall(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults); -void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults); +void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults, bool functionVariadic); void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, int count, uint32_t index); void emitInstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat); diff --git a/luau/CodeGen/src/IrAnalysis.cpp b/luau/CodeGen/src/IrAnalysis.cpp index 85811f0..14fc9b4 100644 --- a/luau/CodeGen/src/IrAnalysis.cpp +++ b/luau/CodeGen/src/IrAnalysis.cpp @@ -661,9 +661,212 @@ static void computeCfgBlockEdges(IrFunction& function) } } +// Assign tree depth and pre- and post- DFS visit order of the tree/graph nodes +// Optionally, collect required node order into a vector +template +void computeBlockOrdering( + IrFunction& function, std::vector& ordering, std::vector* preOrder, std::vector* postOrder) +{ + CfgInfo& info = function.cfg; + + LUAU_ASSERT(info.idoms.size() == function.blocks.size()); + + ordering.clear(); + ordering.resize(function.blocks.size()); + + // Get depth-first post-order using manual stack instead of recursion + struct StackItem + { + uint32_t blockIdx; + uint32_t itPos; + }; + std::vector stack; + + if (preOrder) + preOrder->reserve(function.blocks.size()); + if (postOrder) + postOrder->reserve(function.blocks.size()); + + uint32_t nextPreOrder = 0; + uint32_t nextPostOrder = 0; + + stack.push_back({0, 0}); + ordering[0].visited = true; + ordering[0].preOrder = nextPreOrder++; + + while (!stack.empty()) + { + StackItem& item = stack.back(); + BlockIteratorWrapper children = childIt(info, item.blockIdx); + + if (item.itPos < children.size()) + { + uint32_t childIdx = children[item.itPos++]; + + BlockOrdering& childOrdering = ordering[childIdx]; + + if (!childOrdering.visited) + { + childOrdering.visited = true; + childOrdering.depth = uint32_t(stack.size()); + childOrdering.preOrder = nextPreOrder++; + + if (preOrder) + preOrder->push_back(item.blockIdx); + + stack.push_back({childIdx, 0}); + } + } + else + { + ordering[item.blockIdx].postOrder = nextPostOrder++; + + if (postOrder) + postOrder->push_back(item.blockIdx); + + stack.pop_back(); + } + } +} + +// Dominance tree construction based on 'A Simple, Fast Dominance Algorithm' [Keith D. Cooper, et al] +// This solution has quadratic complexity in the worst case. +// It is possible to switch to SEMI-NCA algorithm (also quadratic) mentioned in 'Linear-Time Algorithms for Dominators and Related Problems' [Loukas +// Georgiadis] + +// Find block that is common between blocks 'a' and 'b' on the path towards the entry +static uint32_t findCommonDominator(const std::vector& idoms, const std::vector& data, uint32_t a, uint32_t b) +{ + while (a != b) + { + while (data[a].postOrder < data[b].postOrder) + { + a = idoms[a]; + LUAU_ASSERT(a != ~0u); + } + + while (data[b].postOrder < data[a].postOrder) + { + b = idoms[b]; + LUAU_ASSERT(b != ~0u); + } + } + + return a; +} + +void computeCfgImmediateDominators(IrFunction& function) +{ + CfgInfo& info = function.cfg; + + // Clear existing data + info.idoms.clear(); + info.idoms.resize(function.blocks.size(), ~0u); + + std::vector ordering; + std::vector blocksInPostOrder; + computeBlockOrdering(function, ordering, /* preOrder */ nullptr, &blocksInPostOrder); + + // Entry node is temporarily marked to be an idom of itself to make algorithm work + info.idoms[0] = 0; + + // Iteratively compute immediate dominators + bool updated = true; + + while (updated) + { + updated = false; + + // Go over blocks in reverse post-order of CFG + // '- 2' skips the root node which is last in post-order traversal + for (int i = int(blocksInPostOrder.size() - 2); i >= 0; i--) + { + uint32_t blockIdx = blocksInPostOrder[i]; + uint32_t newIdom = ~0u; + + for (uint32_t predIdx : predecessors(info, blockIdx)) + { + if (uint32_t predIdom = info.idoms[predIdx]; predIdom != ~0u) + { + if (newIdom == ~0u) + newIdom = predIdx; + else + newIdom = findCommonDominator(info.idoms, ordering, newIdom, predIdx); + } + } + + if (newIdom != info.idoms[blockIdx]) + { + info.idoms[blockIdx] = newIdom; + + // Run until a fixed point is reached + updated = true; + } + } + } + + // Entry node doesn't have an immediate dominator + info.idoms[0] = ~0u; +} + +void computeCfgDominanceTreeChildren(IrFunction& function) +{ + CfgInfo& info = function.cfg; + + // Clear existing data + info.domChildren.clear(); + + info.domChildrenOffsets.clear(); + info.domChildrenOffsets.resize(function.blocks.size()); + + // First we need to know children count of each node in the dominance tree + // We use offset array for to hold this data, counts will be readjusted to offsets later + for (size_t blockIdx = 0; blockIdx < function.blocks.size(); blockIdx++) + { + uint32_t domParent = info.idoms[blockIdx]; + + if (domParent != ~0u) + info.domChildrenOffsets[domParent]++; + } + + // Convert counds to offsets using prefix sum + uint32_t total = 0; + + for (size_t blockIdx = 0; blockIdx < function.blocks.size(); blockIdx++) + { + uint32_t& offset = info.domChildrenOffsets[blockIdx]; + uint32_t count = offset; + offset = total; + total += count; + } + + info.domChildren.resize(total); + + for (size_t blockIdx = 0; blockIdx < function.blocks.size(); blockIdx++) + { + // We use a trick here, where we use the starting offset of the dominance children list as the position where to write next child + // The values will be adjusted back in a separate loop later + uint32_t domParent = info.idoms[blockIdx]; + + if (domParent != ~0u) + info.domChildren[info.domChildrenOffsets[domParent]++] = uint32_t(blockIdx); + } + + // Offsets into the dominance children list were used as iterators in the previous loop + // That process basically moved the values in the array 1 step towards the start + // Here we move them one step towards the end and restore 0 for first offset + for (int blockIdx = int(function.blocks.size() - 1); blockIdx > 0; blockIdx--) + info.domChildrenOffsets[blockIdx] = info.domChildrenOffsets[blockIdx - 1]; + info.domChildrenOffsets[0] = 0; + + computeBlockOrdering(function, info.domOrdering, /* preOrder */ nullptr, /* postOrder */ nullptr); +} + void computeCfgInfo(IrFunction& function) { computeCfgBlockEdges(function); + computeCfgImmediateDominators(function); + computeCfgDominanceTreeChildren(function); computeCfgLiveInOutRegSets(function); } @@ -687,5 +890,15 @@ BlockIteratorWrapper successors(const CfgInfo& cfg, uint32_t blockIdx) return BlockIteratorWrapper{cfg.successors.data() + start, cfg.successors.data() + end}; } +BlockIteratorWrapper domChildren(const CfgInfo& cfg, uint32_t blockIdx) +{ + LUAU_ASSERT(blockIdx < cfg.domChildrenOffsets.size()); + + uint32_t start = cfg.domChildrenOffsets[blockIdx]; + uint32_t end = blockIdx + 1 < cfg.domChildrenOffsets.size() ? cfg.domChildrenOffsets[blockIdx + 1] : uint32_t(cfg.domChildren.size()); + + return BlockIteratorWrapper{cfg.domChildren.data() + start, cfg.domChildren.data() + end}; +} + } // namespace CodeGen } // namespace Luau diff --git a/luau/CodeGen/src/IrBuilder.cpp b/luau/CodeGen/src/IrBuilder.cpp index a12eca3..98db297 100644 --- a/luau/CodeGen/src/IrBuilder.cpp +++ b/luau/CodeGen/src/IrBuilder.cpp @@ -25,6 +25,7 @@ IrBuilder::IrBuilder() void IrBuilder::buildFunctionIr(Proto* proto) { function.proto = proto; + function.variadic = proto->is_vararg != 0; // Rebuild original control flow blocks rebuildBytecodeBasicBlocks(proto); @@ -428,6 +429,7 @@ void IrBuilder::beginBlock(IrOp block) LUAU_ASSERT(target.start == ~0u || target.start == uint32_t(function.instructions.size())); target.start = uint32_t(function.instructions.size()); + target.sortkey = target.start; inTerminatedBlock = false; } diff --git a/luau/CodeGen/src/IrDump.cpp b/luau/CodeGen/src/IrDump.cpp index 7ea9b79..09cafba 100644 --- a/luau/CodeGen/src/IrDump.cpp +++ b/luau/CodeGen/src/IrDump.cpp @@ -656,28 +656,23 @@ std::string dump(const IrFunction& function) return result; } -std::string toDot(const IrFunction& function, bool includeInst) +static void appendLabelRegset(IrToStringContext& ctx, const std::vector& regSets, size_t blockIdx, const char* name) { - std::string result; - IrToStringContext ctx{result, function.blocks, function.constants, function.cfg}; + if (blockIdx < regSets.size()) + { + const RegisterSet& rs = regSets[blockIdx]; - auto appendLabelRegset = [&ctx](const std::vector& regSets, size_t blockIdx, const char* name) { - if (blockIdx < regSets.size()) + if (rs.regs.any() || rs.varargSeq) { - const RegisterSet& rs = regSets[blockIdx]; - - if (rs.regs.any() || rs.varargSeq) - { - append(ctx.result, "|{%s|", name); - appendRegisterSet(ctx, rs, "|"); - append(ctx.result, "}"); - } + append(ctx.result, "|{%s|", name); + appendRegisterSet(ctx, rs, "|"); + append(ctx.result, "}"); } - }; - - append(ctx.result, "digraph CFG {\n"); - append(ctx.result, "node[shape=record]\n"); + } +} +static void appendBlocks(IrToStringContext& ctx, const IrFunction& function, bool includeInst, bool includeIn, bool includeOut, bool includeDef) +{ for (size_t i = 0; i < function.blocks.size(); i++) { const IrBlock& block = function.blocks[i]; @@ -692,7 +687,8 @@ std::string toDot(const IrFunction& function, bool includeInst) append(ctx.result, "label=\"{"); toString(ctx, block, uint32_t(i)); - appendLabelRegset(ctx.cfg.in, i, "in"); + if (includeIn) + appendLabelRegset(ctx, ctx.cfg.in, i, "in"); if (includeInst && block.start != ~0u) { @@ -709,11 +705,25 @@ std::string toDot(const IrFunction& function, bool includeInst) } } - appendLabelRegset(ctx.cfg.def, i, "def"); - appendLabelRegset(ctx.cfg.out, i, "out"); + if (includeDef) + appendLabelRegset(ctx, ctx.cfg.def, i, "def"); + + if (includeOut) + appendLabelRegset(ctx, ctx.cfg.out, i, "out"); append(ctx.result, "}\"];\n"); } +} + +std::string toDot(const IrFunction& function, bool includeInst) +{ + std::string result; + IrToStringContext ctx{result, function.blocks, function.constants, function.cfg}; + + append(ctx.result, "digraph CFG {\n"); + append(ctx.result, "node[shape=record]\n"); + + appendBlocks(ctx, function, includeInst, /* includeIn */ true, /* includeOut */ true, /* includeDef */ true); for (size_t i = 0; i < function.blocks.size(); i++) { @@ -750,6 +760,107 @@ std::string toDot(const IrFunction& function, bool includeInst) return result; } +std::string toDotCfg(const IrFunction& function) +{ + std::string result; + IrToStringContext ctx{result, function.blocks, function.constants, function.cfg}; + + append(ctx.result, "digraph CFG {\n"); + append(ctx.result, "node[shape=record]\n"); + + appendBlocks(ctx, function, /* includeInst */ false, /* includeIn */ false, /* includeOut */ false, /* includeDef */ true); + + for (size_t i = 0; i < function.blocks.size() && i < ctx.cfg.successorsOffsets.size(); i++) + { + BlockIteratorWrapper succ = successors(ctx.cfg, unsigned(i)); + + for (uint32_t target : succ) + append(ctx.result, "b%u -> b%u;\n", unsigned(i), target); + } + + append(ctx.result, "}\n"); + + return result; +} + +std::string toDotDjGraph(const IrFunction& function) +{ + std::string result; + IrToStringContext ctx{result, function.blocks, function.constants, function.cfg}; + + append(ctx.result, "digraph CFG {\n"); + + for (size_t i = 0; i < ctx.blocks.size(); i++) + { + const IrBlock& block = ctx.blocks[i]; + + append(ctx.result, "b%u [", unsigned(i)); + + if (block.kind == IrBlockKind::Fallback) + append(ctx.result, "style=filled;fillcolor=salmon;"); + else if (block.kind == IrBlockKind::Bytecode) + append(ctx.result, "style=filled;fillcolor=palegreen;"); + + append(ctx.result, "label=\""); + toString(ctx, block, uint32_t(i)); + append(ctx.result, "\"];\n"); + } + + // Layer by depth in tree + uint32_t depth = 0; + bool found = true; + + while (found) + { + found = false; + + append(ctx.result, "{rank = same;"); + for (size_t i = 0; i < ctx.cfg.domOrdering.size(); i++) + { + if (ctx.cfg.domOrdering[i].depth == depth) + { + append(ctx.result, "b%u;", unsigned(i)); + found = true; + } + } + append(ctx.result, "}\n"); + + depth++; + } + + for (size_t i = 0; i < ctx.cfg.domChildrenOffsets.size(); i++) + { + BlockIteratorWrapper dom = domChildren(ctx.cfg, unsigned(i)); + + for (uint32_t target : dom) + append(ctx.result, "b%u -> b%u;\n", unsigned(i), target); + + // Join edges are all successor edges that do not strongly dominate + BlockIteratorWrapper succ = successors(ctx.cfg, unsigned(i)); + + for (uint32_t successor : succ) + { + bool found = false; + + for (uint32_t target : dom) + { + if (target == successor) + { + found = true; + break; + } + } + + if (!found) + append(ctx.result, "b%u -> b%u [style=dotted];\n", unsigned(i), successor); + } + } + + append(ctx.result, "}\n"); + + return result; +} + std::string dumpDot(const IrFunction& function, bool includeInst) { std::string result = toDot(function, includeInst); diff --git a/luau/CodeGen/src/IrLoweringA64.cpp b/luau/CodeGen/src/IrLoweringA64.cpp index 5f62490..94c46db 100644 --- a/luau/CodeGen/src/IrLoweringA64.cpp +++ b/luau/CodeGen/src/IrLoweringA64.cpp @@ -185,11 +185,10 @@ static bool emitBuiltin( } } -IrLoweringA64::IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, NativeState& data, Proto* proto, IrFunction& function) +IrLoweringA64::IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, NativeState& data, IrFunction& function) : build(build) , helpers(helpers) , data(data) - , proto(proto) , function(function) , regs(function, {{x0, x15}, {x16, x17}, {q0, q7}, {q16, q31}}) , valueTracker(function) @@ -1166,25 +1165,17 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) } case IrCmd::INTERRUPT: { - RegisterA64 temp = regs.allocTemp(KindA64::x); + regs.spill(build, index); - Label skip, next; - build.ldr(temp, mem(rState, offsetof(lua_State, global))); - build.ldr(temp, mem(temp, offsetof(global_State, cb.interrupt))); - build.cbz(temp, skip); + Label self; - size_t spills = regs.spill(build, index); + build.ldr(x0, mem(rState, offsetof(lua_State, global))); + build.ldr(x0, mem(x0, offsetof(global_State, cb.interrupt))); + build.cbnz(x0, self); - // Jump to outlined interrupt handler, it will give back control to x1 - build.mov(x0, (uintOp(inst.a) + 1) * sizeof(Instruction)); - build.adr(x1, next); - build.b(helpers.interrupt); + Label next = build.setLabel(); - build.setLabel(next); - - regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state - - build.setLabel(skip); + interruptHandlers.push_back({self, uintOp(inst.a), next}); break; } case IrCmd::CHECK_GC: @@ -1343,19 +1334,71 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; case IrCmd::RETURN: regs.spill(build, index); - // valend = (n == LUA_MULTRET) ? L->top : ra + n - if (intOp(inst.b) == LUA_MULTRET) - build.ldr(x2, mem(rState, offsetof(lua_State, top))); - else - build.add(x2, rBase, uint16_t((vmRegOp(inst.a) + intOp(inst.b)) * sizeof(TValue))); - // returnFallback(L, ra, valend) - build.mov(x0, rState); - build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); - build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, returnFallback))); - build.blr(x3); - // reentry with x0=closure (NULL will trigger exit) - build.b(helpers.reentry); + if (function.variadic) + { + build.ldr(x1, mem(rState, offsetof(lua_State, ci))); + build.ldr(x1, mem(x1, offsetof(CallInfo, func))); + } + else if (intOp(inst.b) != 1) + build.sub(x1, rBase, sizeof(TValue)); // invariant: ci->func + 1 == ci->base for non-variadic frames + + if (intOp(inst.b) == 0) + { + build.mov(w2, 0); + build.b(helpers.return_); + } + else if (intOp(inst.b) == 1 && !function.variadic) + { + // fast path: minimizes x1 adjustments + // note that we skipped x1 computation for this specific case above + build.ldr(q0, mem(rBase, vmRegOp(inst.a) * sizeof(TValue))); + build.str(q0, mem(rBase, -int(sizeof(TValue)))); + build.mov(x1, rBase); + build.mov(w2, 1); + build.b(helpers.return_); + } + else if (intOp(inst.b) >= 1 && intOp(inst.b) <= 3) + { + for (int r = 0; r < intOp(inst.b); ++r) + { + build.ldr(q0, mem(rBase, (vmRegOp(inst.a) + r) * sizeof(TValue))); + build.str(q0, mem(x1, sizeof(TValue), AddressKindA64::post)); + } + build.mov(w2, intOp(inst.b)); + build.b(helpers.return_); + } + else + { + build.mov(w2, 0); + + // vali = ra + build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); + + // valend = (n == LUA_MULTRET) ? L->top : ra + n + if (intOp(inst.b) == LUA_MULTRET) + build.ldr(x4, mem(rState, offsetof(lua_State, top))); + else + build.add(x4, rBase, uint16_t((vmRegOp(inst.a) + intOp(inst.b)) * sizeof(TValue))); + + Label repeatValueLoop, exitValueLoop; + + if (intOp(inst.b) == LUA_MULTRET) + { + build.cmp(x3, x4); + build.b(ConditionA64::CarrySet, exitValueLoop); // CarrySet == UnsignedGreaterEqual + } + + build.setLabel(repeatValueLoop); + build.ldr(q0, mem(x3, sizeof(TValue), AddressKindA64::post)); + build.str(q0, mem(x1, sizeof(TValue), AddressKindA64::post)); + build.add(w2, w2, 1); + build.cmp(x3, x4); + build.b(ConditionA64::CarryClear, repeatValueLoop); // CarryClear == UnsignedLess + + build.setLabel(exitValueLoop); + build.b(helpers.return_); + } break; case IrCmd::FORGLOOP: // register layout: ra + 1 = table, ra + 2 = internal index, ra + 3 .. ra + aux = iteration variables @@ -1682,6 +1725,20 @@ void IrLoweringA64::finishBlock() regs.assertNoSpills(); } +void IrLoweringA64::finishFunction() +{ + if (build.logText) + build.logAppend("; interrupt handlers\n"); + + for (InterruptHandler& handler : interruptHandlers) + { + build.setLabel(handler.self); + build.mov(x0, (handler.pcpos + 1) * sizeof(Instruction)); + build.adr(x1, handler.next); + build.b(helpers.interrupt); + } +} + bool IrLoweringA64::hasError() const { return error; diff --git a/luau/CodeGen/src/IrLoweringA64.h b/luau/CodeGen/src/IrLoweringA64.h index 2647890..fc228cf 100644 --- a/luau/CodeGen/src/IrLoweringA64.h +++ b/luau/CodeGen/src/IrLoweringA64.h @@ -9,8 +9,6 @@ #include -struct Proto; - namespace Luau { namespace CodeGen @@ -25,10 +23,11 @@ namespace A64 struct IrLoweringA64 { - IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, NativeState& data, Proto* proto, IrFunction& function); + IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, NativeState& data, IrFunction& function); void lowerInst(IrInst& inst, uint32_t index, IrBlock& next); void finishBlock(); + void finishFunction(); bool hasError() const; @@ -55,10 +54,16 @@ struct IrLoweringA64 IrBlock& blockOp(IrOp op) const; Label& labelOp(IrOp op) const; + struct InterruptHandler + { + Label self; + unsigned int pcpos; + Label next; + }; + AssemblyBuilderA64& build; ModuleHelpers& helpers; NativeState& data; - Proto* proto = nullptr; // Temporarily required to provide 'Instruction* pc' to old emitInst* methods IrFunction& function; @@ -66,6 +71,8 @@ struct IrLoweringA64 IrValueLocationTracking valueTracker; + std::vector interruptHandlers; + bool error = false; }; diff --git a/luau/CodeGen/src/IrLoweringX64.cpp b/luau/CodeGen/src/IrLoweringX64.cpp index b9c35df..320cb07 100644 --- a/luau/CodeGen/src/IrLoweringX64.cpp +++ b/luau/CodeGen/src/IrLoweringX64.cpp @@ -958,8 +958,27 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) break; } case IrCmd::INTERRUPT: - emitInterrupt(regs, build, uintOp(inst.a)); + { + unsigned pcpos = uintOp(inst.a); + + // We unconditionally spill values here because that allows us to ignore register state when we synthesize interrupt handler + // This can be changed in the future if we can somehow record interrupt handler code separately + // Since interrupts are loop edges or call/ret, we don't have a significant opportunity for register reuse here anyway + regs.preserveAndFreeInstValues(); + + ScopedRegX64 tmp{regs, SizeX64::qword}; + + Label self; + + build.mov(tmp.reg, qword[rState + offsetof(lua_State, global)]); + build.cmp(qword[tmp.reg + offsetof(global_State, cb.interrupt)], 0); + build.jcc(ConditionX64::NotEqual, self); + + Label next = build.setLabel(); + + interruptHandlers.push_back({self, pcpos, next}); break; + } case IrCmd::CHECK_GC: callStepGc(regs, build); break; @@ -991,7 +1010,6 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) } case IrCmd::SET_SAVEDPC: { - // This is like emitSetSavedPc, but using register allocation instead of relying on rax/rdx ScopedRegX64 tmp1{regs, SizeX64::qword}; ScopedRegX64 tmp2{regs, SizeX64::qword}; @@ -1048,7 +1066,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next) case IrCmd::RETURN: regs.assertAllFree(); regs.assertNoSpills(); - emitInstReturn(build, helpers, vmRegOp(inst.a), intOp(inst.b)); + emitInstReturn(build, helpers, vmRegOp(inst.a), intOp(inst.b), function.variadic); break; case IrCmd::FORGLOOP: regs.assertAllFree(); @@ -1350,6 +1368,20 @@ void IrLoweringX64::finishBlock() regs.assertNoSpills(); } +void IrLoweringX64::finishFunction() +{ + if (build.logText) + build.logAppend("; interrupt handlers\n"); + + for (InterruptHandler& handler : interruptHandlers) + { + build.setLabel(handler.self); + build.mov(rax, handler.pcpos + 1); + build.lea(rbx, handler.next); + build.jmp(helpers.interrupt); + } +} + bool IrLoweringX64::hasError() const { // If register allocator had to use more stack slots than we have available, this function can't run natively diff --git a/luau/CodeGen/src/IrLoweringX64.h b/luau/CodeGen/src/IrLoweringX64.h index cab4a85..a375a33 100644 --- a/luau/CodeGen/src/IrLoweringX64.h +++ b/luau/CodeGen/src/IrLoweringX64.h @@ -29,6 +29,7 @@ struct IrLoweringX64 void lowerInst(IrInst& inst, uint32_t index, IrBlock& next); void finishBlock(); + void finishFunction(); bool hasError() const; @@ -53,6 +54,13 @@ struct IrLoweringX64 IrBlock& blockOp(IrOp op) const; Label& labelOp(IrOp op) const; + struct InterruptHandler + { + Label self; + unsigned int pcpos; + Label next; + }; + AssemblyBuilderX64& build; ModuleHelpers& helpers; NativeState& data; @@ -62,6 +70,8 @@ struct IrLoweringX64 IrRegAllocX64 regs; IrValueLocationTracking valueTracker; + + std::vector interruptHandlers; }; } // namespace X64 diff --git a/luau/CodeGen/src/NativeState.cpp b/luau/CodeGen/src/NativeState.cpp index 17977c3..14c1acd 100644 --- a/luau/CodeGen/src/NativeState.cpp +++ b/luau/CodeGen/src/NativeState.cpp @@ -90,7 +90,6 @@ void initFunctions(NativeState& data) data.context.callEpilogC = callEpilogC; data.context.callFallback = callFallback; - data.context.returnFallback = returnFallback; data.context.executeGETGLOBAL = executeGETGLOBAL; data.context.executeSETGLOBAL = executeSETGLOBAL; diff --git a/luau/CodeGen/src/NativeState.h b/luau/CodeGen/src/NativeState.h index 0140448..a2393bb 100644 --- a/luau/CodeGen/src/NativeState.h +++ b/luau/CodeGen/src/NativeState.h @@ -86,7 +86,6 @@ struct NativeContext void (*callEpilogC)(lua_State* L, int nresults, int n) = nullptr; Closure* (*callFallback)(lua_State* L, StkId ra, StkId argtop, int nresults) = nullptr; - Closure* (*returnFallback)(lua_State* L, StkId ra, StkId valend) = nullptr; // Opcode fallbacks, implemented in C const Instruction* (*executeGETGLOBAL)(lua_State* L, const Instruction* pc, StkId base, TValue* k) = nullptr; diff --git a/luau/CodeGen/src/OptimizeConstProp.cpp b/luau/CodeGen/src/OptimizeConstProp.cpp index 338bb49..b779fb4 100644 --- a/luau/CodeGen/src/OptimizeConstProp.cpp +++ b/luau/CodeGen/src/OptimizeConstProp.cpp @@ -1059,16 +1059,21 @@ static void tryCreateLinearBlock(IrBuilder& build, std::vector& visited // TODO: using values from the first block can cause 'live out' of the linear block predecessor to not have all required registers constPropInBlock(build, startingBlock, state); - // Veryfy that target hasn't changed + // Verify that target hasn't changed LUAU_ASSERT(function.instructions[startingBlock.finish].a.index == targetBlockIdx); + // Note: using startingBlock after this line is unsafe as the reference may be reallocated by build.block() below + uint32_t startingInsn = startingBlock.start; + // Create new linearized block into which we are going to redirect starting block jump IrOp newBlock = build.block(IrBlockKind::Linearized); visited.push_back(false); - // TODO: placement of linear blocks in final lowering is sub-optimal, it should follow our predecessor build.beginBlock(newBlock); + // By default, blocks are ordered according to start instruction; we alter sort order to make sure linearized block is placed right after the starting block + function.blocks[newBlock.index].sortkey = startingInsn + 1; + replace(function, termInst.a, newBlock); // Clone the collected path into our fresh block diff --git a/luau/Common/include/Luau/Bytecode.h b/luau/Common/include/Luau/Bytecode.h index 54086d5..eab57b1 100644 --- a/luau/Common/include/Luau/Bytecode.h +++ b/luau/Common/include/Luau/Bytecode.h @@ -413,8 +413,10 @@ enum LuauBytecodeTag { // Bytecode version; runtime supports [MIN, MAX], compiler emits TARGET by default but may emit a higher version when flags are enabled LBC_VERSION_MIN = 3, - LBC_VERSION_MAX = 3, + LBC_VERSION_MAX = 4, LBC_VERSION_TARGET = 3, + // Type encoding version + LBC_TYPE_VERSION = 1, // Types of constant table entries LBC_CONSTANT_NIL = 0, LBC_CONSTANT_BOOLEAN, @@ -425,6 +427,25 @@ enum LuauBytecodeTag LBC_CONSTANT_CLOSURE, }; +// Type table tags +enum LuauBytecodeEncodedType +{ + LBC_TYPE_NIL = 0, + LBC_TYPE_BOOLEAN, + LBC_TYPE_NUMBER, + LBC_TYPE_STRING, + LBC_TYPE_TABLE, + LBC_TYPE_FUNCTION, + LBC_TYPE_THREAD, + LBC_TYPE_USERDATA, + LBC_TYPE_VECTOR, + + LBC_TYPE_ANY = 15, + LBC_TYPE_OPTIONAL_BIT = 1 << 7, + + LBC_TYPE_INVALID = 256, +}; + // Builtin function ids, used in LOP_FASTCALL enum LuauBuiltinFunction { diff --git a/luau/Compiler/include/Luau/BytecodeBuilder.h b/luau/Compiler/include/Luau/BytecodeBuilder.h index ba4232a..f3c2f47 100644 --- a/luau/Compiler/include/Luau/BytecodeBuilder.h +++ b/luau/Compiler/include/Luau/BytecodeBuilder.h @@ -74,6 +74,8 @@ public: void foldJumps(); void expandJumps(); + void setFunctionTypeInfo(std::string value); + void setDebugFunctionName(StringRef name); void setDebugFunctionLineDefined(int line); void setDebugLine(int line); @@ -118,6 +120,7 @@ public: std::string dumpFunction(uint32_t id) const; std::string dumpEverything() const; std::string dumpSourceRemarks() const; + std::string dumpTypeInfo() const; void annotateInstruction(std::string& result, uint32_t fid, uint32_t instpos) const; @@ -132,6 +135,7 @@ public: static std::string getError(const std::string& message); static uint8_t getVersion(); + static uint8_t getTypeEncodingVersion(); private: struct Constant @@ -186,6 +190,7 @@ private: std::string dump; std::string dumpname; std::vector dumpinstoffs; + std::string typeinfo; }; struct DebugLocal diff --git a/luau/Compiler/src/BytecodeBuilder.cpp b/luau/Compiler/src/BytecodeBuilder.cpp index e2b769e..9296519 100644 --- a/luau/Compiler/src/BytecodeBuilder.cpp +++ b/luau/Compiler/src/BytecodeBuilder.cpp @@ -6,6 +6,8 @@ #include #include +LUAU_FASTFLAGVARIABLE(BytecodeVersion4, false) + namespace Luau { @@ -513,6 +515,11 @@ bool BytecodeBuilder::patchSkipC(size_t jumpLabel, size_t targetLabel) return true; } +void BytecodeBuilder::setFunctionTypeInfo(std::string value) +{ + functions[currentFunction].typeinfo = std::move(value); +} + void BytecodeBuilder::setDebugFunctionName(StringRef name) { unsigned int index = addStringTableEntry(name); @@ -606,6 +613,13 @@ void BytecodeBuilder::finalize() bytecode = char(version); + if (FFlag::BytecodeVersion4) + { + uint8_t typesversion = getTypeEncodingVersion(); + LUAU_ASSERT(typesversion == 1); + writeByte(bytecode, typesversion); + } + writeStringTable(bytecode); writeVarInt(bytecode, uint32_t(functions.size())); @@ -628,6 +642,14 @@ void BytecodeBuilder::writeFunction(std::string& ss, uint32_t id) const writeByte(ss, func.numupvalues); writeByte(ss, func.isvararg); + if (FFlag::BytecodeVersion4) + { + writeByte(ss, 0); // Reserved for cgflags + + writeVarInt(ss, uint32_t(func.typeinfo.size())); + ss.append(func.typeinfo); + } + // instructions writeVarInt(ss, uint32_t(insns.size())); @@ -1092,9 +1114,18 @@ std::string BytecodeBuilder::getError(const std::string& message) uint8_t BytecodeBuilder::getVersion() { // This function usually returns LBC_VERSION_TARGET but may sometimes return a higher number (within LBC_VERSION_MIN/MAX) under fast flags + + if (FFlag::BytecodeVersion4) + return 4; + return LBC_VERSION_TARGET; } +uint8_t BytecodeBuilder::getTypeEncodingVersion() +{ + return LBC_TYPE_VERSION; +} + #ifdef LUAU_ASSERTENABLED void BytecodeBuilder::validate() const { @@ -2269,6 +2300,75 @@ std::string BytecodeBuilder::dumpSourceRemarks() const return result; } +static const char* getBaseTypeString(uint8_t type) +{ + uint8_t tag = type & ~LBC_TYPE_OPTIONAL_BIT; + switch (tag) + { + case LBC_TYPE_NIL: + return "nil"; + case LBC_TYPE_BOOLEAN: + return "boolean"; + case LBC_TYPE_NUMBER: + return "number"; + case LBC_TYPE_STRING: + return "string"; + case LBC_TYPE_TABLE: + return "{ }"; + case LBC_TYPE_FUNCTION: + return "function( )"; + case LBC_TYPE_THREAD: + return "thread"; + case LBC_TYPE_USERDATA: + return "userdata"; + case LBC_TYPE_VECTOR: + return "vector"; + case LBC_TYPE_ANY: + return "any"; + } + + LUAU_ASSERT(!"Unhandled type in getBaseTypeString"); + return nullptr; +} + +std::string BytecodeBuilder::dumpTypeInfo() const +{ + std::string result; + + for (size_t i = 0; i < functions.size(); ++i) + { + const std::string& typeinfo = functions[i].typeinfo; + if (typeinfo.empty()) + continue; + + uint8_t encodedType = typeinfo[0]; + + LUAU_ASSERT(encodedType == LBC_TYPE_FUNCTION); + + formatAppend(result, "%zu: function(", i); + + LUAU_ASSERT(typeinfo.size() >= 2); + + uint8_t numparams = typeinfo[1]; + + LUAU_ASSERT(size_t(1 + numparams - 1) < typeinfo.size()); + + for (uint8_t i = 0; i < numparams; ++i) + { + uint8_t et = typeinfo[2 + i]; + const char* optional = (et & LBC_TYPE_OPTIONAL_BIT) ? "?" : ""; + formatAppend(result, "%s%s", getBaseTypeString(et), optional); + + if (i + 1 != numparams) + formatAppend(result, ", "); + } + + formatAppend(result, ")\n"); + } + + return result; +} + void BytecodeBuilder::annotateInstruction(std::string& result, uint32_t fid, uint32_t instpos) const { if ((dumpFlags & Dump_Code) == 0) diff --git a/luau/Compiler/src/Compiler.cpp b/luau/Compiler/src/Compiler.cpp index 6466722..8dd9876 100644 --- a/luau/Compiler/src/Compiler.cpp +++ b/luau/Compiler/src/Compiler.cpp @@ -10,6 +10,7 @@ #include "ConstantFolding.h" #include "CostModel.h" #include "TableShape.h" +#include "Types.h" #include "ValueTracking.h" #include @@ -25,7 +26,8 @@ LUAU_FASTINTVARIABLE(LuauCompileInlineThreshold, 25) LUAU_FASTINTVARIABLE(LuauCompileInlineThresholdMaxBoost, 300) LUAU_FASTINTVARIABLE(LuauCompileInlineDepth, 5) -LUAU_FASTFLAGVARIABLE(LuauCompileInlineDefer, false) +LUAU_FASTFLAGVARIABLE(CompileFunctionType, false) +LUAU_FASTFLAG(BytecodeVersion4) namespace Luau { @@ -202,6 +204,13 @@ struct Compiler setDebugLine(func); + if (FFlag::BytecodeVersion4 && FFlag::CompileFunctionType) + { + std::string funcType = getFunctionType(func); + if (!funcType.empty()) + bytecode.setFunctionTypeInfo(std::move(funcType)); + } + if (func->vararg) bytecode.emitABC(LOP_PREPVARARGS, uint8_t(self + func->args.size), 0, 0); @@ -560,15 +569,7 @@ struct Compiler size_t oldLocals = localStack.size(); std::vector args; - if (FFlag::LuauCompileInlineDefer) - { - args.reserve(func->args.size); - } - else - { - // note that we push the frame early; this is needed to block recursive inline attempts - inlineFrames.push_back({func, oldLocals, target, targetCount}); - } + args.reserve(func->args.size); // evaluate all arguments; note that we don't emit code for constant arguments (relying on constant folding) // note that compiler state (variable registers/values) does not change here - we defer that to a separate loop below to handle nested calls @@ -590,16 +591,8 @@ struct Compiler else LUAU_ASSERT(!"Unexpected expression type"); - if (FFlag::LuauCompileInlineDefer) - { - for (size_t j = i; j < func->args.size; ++j) - args.push_back({func->args.data[j], uint8_t(reg + (j - i))}); - } - else - { - for (size_t j = i; j < func->args.size; ++j) - pushLocal(func->args.data[j], uint8_t(reg + (j - i))); - } + for (size_t j = i; j < func->args.size; ++j) + args.push_back({func->args.data[j], uint8_t(reg + (j - i))}); // all remaining function arguments have been allocated and assigned to break; @@ -614,26 +607,17 @@ struct Compiler else bytecode.emitABC(LOP_LOADNIL, reg, 0, 0); - if (FFlag::LuauCompileInlineDefer) - args.push_back({var, reg}); - else - pushLocal(var, reg); + args.push_back({var, reg}); } else if (arg == nullptr) { // since the argument is not mutated, we can simply fold the value into the expressions that need it - if (FFlag::LuauCompileInlineDefer) - args.push_back({var, kInvalidReg, {Constant::Type_Nil}}); - else - locstants[var] = {Constant::Type_Nil}; + args.push_back({var, kInvalidReg, {Constant::Type_Nil}}); } else if (const Constant* cv = constants.find(arg); cv && cv->type != Constant::Type_Unknown) { // since the argument is not mutated, we can simply fold the value into the expressions that need it - if (FFlag::LuauCompileInlineDefer) - args.push_back({var, kInvalidReg, *cv}); - else - locstants[var] = *cv; + args.push_back({var, kInvalidReg, *cv}); } else { @@ -643,20 +627,14 @@ struct Compiler // if the argument is a local that isn't mutated, we will simply reuse the existing register if (int reg = le ? getExprLocalReg(le) : -1; reg >= 0 && (!lv || !lv->written)) { - if (FFlag::LuauCompileInlineDefer) - args.push_back({var, uint8_t(reg)}); - else - pushLocal(var, uint8_t(reg)); + args.push_back({var, uint8_t(reg)}); } else { uint8_t temp = allocReg(arg, 1); compileExprTemp(arg, temp); - if (FFlag::LuauCompileInlineDefer) - args.push_back({var, temp}); - else - pushLocal(var, temp); + args.push_back({var, temp}); } } } @@ -668,19 +646,16 @@ struct Compiler compileExprAuto(expr->args.data[i], rsi); } - if (FFlag::LuauCompileInlineDefer) - { - // apply all evaluated arguments to the compiler state - // note: locals use current startpc for debug info, although some of them have been computed earlier; this is similar to compileStatLocal - for (InlineArg& arg : args) - if (arg.value.type == Constant::Type_Unknown) - pushLocal(arg.local, arg.reg); - else - locstants[arg.local] = arg.value; + // apply all evaluated arguments to the compiler state + // note: locals use current startpc for debug info, although some of them have been computed earlier; this is similar to compileStatLocal + for (InlineArg& arg : args) + if (arg.value.type == Constant::Type_Unknown) + pushLocal(arg.local, arg.reg); + else + locstants[arg.local] = arg.value; - // the inline frame will be used to compile return statements as well as to reject recursive inlining attempts - inlineFrames.push_back({func, oldLocals, target, targetCount}); - } + // the inline frame will be used to compile return statements as well as to reject recursive inlining attempts + inlineFrames.push_back({func, oldLocals, target, targetCount}); // fold constant values updated above into expressions in the function body foldConstants(constants, variables, locstants, builtinsFold, func->body); diff --git a/luau/Compiler/src/Types.cpp b/luau/Compiler/src/Types.cpp new file mode 100644 index 0000000..0204198 --- /dev/null +++ b/luau/Compiler/src/Types.cpp @@ -0,0 +1,106 @@ +// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details +#include "Luau/BytecodeBuilder.h" + +#include "Types.h" + +namespace Luau +{ + +static LuauBytecodeEncodedType getType(AstType* ty) +{ + if (AstTypeReference* ref = ty->as()) + { + if (ref->name == "nil") + return LBC_TYPE_NIL; + else if (ref->name == "boolean") + return LBC_TYPE_BOOLEAN; + else if (ref->name == "number") + return LBC_TYPE_NUMBER; + else if (ref->name == "string") + return LBC_TYPE_STRING; + else if (ref->name == "thread") + return LBC_TYPE_THREAD; + else if (ref->name == "any" || ref->name == "unknown") + return LBC_TYPE_ANY; + } + else if (AstTypeTable* table = ty->as()) + { + return LBC_TYPE_TABLE; + } + else if (AstTypeFunction* func = ty->as()) + { + return LBC_TYPE_FUNCTION; + } + else if (AstTypeUnion* un = ty->as()) + { + bool optional = false; + LuauBytecodeEncodedType type = LBC_TYPE_INVALID; + + for (AstType* ty : un->types) + { + LuauBytecodeEncodedType et = getType(ty); + + if (et == LBC_TYPE_NIL) + { + optional = true; + continue; + } + + if (type == LBC_TYPE_INVALID) + { + type = et; + continue; + } + + if (type != et) + return LBC_TYPE_ANY; + } + + if (type == LBC_TYPE_INVALID) + return LBC_TYPE_ANY; + + return LuauBytecodeEncodedType(type | (optional && (type != LBC_TYPE_ANY) ? LBC_TYPE_OPTIONAL_BIT : 0)); + } + else if (AstTypeIntersection* inter = ty->as()) + { + return LBC_TYPE_ANY; + } + + return LBC_TYPE_ANY; +} + +std::string getFunctionType(const AstExprFunction* func) +{ + if (func->vararg || func->generics.size || func->genericPacks.size) + return {}; + + bool self = func->self != 0; + + std::string typeInfo; + typeInfo.reserve(func->args.size + self + 2); + + typeInfo.push_back(LBC_TYPE_FUNCTION); + typeInfo.push_back(uint8_t(self + func->args.size)); + + if (self) + typeInfo.push_back(LBC_TYPE_TABLE); + + bool haveNonAnyParam = false; + for (AstLocal* arg : func->args) + { + LuauBytecodeEncodedType ty = arg->annotation ? getType(arg->annotation) : LBC_TYPE_ANY; + + if (ty != LBC_TYPE_ANY) + haveNonAnyParam = true; + + typeInfo.push_back(ty); + } + + // If all parameters simplify to any, we can just omit type info for this function + if (!haveNonAnyParam) + return {}; + + return typeInfo; +} + +} // namespace Luau \ No newline at end of file diff --git a/luau/Compiler/src/Types.h b/luau/Compiler/src/Types.h new file mode 100644 index 0000000..1be9155 --- /dev/null +++ b/luau/Compiler/src/Types.h @@ -0,0 +1,9 @@ +// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details +#pragma once + +#include "Luau/Ast.h" + +namespace Luau +{ +std::string getFunctionType(const AstExprFunction* func); +} // namespace Luau diff --git a/luau/VM/src/ldo.cpp b/luau/VM/src/ldo.cpp index 7f58d96..e5fde4d 100644 --- a/luau/VM/src/ldo.cpp +++ b/luau/VM/src/ldo.cpp @@ -17,8 +17,6 @@ #include -LUAU_FASTFLAGVARIABLE(LuauUniformTopHandling, false) - /* ** {====================================================== ** Error-recovery functions @@ -252,7 +250,7 @@ void luaD_call(lua_State* L, StkId func, int nresults) L->isactive = false; } - if (FFlag::LuauUniformTopHandling && nresults != LUA_MULTRET) + if (nresults != LUA_MULTRET) L->top = restorestack(L, old_func) + nresults; L->nCcalls--; diff --git a/luau/VM/src/lvm.h b/luau/VM/src/lvm.h index cfb6456..5ec7bc1 100644 --- a/luau/VM/src/lvm.h +++ b/luau/VM/src/lvm.h @@ -24,7 +24,6 @@ LUAI_FUNC void luaV_gettable(lua_State* L, const TValue* t, TValue* key, StkId v LUAI_FUNC void luaV_settable(lua_State* L, const TValue* t, TValue* key, StkId val); LUAI_FUNC void luaV_concat(lua_State* L, int total, int last); LUAI_FUNC void luaV_getimport(lua_State* L, Table* env, TValue* k, StkId res, uint32_t id, bool propagatenil); -LUAI_FUNC void luaV_getimport_dep(lua_State* L, Table* env, TValue* k, uint32_t id, bool propagatenil); LUAI_FUNC void luaV_prepareFORN(lua_State* L, StkId plimit, StkId pstep, StkId pinit); LUAI_FUNC void luaV_callTM(lua_State* L, int nparams, int res); LUAI_FUNC void luaV_tryfuncTM(lua_State* L, StkId func); diff --git a/luau/VM/src/lvmexecute.cpp b/luau/VM/src/lvmexecute.cpp index 280c479..90c5a7e 100644 --- a/luau/VM/src/lvmexecute.cpp +++ b/luau/VM/src/lvmexecute.cpp @@ -16,9 +16,6 @@ #include -LUAU_FASTFLAG(LuauUniformTopHandling) -LUAU_FASTFLAG(LuauGetImportDirect) - // Disable c99-designator to avoid the warning in CGOTO dispatch table #ifdef __clang__ #if __has_warning("-Wc99-designator") @@ -433,20 +430,8 @@ reentry: { uint32_t aux = *pc++; - if (FFlag::LuauGetImportDirect) - { - VM_PROTECT(luaV_getimport(L, cl->env, k, ra, aux, /* propagatenil= */ false)); - VM_NEXT(); - } - else - { - VM_PROTECT(luaV_getimport_dep(L, cl->env, k, aux, /* propagatenil= */ false)); - ra = VM_REG(LUAU_INSN_A(insn)); // previous call may change the stack - - setobj2s(L, ra, L->top - 1); - L->top--; - VM_NEXT(); - } + VM_PROTECT(luaV_getimport(L, cl->env, k, ra, aux, /* propagatenil= */ false)); + VM_NEXT(); } } @@ -1043,8 +1028,6 @@ reentry: // we're done! if (LUAU_UNLIKELY(ci->flags & LUA_CALLINFO_RETURN)) { - if (!FFlag::LuauUniformTopHandling) - L->top = res; goto exit; } diff --git a/luau/VM/src/lvmload.cpp b/luau/VM/src/lvmload.cpp index f26cc05..edbe503 100644 --- a/luau/VM/src/lvmload.cpp +++ b/luau/VM/src/lvmload.cpp @@ -13,8 +13,6 @@ #include -LUAU_FASTFLAGVARIABLE(LuauGetImportDirect, false) - // TODO: RAII deallocation doesn't work for longjmp builds if a memory error happens template struct TempBuffer @@ -77,34 +75,6 @@ void luaV_getimport(lua_State* L, Table* env, TValue* k, StkId res, uint32_t id, luaV_gettable(L, res, &k[id2], res); } -void luaV_getimport_dep(lua_State* L, Table* env, TValue* k, uint32_t id, bool propagatenil) -{ - LUAU_ASSERT(!FFlag::LuauGetImportDirect); - - int count = id >> 30; - int id0 = count > 0 ? int(id >> 20) & 1023 : -1; - int id1 = count > 1 ? int(id >> 10) & 1023 : -1; - int id2 = count > 2 ? int(id) & 1023 : -1; - - // allocate a stack slot so that we can do table lookups - luaD_checkstack(L, 1); - setnilvalue(L->top); - L->top++; - - // global lookup into L->top-1 - TValue g; - sethvalue(L, &g, env); - luaV_gettable(L, &g, &k[id0], L->top - 1); - - // table lookup for id1 - if (id1 >= 0 && (!propagatenil || !ttisnil(L->top - 1))) - luaV_gettable(L, L->top - 1, &k[id1], L->top - 1); - - // table lookup for id2 - if (id2 >= 0 && (!propagatenil || !ttisnil(L->top - 1))) - luaV_gettable(L, L->top - 1, &k[id2], L->top - 1); -} - template static T read(const char* data, size_t size, size_t& offset) { @@ -153,17 +123,12 @@ static void resolveImportSafe(lua_State* L, Table* env, TValue* k, uint32_t id) // note: we call getimport with nil propagation which means that accesses to table chains like A.B.C will resolve in nil // this is technically not necessary but it reduces the number of exceptions when loading scripts that rely on getfenv/setfenv for global // injection - if (FFlag::LuauGetImportDirect) - { - // allocate a stack slot so that we can do table lookups - luaD_checkstack(L, 1); - setnilvalue(L->top); - L->top++; + // allocate a stack slot so that we can do table lookups + luaD_checkstack(L, 1); + setnilvalue(L->top); + L->top++; - luaV_getimport(L, L->gt, self->k, L->top - 1, self->id, /* propagatenil= */ true); - } - else - luaV_getimport_dep(L, L->gt, self->k, self->id, /* propagatenil= */ true); + luaV_getimport(L, L->gt, self->k, L->top - 1, self->id, /* propagatenil= */ true); } }; @@ -194,6 +159,8 @@ int luau_load(lua_State* L, const char* chunkname, const char* data, size_t size uint8_t version = read(data, size, offset); + + // 0 means the rest of the bytecode is the error message if (version == 0) { @@ -221,6 +188,13 @@ int luau_load(lua_State* L, const char* chunkname, const char* data, size_t size TString* source = luaS_new(L, chunkname); + + if (version >= 4) + { + uint8_t typesversion = read(data, size, offset); + LUAU_ASSERT(typesversion == 1); + } + // string table unsigned int stringCount = readVarInt(data, size, offset); TempBuffer strings(L, stringCount); @@ -248,6 +222,25 @@ int luau_load(lua_State* L, const char* chunkname, const char* data, size_t size p->nups = read(data, size, offset); p->is_vararg = read(data, size, offset); + if (version >= 4) + { + uint8_t cgflags = read(data, size, offset); + LUAU_ASSERT(cgflags == 0); + + uint32_t typesize = readVarInt(data, size, offset); + + if (typesize) + { + uint8_t* types = (uint8_t*)data + offset; + + LUAU_ASSERT(typesize == unsigned(2 + p->numparams)); + LUAU_ASSERT(types[0] == LBC_TYPE_FUNCTION); + LUAU_ASSERT(types[1] == p->numparams); + + offset += typesize; + } + } + p->sizecode = readVarInt(data, size, offset); p->code = luaM_newarray(L, p->sizecode, Instruction, p->memcat); for (int j = 0; j < p->sizecode; ++j)