diff --git a/Cargo.toml b/Cargo.toml index aae1e76..576f068 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "luau0-src" -version = "0.3.3+luau531" +version = "0.3.4+luau533" authors = ["Aleksandr Orlenko "] edition = "2021" repository = "https://github.com/khvzak/luau-src-rs" diff --git a/luau/Common/include/Luau/Bytecode.h b/luau/Common/include/Luau/Bytecode.h index f71d893..218bb5d 100644 --- a/luau/Common/include/Luau/Bytecode.h +++ b/luau/Common/include/Luau/Bytecode.h @@ -7,7 +7,7 @@ // Creating the bytecode is outside the scope of this file and is handled by bytecode builder (BytecodeBuilder.h) and bytecode compiler (Compiler.h) // Note that ALL enums declared in this file are order-sensitive since the values are baked into bytecode that needs to be processed by legacy clients. -// Bytecode definitions +// # Bytecode definitions // Bytecode instructions are using "word code" - each instruction is one or many 32-bit words. // The first word in the instruction is always the instruction header, and *must* contain the opcode (enum below) in the least significant byte. // @@ -19,7 +19,7 @@ // Instruction word is sometimes followed by one extra word, indicated as AUX - this is just a 32-bit word and is decoded according to the specification for each opcode. // For each opcode the encoding is *static* - that is, based on the opcode you know a-priory how large the instruction is, with the exception of NEWCLOSURE -// Bytecode indices +// # Bytecode indices // Bytecode instructions commonly refer to integer values that define offsets or indices for various entities. For each type, there's a maximum encodable value. // Note that in some cases, the compiler will set a lower limit than the maximum encodable value is to prevent fragile code into bumping against the limits whenever we change the compilation details. 
// Additionally, in some specific instructions such as ANDK, the limit on the encoded value is smaller; this means that if a value is larger, a different instruction must be selected. @@ -29,6 +29,15 @@ // Constants: 0-2^23-1. Constants are stored in a table allocated with each proto; to allow for future bytecode tweaks the encodable value is limited to 23 bits. // Closures: 0-2^15-1. Closures are created from child protos via a child index; the limit is for the number of closures immediately referenced in each function. // Jumps: -2^23..2^23. Jump offsets are specified in word increments, so jumping over an instruction may sometimes require an offset of 2 or more. + +// # Bytecode versions +// Bytecode serialized format embeds a version number, that dictates both the serialized form as well as the allowed instructions. As long as the bytecode version falls into supported +// range (indicated by LBC_VERSION_MIN / LBC_VERSION_MAX) and was produced by Luau compiler, it should load and execute correctly. +// +// Note that Luau runtime doesn't provide indefinite bytecode compatibility: support for older versions gets removed over time. As such, bytecode isn't a durable storage format and it's expected +// that Luau users can recompile bytecode from source on Luau version upgrades if necessary. 
+ +// Bytecode opcode, part of the instruction header enum LuauOpcode { // NOP: noop @@ -380,8 +389,10 @@ enum LuauOpcode // Bytecode tags, used internally for bytecode encoded as a string enum LuauBytecodeTag { - // Bytecode version - LBC_VERSION = 2, + // Bytecode version; runtime supports [MIN, MAX], compiler emits TARGET by default but may emit a higher version when flags are enabled + LBC_VERSION_MIN = 2, + LBC_VERSION_MAX = 2, + LBC_VERSION_TARGET = 2, // Types of constant table entries LBC_CONSTANT_NIL = 0, LBC_CONSTANT_BOOLEAN, diff --git a/luau/Compiler/include/Luau/BytecodeBuilder.h b/luau/Compiler/include/Luau/BytecodeBuilder.h index dbe5429..6ec10b5 100644 --- a/luau/Compiler/include/Luau/BytecodeBuilder.h +++ b/luau/Compiler/include/Luau/BytecodeBuilder.h @@ -119,6 +119,8 @@ public: static std::string getError(const std::string& message); + static uint8_t getVersion(); + private: struct Constant { diff --git a/luau/Compiler/src/BytecodeBuilder.cpp b/luau/Compiler/src/BytecodeBuilder.cpp index 597b2f0..301cf25 100644 --- a/luau/Compiler/src/BytecodeBuilder.cpp +++ b/luau/Compiler/src/BytecodeBuilder.cpp @@ -9,6 +9,9 @@ namespace Luau { +static_assert(LBC_VERSION_TARGET >= LBC_VERSION_MIN && LBC_VERSION_TARGET <= LBC_VERSION_MAX, "Invalid bytecode version setup"); +static_assert(LBC_VERSION_MAX <= 127, "Bytecode version should be 7-bit so that we can extend the serialization to use varint transparently"); + static const uint32_t kMaxConstantCount = 1 << 23; static const uint32_t kMaxClosureCount = 1 << 15; @@ -572,7 +575,10 @@ void BytecodeBuilder::finalize() bytecode.reserve(capacity); // assemble final bytecode blob - bytecode = char(LBC_VERSION); + uint8_t version = getVersion(); + LUAU_ASSERT(version >= LBC_VERSION_MIN && version <= LBC_VERSION_MAX); + + bytecode = char(version); writeStringTable(bytecode); @@ -1040,7 +1046,7 @@ void BytecodeBuilder::expandJumps() std::string BytecodeBuilder::getError(const std::string& message) { - // 0 acts as a 
special marker for error bytecode (it's equal to LBC_VERSION for valid bytecode blobs) + // 0 acts as a special marker for error bytecode (it's equal to LBC_VERSION_TARGET for valid bytecode blobs) std::string result; result += char(0); result += message; @@ -1048,6 +1054,12 @@ std::string BytecodeBuilder::getError(const std::string& message) return result; } +uint8_t BytecodeBuilder::getVersion() +{ + // This function usually returns LBC_VERSION_TARGET but may sometimes return a higher number (within LBC_VERSION_MIN/MAX) under fast flags + return LBC_VERSION_TARGET; +} + #ifdef LUAU_ASSERTENABLED void BytecodeBuilder::validate() const { @@ -1075,6 +1087,8 @@ void BytecodeBuilder::validate() const LUAU_ASSERT(i <= insns.size()); } + std::vector openCaptures; + // second pass: validate the rest of the bytecode for (size_t i = 0; i < insns.size();) { @@ -1121,6 +1135,8 @@ void BytecodeBuilder::validate() const case LOP_CLOSEUPVALS: VREG(LUAU_INSN_A(insn)); + while (openCaptures.size() && openCaptures.back() >= LUAU_INSN_A(insn)) + openCaptures.pop_back(); break; case LOP_GETIMPORT: @@ -1388,8 +1404,12 @@ void BytecodeBuilder::validate() const switch (LUAU_INSN_A(insn)) { case LCT_VAL: + VREG(LUAU_INSN_B(insn)); + break; + case LCT_REF: VREG(LUAU_INSN_B(insn)); + openCaptures.push_back(LUAU_INSN_B(insn)); break; case LCT_UPVAL: @@ -1409,6 +1429,12 @@ void BytecodeBuilder::validate() const LUAU_ASSERT(i <= insns.size()); } + // all CAPTURE REF instructions must have a CLOSEUPVALS instruction after them in the bytecode stream + // this doesn't guarantee safety as it doesn't perform basic block based analysis, but if this fails + // then the bytecode is definitely unsafe to run since the compiler won't generate backwards branches + // except for loop edges + LUAU_ASSERT(openCaptures.empty()); + #undef VREG #undef VREGEND #undef VUPVAL diff --git a/luau/Compiler/src/Compiler.cpp b/luau/Compiler/src/Compiler.cpp index 7431cde..e732256 100644 --- 
a/luau/Compiler/src/Compiler.cpp +++ b/luau/Compiler/src/Compiler.cpp @@ -16,8 +16,6 @@ #include #include -LUAU_FASTFLAGVARIABLE(LuauCompileIterNoPairs, false) - LUAU_FASTINTVARIABLE(LuauCompileLoopUnrollThreshold, 25) LUAU_FASTINTVARIABLE(LuauCompileLoopUnrollThresholdMaxBoost, 300) @@ -246,6 +244,14 @@ struct Compiler f.canInline = true; f.stackSize = stackSize; f.costModel = modelCost(func->body, func->args.data, func->args.size); + + // track functions that only ever return a single value so that we can convert multret calls to fixedret calls + if (allPathsEndWithReturn(func->body)) + { + ReturnVisitor returnVisitor(this); + stat->visit(&returnVisitor); + f.returnsOne = returnVisitor.returnsOne; + } } upvals.clear(); // note: instead of std::move above, we copy & clear to preserve capacity for future pushes @@ -260,6 +266,19 @@ struct Compiler { if (AstExprCall* expr = node->as()) { + // Optimization: convert multret calls to functions that always return one value to fixedret calls; this facilitates inlining + if (options.optimizationLevel >= 2) + { + AstExprFunction* func = getFunctionExpr(expr->func); + Function* fi = func ? functions.find(func) : nullptr; + + if (fi && fi->returnsOne) + { + compileExprTemp(node, target); + return false; + } + } + // We temporarily swap out regTop to have targetTop work correctly... 
// This is a crude hack but it's necessary for correctness :( RegScope rs(this, target); @@ -447,7 +466,9 @@ struct Compiler return false; } - // TODO: we can compile multret functions if all returns of the function are multret as well + // we can't inline multret functions because the caller expects L->top to be adjusted: + // - inlined return compiles to a JUMP, and we don't have an instruction that adjusts L->top arbitrarily + // - even if we did, right now all L->top adjustments are immediately consumed by the next instruction, and for now we want to preserve that if (multRet) { bytecode.addDebugRemark("inlining failed: can't convert fixed returns to multret"); @@ -492,7 +513,7 @@ struct Compiler size_t oldLocals = localStack.size(); // note that we push the frame early; this is needed to block recursive inline attempts - inlineFrames.push_back({func, target, targetCount}); + inlineFrames.push_back({func, oldLocals, target, targetCount}); // evaluate all arguments; note that we don't emit code for constant arguments (relying on constant folding) for (size_t i = 0; i < func->args.size; ++i) @@ -593,6 +614,8 @@ struct Compiler { for (size_t i = 0; i < targetCount; ++i) bytecode.emitABC(LOP_LOADNIL, uint8_t(target + i), 0, 0); + + closeLocals(oldLocals); } popLocals(oldLocals); @@ -2355,6 +2378,8 @@ struct Compiler compileExprListTemp(stat->list, frame.target, frame.targetCount, /* targetTop= */ false); + closeLocals(frame.localOffset); + if (!fallthrough) { size_t jumpLabel = bytecode.emitLabel(); @@ -2645,7 +2670,7 @@ struct Compiler else if (builtin.isGlobal("pairs")) // for .. in pairs(t) { skipOp = LOP_FORGPREP_NEXT; - loopOp = FFlag::LuauCompileIterNoPairs ? LOP_FORGLOOP : LOP_FORGLOOP_NEXT; + loopOp = LOP_FORGLOOP; } } else if (stat->values.size == 2) @@ -2655,7 +2680,7 @@ struct Compiler if (builtin.isGlobal("next")) // for .. in next,t { skipOp = LOP_FORGPREP_NEXT; - loopOp = FFlag::LuauCompileIterNoPairs ? 
LOP_FORGLOOP : LOP_FORGLOOP_NEXT; + loopOp = LOP_FORGLOOP; } } } @@ -3316,6 +3341,48 @@ struct Compiler std::vector upvals; }; + struct ReturnVisitor: AstVisitor + { + Compiler* self; + bool returnsOne = true; + + ReturnVisitor(Compiler* self) + : self(self) + { + } + + bool visit(AstExpr* expr) override + { + return false; + } + + bool visit(AstStatReturn* stat) override + { + if (stat->list.size == 1) + { + AstExpr* value = stat->list.data[0]; + + if (AstExprCall* expr = value->as()) + { + AstExprFunction* func = self->getFunctionExpr(expr->func); + Function* fi = func ? self->functions.find(func) : nullptr; + + returnsOne &= fi && fi->returnsOne; + } + else if (value->is()) + { + returnsOne = false; + } + } + else + { + returnsOne = false; + } + + return false; + } + }; + struct RegScope { RegScope(Compiler* self) @@ -3351,6 +3418,7 @@ struct Compiler uint64_t costModel = 0; unsigned int stackSize = 0; bool canInline = false; + bool returnsOne = false; }; struct Local @@ -3384,6 +3452,8 @@ struct Compiler { AstExprFunction* func; + size_t localOffset; + uint8_t target; uint8_t targetCount; diff --git a/luau/VM/src/lobject.h b/luau/VM/src/lobject.h index 5e02c2e..bdcb85c 100644 --- a/luau/VM/src/lobject.h +++ b/luau/VM/src/lobject.h @@ -418,7 +418,7 @@ typedef struct Table CommonHeader; - uint8_t flags; /* 1<

flags = 0 +#define invalidateTMcache(t) t->tmcache = 0 // empty hash data points to dummynode so that we can always dereference it const LuaNode luaH_dummynode = { @@ -479,7 +479,7 @@ Table* luaH_new(lua_State* L, int narray, int nhash) Table* t = luaM_newgco(L, Table, sizeof(Table), L->activememcat); luaC_init(L, t, LUA_TTABLE); t->metatable = NULL; - t->flags = cast_byte(~0); + t->tmcache = cast_byte(~0); t->array = NULL; t->sizearray = 0; t->lastfree = 0; @@ -778,7 +778,7 @@ Table* luaH_clone(lua_State* L, Table* tt) Table* t = luaM_newgco(L, Table, sizeof(Table), L->activememcat); luaC_init(L, t, LUA_TTABLE); t->metatable = tt->metatable; - t->flags = tt->flags; + t->tmcache = tt->tmcache; t->array = NULL; t->sizearray = 0; t->lsizenode = 0; @@ -835,5 +835,5 @@ void luaH_clear(Table* tt) } /* back to empty -> no tag methods present */ - tt->flags = cast_byte(~0); + tt->tmcache = cast_byte(~0); } diff --git a/luau/VM/src/ltm.cpp b/luau/VM/src/ltm.cpp index 9b99506..e7df4e5 100644 --- a/luau/VM/src/ltm.cpp +++ b/luau/VM/src/ltm.cpp @@ -88,8 +88,8 @@ const TValue* luaT_gettm(Table* events, TMS event, TString* ename) const TValue* tm = luaH_getstr(events, ename); LUAU_ASSERT(event <= TM_EQ); if (ttisnil(tm)) - { /* no tag method? */ - events->flags |= cast_byte(1u << event); /* cache this fact */ + { /* no tag method? */ + events->tmcache |= cast_byte(1u << event); /* cache this fact */ return NULL; } else diff --git a/luau/VM/src/ltm.h b/luau/VM/src/ltm.h index e1b95c2..a522394 100644 --- a/luau/VM/src/ltm.h +++ b/luau/VM/src/ltm.h @@ -41,10 +41,10 @@ typedef enum } TMS; // clang-format on -#define gfasttm(g, et, e) ((et) == NULL ? NULL : ((et)->flags & (1u << (e))) ? NULL : luaT_gettm(et, e, (g)->tmname[e])) +#define gfasttm(g, et, e) ((et) == NULL ? NULL : ((et)->tmcache & (1u << (e))) ? 
NULL : luaT_gettm(et, e, (g)->tmname[e])) #define fasttm(l, et, e) gfasttm(l->global, et, e) -#define fastnotm(et, e) ((et) == NULL || ((et)->flags & (1u << (e)))) +#define fastnotm(et, e) ((et) == NULL || ((et)->tmcache & (1u << (e)))) LUAI_DATA const char* const luaT_typenames[]; LUAI_DATA const char* const luaT_eventname[]; diff --git a/luau/VM/src/ludata.cpp b/luau/VM/src/ludata.cpp index 2815268..c2110cb 100644 --- a/luau/VM/src/ludata.cpp +++ b/luau/VM/src/ludata.cpp @@ -26,6 +26,8 @@ void luaU_freeudata(lua_State* L, Udata* u, lua_Page* page) { void (*dtor)(lua_State*, void*) = nullptr; dtor = L->global->udatagc[u->tag]; + // TODO: access to L here is highly unsafe since this is called during internal GC traversal + // certain operations such as lua_getthreaddata are okay, but by and large this risks crashes on improper use if (dtor) dtor(L, u->data); } diff --git a/luau/VM/src/lvmload.cpp b/luau/VM/src/lvmload.cpp index 8b742f1..86afddd 100644 --- a/luau/VM/src/lvmload.cpp +++ b/luau/VM/src/lvmload.cpp @@ -154,11 +154,11 @@ int luau_load(lua_State* L, const char* chunkname, const char* data, size_t size return 1; } - if (version != LBC_VERSION) + if (version < LBC_VERSION_MIN || version > LBC_VERSION_MAX) { char chunkid[LUA_IDSIZE]; luaO_chunkid(chunkid, chunkname, LUA_IDSIZE); - lua_pushfstring(L, "%s: bytecode version mismatch (expected %d, got %d)", chunkid, LBC_VERSION, version); + lua_pushfstring(L, "%s: bytecode version mismatch (expected [%d..%d], got %d)", chunkid, LBC_VERSION_MIN, LBC_VERSION_MAX, version); return 1; }