v0.3.4+luau533

This commit is contained in:
Alex Orlenko 2022-06-27 13:26:42 +01:00
parent 0691157f4c
commit 3320c56ff5
No known key found for this signature in database
GPG Key ID: 4C150C250863B96D
11 changed files with 135 additions and 24 deletions

View File

@ -1,6 +1,6 @@
[package]
name = "luau0-src"
version = "0.3.3+luau531"
version = "0.3.4+luau533"
authors = ["Aleksandr Orlenko <zxteam@protonmail.com>"]
edition = "2021"
repository = "https://github.com/khvzak/luau-src-rs"

View File

@ -7,7 +7,7 @@
// Creating the bytecode is outside the scope of this file and is handled by bytecode builder (BytecodeBuilder.h) and bytecode compiler (Compiler.h)
// Note that ALL enums declared in this file are order-sensitive since the values are baked into bytecode that needs to be processed by legacy clients.
// Bytecode definitions
// # Bytecode definitions
// Bytecode instructions are using "word code" - each instruction is one or many 32-bit words.
// The first word in the instruction is always the instruction header, and *must* contain the opcode (enum below) in the least significant byte.
//
@ -19,7 +19,7 @@
// Instruction word is sometimes followed by one extra word, indicated as AUX - this is just a 32-bit word and is decoded according to the specification for each opcode.
// For each opcode the encoding is *static* - that is, based on the opcode you know a priori how large the instruction is, with the exception of NEWCLOSURE
// Bytecode indices
// # Bytecode indices
// Bytecode instructions commonly refer to integer values that define offsets or indices for various entities. For each type, there's a maximum encodable value.
// Note that in some cases, the compiler will set a lower limit than the maximum encodable value to prevent fragile code from bumping against the limits whenever we change the compilation details.
// Additionally, in some specific instructions such as ANDK, the limit on the encoded value is smaller; this means that if a value is larger, a different instruction must be selected.
@ -29,6 +29,15 @@
// Constants: 0-2^23-1. Constants are stored in a table allocated with each proto; to allow for future bytecode tweaks the encodable value is limited to 23 bits.
// Closures: 0-2^15-1. Closures are created from child protos via a child index; the limit is for the number of closures immediately referenced in each function.
// Jumps: -2^23..2^23. Jump offsets are specified in word increments, so jumping over an instruction may sometimes require an offset of 2 or more.
// # Bytecode versions
// The serialized bytecode format embeds a version number that dictates both the serialized form and the allowed instructions. As long as the bytecode version falls into the supported
// range (indicated by LBC_VERSION_MIN / LBC_VERSION_MAX) and was produced by the Luau compiler, it should load and execute correctly.
//
// Note that Luau runtime doesn't provide indefinite bytecode compatibility: support for older versions gets removed over time. As such, bytecode isn't a durable storage format and it's expected
// that Luau users can recompile bytecode from source on Luau version upgrades if necessary.
// Bytecode opcode, part of the instruction header
enum LuauOpcode
{
// NOP: noop
@ -380,8 +389,10 @@ enum LuauOpcode
// Bytecode tags, used internally for bytecode encoded as a string
enum LuauBytecodeTag
{
// Bytecode version
LBC_VERSION = 2,
// Bytecode version; runtime supports [MIN, MAX], compiler emits TARGET by default but may emit a higher version when flags are enabled
LBC_VERSION_MIN = 2,
LBC_VERSION_MAX = 2,
LBC_VERSION_TARGET = 2,
// Types of constant table entries
LBC_CONSTANT_NIL = 0,
LBC_CONSTANT_BOOLEAN,

View File

@ -119,6 +119,8 @@ public:
static std::string getError(const std::string& message);
static uint8_t getVersion();
private:
struct Constant
{

View File

@ -9,6 +9,9 @@
namespace Luau
{
static_assert(LBC_VERSION_TARGET >= LBC_VERSION_MIN && LBC_VERSION_TARGET <= LBC_VERSION_MAX, "Invalid bytecode version setup");
static_assert(LBC_VERSION_MAX <= 127, "Bytecode version should be 7-bit so that we can extend the serialization to use varint transparently");
static const uint32_t kMaxConstantCount = 1 << 23;
static const uint32_t kMaxClosureCount = 1 << 15;
@ -572,7 +575,10 @@ void BytecodeBuilder::finalize()
bytecode.reserve(capacity);
// assemble final bytecode blob
bytecode = char(LBC_VERSION);
uint8_t version = getVersion();
LUAU_ASSERT(version >= LBC_VERSION_MIN && version <= LBC_VERSION_MAX);
bytecode = char(version);
writeStringTable(bytecode);
@ -1040,7 +1046,7 @@ void BytecodeBuilder::expandJumps()
std::string BytecodeBuilder::getError(const std::string& message)
{
// 0 acts as a special marker for error bytecode (it's equal to LBC_VERSION for valid bytecode blobs)
// 0 acts as a special marker for error bytecode (it's equal to LBC_VERSION_TARGET for valid bytecode blobs)
std::string result;
result += char(0);
result += message;
@ -1048,6 +1054,12 @@ std::string BytecodeBuilder::getError(const std::string& message)
return result;
}
// Reports the bytecode version number that the builder writes as the leading byte of a finalized bytecode blob.
uint8_t BytecodeBuilder::getVersion()
{
    // The default is LBC_VERSION_TARGET; under fast flags a higher number may be selected instead,
    // as long as it stays within the LBC_VERSION_MIN..LBC_VERSION_MAX range
    const uint8_t targetVersion = LBC_VERSION_TARGET;
    return targetVersion;
}
#ifdef LUAU_ASSERTENABLED
void BytecodeBuilder::validate() const
{
@ -1075,6 +1087,8 @@ void BytecodeBuilder::validate() const
LUAU_ASSERT(i <= insns.size());
}
std::vector<uint8_t> openCaptures;
// second pass: validate the rest of the bytecode
for (size_t i = 0; i < insns.size();)
{
@ -1121,6 +1135,8 @@ void BytecodeBuilder::validate() const
case LOP_CLOSEUPVALS:
VREG(LUAU_INSN_A(insn));
while (openCaptures.size() && openCaptures.back() >= LUAU_INSN_A(insn))
openCaptures.pop_back();
break;
case LOP_GETIMPORT:
@ -1388,8 +1404,12 @@ void BytecodeBuilder::validate() const
switch (LUAU_INSN_A(insn))
{
case LCT_VAL:
VREG(LUAU_INSN_B(insn));
break;
case LCT_REF:
VREG(LUAU_INSN_B(insn));
openCaptures.push_back(LUAU_INSN_B(insn));
break;
case LCT_UPVAL:
@ -1409,6 +1429,12 @@ void BytecodeBuilder::validate() const
LUAU_ASSERT(i <= insns.size());
}
// all CAPTURE REF instructions must have a CLOSEUPVALS instruction after them in the bytecode stream
// this doesn't guarantee safety as it doesn't perform basic block based analysis, but if this fails
// then the bytecode is definitely unsafe to run since the compiler won't generate backwards branches
// except for loop edges
LUAU_ASSERT(openCaptures.empty());
#undef VREG
#undef VREGEND
#undef VUPVAL

View File

@ -16,8 +16,6 @@
#include <bitset>
#include <math.h>
LUAU_FASTFLAGVARIABLE(LuauCompileIterNoPairs, false)
LUAU_FASTINTVARIABLE(LuauCompileLoopUnrollThreshold, 25)
LUAU_FASTINTVARIABLE(LuauCompileLoopUnrollThresholdMaxBoost, 300)
@ -246,6 +244,14 @@ struct Compiler
f.canInline = true;
f.stackSize = stackSize;
f.costModel = modelCost(func->body, func->args.data, func->args.size);
// track functions that only ever return a single value so that we can convert multret calls to fixedret calls
if (allPathsEndWithReturn(func->body))
{
ReturnVisitor returnVisitor(this);
stat->visit(&returnVisitor);
f.returnsOne = returnVisitor.returnsOne;
}
}
upvals.clear(); // note: instead of std::move above, we copy & clear to preserve capacity for future pushes
@ -260,6 +266,19 @@ struct Compiler
{
if (AstExprCall* expr = node->as<AstExprCall>())
{
// Optimization: convert multret calls to functions that always return one value to fixedret calls; this facilitates inlining
if (options.optimizationLevel >= 2)
{
AstExprFunction* func = getFunctionExpr(expr->func);
Function* fi = func ? functions.find(func) : nullptr;
if (fi && fi->returnsOne)
{
compileExprTemp(node, target);
return false;
}
}
// We temporarily swap out regTop to have targetTop work correctly...
// This is a crude hack but it's necessary for correctness :(
RegScope rs(this, target);
@ -447,7 +466,9 @@ struct Compiler
return false;
}
// TODO: we can compile multret functions if all returns of the function are multret as well
// we can't inline multret functions because the caller expects L->top to be adjusted:
// - inlined return compiles to a JUMP, and we don't have an instruction that adjusts L->top arbitrarily
// - even if we did, right now all L->top adjustments are immediately consumed by the next instruction, and for now we want to preserve that
if (multRet)
{
bytecode.addDebugRemark("inlining failed: can't convert fixed returns to multret");
@ -492,7 +513,7 @@ struct Compiler
size_t oldLocals = localStack.size();
// note that we push the frame early; this is needed to block recursive inline attempts
inlineFrames.push_back({func, target, targetCount});
inlineFrames.push_back({func, oldLocals, target, targetCount});
// evaluate all arguments; note that we don't emit code for constant arguments (relying on constant folding)
for (size_t i = 0; i < func->args.size; ++i)
@ -593,6 +614,8 @@ struct Compiler
{
for (size_t i = 0; i < targetCount; ++i)
bytecode.emitABC(LOP_LOADNIL, uint8_t(target + i), 0, 0);
closeLocals(oldLocals);
}
popLocals(oldLocals);
@ -2355,6 +2378,8 @@ struct Compiler
compileExprListTemp(stat->list, frame.target, frame.targetCount, /* targetTop= */ false);
closeLocals(frame.localOffset);
if (!fallthrough)
{
size_t jumpLabel = bytecode.emitLabel();
@ -2645,7 +2670,7 @@ struct Compiler
else if (builtin.isGlobal("pairs")) // for .. in pairs(t)
{
skipOp = LOP_FORGPREP_NEXT;
loopOp = FFlag::LuauCompileIterNoPairs ? LOP_FORGLOOP : LOP_FORGLOOP_NEXT;
loopOp = LOP_FORGLOOP;
}
}
else if (stat->values.size == 2)
@ -2655,7 +2680,7 @@ struct Compiler
if (builtin.isGlobal("next")) // for .. in next,t
{
skipOp = LOP_FORGPREP_NEXT;
loopOp = FFlag::LuauCompileIterNoPairs ? LOP_FORGLOOP : LOP_FORGLOOP_NEXT;
loopOp = LOP_FORGLOOP;
}
}
}
@ -3316,6 +3341,48 @@ struct Compiler
std::vector<AstLocal*> upvals;
};
struct ReturnVisitor: AstVisitor
{
Compiler* self;
bool returnsOne = true;
ReturnVisitor(Compiler* self)
: self(self)
{
}
bool visit(AstExpr* expr) override
{
return false;
}
bool visit(AstStatReturn* stat) override
{
if (stat->list.size == 1)
{
AstExpr* value = stat->list.data[0];
if (AstExprCall* expr = value->as<AstExprCall>())
{
AstExprFunction* func = self->getFunctionExpr(expr->func);
Function* fi = func ? self->functions.find(func) : nullptr;
returnsOne &= fi && fi->returnsOne;
}
else if (value->is<AstExprVarargs>())
{
returnsOne = false;
}
}
else
{
returnsOne = false;
}
return false;
}
};
struct RegScope
{
RegScope(Compiler* self)
@ -3351,6 +3418,7 @@ struct Compiler
uint64_t costModel = 0;
unsigned int stackSize = 0;
bool canInline = false;
bool returnsOne = false;
};
struct Local
@ -3384,6 +3452,8 @@ struct Compiler
{
AstExprFunction* func;
size_t localOffset;
uint8_t target;
uint8_t targetCount;

View File

@ -418,7 +418,7 @@ typedef struct Table
CommonHeader;
uint8_t flags; /* 1<<p means tagmethod(p) is not present */
uint8_t tmcache; /* 1<<p means tagmethod(p) is not present */
uint8_t readonly; /* sandboxing feature to prohibit writes to table */
uint8_t safeenv; /* environment doesn't share globals with other scripts */
uint8_t lsizenode; /* log2 of size of `node' array */

View File

@ -45,7 +45,7 @@ static_assert(TKey{{NULL}, {0}, LUA_TNIL, MAXSIZE - 1}.next == MAXSIZE - 1, "not
static_assert(TKey{{NULL}, {0}, LUA_TNIL, -(MAXSIZE - 1)}.next == -(MAXSIZE - 1), "not enough bits for next");
// reset cache of absent metamethods, cache is updated in luaT_gettm
#define invalidateTMcache(t) t->flags = 0
#define invalidateTMcache(t) t->tmcache = 0
// empty hash data points to dummynode so that we can always dereference it
const LuaNode luaH_dummynode = {
@ -479,7 +479,7 @@ Table* luaH_new(lua_State* L, int narray, int nhash)
Table* t = luaM_newgco(L, Table, sizeof(Table), L->activememcat);
luaC_init(L, t, LUA_TTABLE);
t->metatable = NULL;
t->flags = cast_byte(~0);
t->tmcache = cast_byte(~0);
t->array = NULL;
t->sizearray = 0;
t->lastfree = 0;
@ -778,7 +778,7 @@ Table* luaH_clone(lua_State* L, Table* tt)
Table* t = luaM_newgco(L, Table, sizeof(Table), L->activememcat);
luaC_init(L, t, LUA_TTABLE);
t->metatable = tt->metatable;
t->flags = tt->flags;
t->tmcache = tt->tmcache;
t->array = NULL;
t->sizearray = 0;
t->lsizenode = 0;
@ -835,5 +835,5 @@ void luaH_clear(Table* tt)
}
/* back to empty -> no tag methods present */
tt->flags = cast_byte(~0);
tt->tmcache = cast_byte(~0);
}

View File

@ -88,8 +88,8 @@ const TValue* luaT_gettm(Table* events, TMS event, TString* ename)
const TValue* tm = luaH_getstr(events, ename);
LUAU_ASSERT(event <= TM_EQ);
if (ttisnil(tm))
{ /* no tag method? */
events->flags |= cast_byte(1u << event); /* cache this fact */
{ /* no tag method? */
events->tmcache |= cast_byte(1u << event); /* cache this fact */
return NULL;
}
else

View File

@ -41,10 +41,10 @@ typedef enum
} TMS;
// clang-format on
#define gfasttm(g, et, e) ((et) == NULL ? NULL : ((et)->flags & (1u << (e))) ? NULL : luaT_gettm(et, e, (g)->tmname[e]))
#define gfasttm(g, et, e) ((et) == NULL ? NULL : ((et)->tmcache & (1u << (e))) ? NULL : luaT_gettm(et, e, (g)->tmname[e]))
#define fasttm(l, et, e) gfasttm(l->global, et, e)
#define fastnotm(et, e) ((et) == NULL || ((et)->flags & (1u << (e))))
#define fastnotm(et, e) ((et) == NULL || ((et)->tmcache & (1u << (e))))
LUAI_DATA const char* const luaT_typenames[];
LUAI_DATA const char* const luaT_eventname[];

View File

@ -26,6 +26,8 @@ void luaU_freeudata(lua_State* L, Udata* u, lua_Page* page)
{
void (*dtor)(lua_State*, void*) = nullptr;
dtor = L->global->udatagc[u->tag];
// TODO: access to L here is highly unsafe since this is called during internal GC traversal
// certain operations such as lua_getthreaddata are okay, but by and large this risks crashes on improper use
if (dtor)
dtor(L, u->data);
}

View File

@ -154,11 +154,11 @@ int luau_load(lua_State* L, const char* chunkname, const char* data, size_t size
return 1;
}
if (version != LBC_VERSION)
if (version < LBC_VERSION_MIN || version > LBC_VERSION_MAX)
{
char chunkid[LUA_IDSIZE];
luaO_chunkid(chunkid, chunkname, LUA_IDSIZE);
lua_pushfstring(L, "%s: bytecode version mismatch (expected %d, got %d)", chunkid, LBC_VERSION, version);
lua_pushfstring(L, "%s: bytecode version mismatch (expected [%d..%d], got %d)", chunkid, LBC_VERSION_MIN, LBC_VERSION_MAX, version);
return 1;
}