v0.3.0+luau526

Alex Orlenko 2022-05-07 01:10:54 +01:00
parent 4e923b679b
commit ba1f6ddd7f
No known key found for this signature in database
GPG Key ID: 4C150C250863B96D
31 changed files with 1297 additions and 251 deletions


@ -1,6 +1,6 @@
[package]
name = "luau0-src"
version = "0.2.2+luau521"
version = "0.3.0+luau526"
authors = ["Aleksandr Orlenko <zxteam@protonmail.com>"]
edition = "2018"
repository = "https://github.com/khvzak/luau-src-rs"


@ -313,7 +313,7 @@ template<typename T>
struct AstArray
{
T* data;
std::size_t size;
size_t size;
const T* begin() const
{


@ -32,6 +32,7 @@ class DenseHashTable
{
public:
class const_iterator;
class iterator;
DenseHashTable(const Key& empty_key, size_t buckets = 0)
: count(0)
@ -43,7 +44,7 @@ public:
// don't move this to initializer list! this works around an MSVC codegen issue on AMD CPUs:
// https://developercommunity.visualstudio.com/t/stdvector-constructor-from-size-t-is-25-times-slow/1546547
if (buckets)
data.resize(buckets, ItemInterface::create(empty_key));
resize_data<Item>(buckets);
}
void clear()
@ -125,7 +126,7 @@ public:
if (data.empty() && data.capacity() >= newsize)
{
LUAU_ASSERT(count == 0);
data.resize(newsize, ItemInterface::create(empty_key));
resize_data<Item>(newsize);
return;
}
@ -169,6 +170,21 @@ public:
return const_iterator(this, data.size());
}
iterator begin()
{
size_t start = 0;
while (start < data.size() && eq(ItemInterface::getKey(data[start]), empty_key))
start++;
return iterator(this, start);
}
iterator end()
{
return iterator(this, data.size());
}
size_t size() const
{
return count;
@ -233,7 +249,82 @@ public:
size_t index;
};
class iterator
{
public:
iterator()
: set(0)
, index(0)
{
}
iterator(DenseHashTable<Key, Item, MutableItem, ItemInterface, Hash, Eq>* set, size_t index)
: set(set)
, index(index)
{
}
MutableItem& operator*() const
{
return *reinterpret_cast<MutableItem*>(&set->data[index]);
}
MutableItem* operator->() const
{
return reinterpret_cast<MutableItem*>(&set->data[index]);
}
bool operator==(const iterator& other) const
{
return set == other.set && index == other.index;
}
bool operator!=(const iterator& other) const
{
return set != other.set || index != other.index;
}
iterator& operator++()
{
size_t size = set->data.size();
do
{
index++;
} while (index < size && set->eq(ItemInterface::getKey(set->data[index]), set->empty_key));
return *this;
}
iterator operator++(int)
{
iterator res = *this;
++*this;
return res;
}
private:
DenseHashTable<Key, Item, MutableItem, ItemInterface, Hash, Eq>* set;
size_t index;
};
private:
template<typename T>
void resize_data(size_t count, typename std::enable_if_t<std::is_copy_assignable_v<T>>* dummy = nullptr)
{
data.resize(count, ItemInterface::create(empty_key));
}
template<typename T>
void resize_data(size_t count, typename std::enable_if_t<!std::is_copy_assignable_v<T>>* dummy = nullptr)
{
size_t size = data.size();
data.resize(count);
for (size_t i = size; i < count; i++)
data[i].first = empty_key;
}
std::vector<Item> data;
size_t count;
Key empty_key;
@ -290,6 +381,7 @@ class DenseHashSet
public:
typedef typename Impl::const_iterator const_iterator;
typedef typename Impl::iterator iterator;
DenseHashSet(const Key& empty_key, size_t buckets = 0)
: impl(empty_key, buckets)
@ -336,6 +428,16 @@ public:
{
return impl.end();
}
iterator begin()
{
return impl.begin();
}
iterator end()
{
return impl.end();
}
};
// This is a faster alternative of unordered_map, but it does not implement the same interface (i.e. it does not support erasing and has
@ -348,6 +450,7 @@ class DenseHashMap
public:
typedef typename Impl::const_iterator const_iterator;
typedef typename Impl::iterator iterator;
DenseHashMap(const Key& empty_key, size_t buckets = 0)
: impl(empty_key, buckets)
@ -401,10 +504,21 @@ public:
{
return impl.begin();
}
const_iterator end() const
{
return impl.end();
}
iterator begin()
{
return impl.begin();
}
iterator end()
{
return impl.end();
}
};
} // namespace Luau
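The new non-const iterator makes in-place mutation of stored values possible; a minimal usage sketch (hypothetical caller code, not part of the commit):

// works for any DenseHashMap instantiation, e.g. the DenseHashMap<AstLocal*, Constant> map added in Compile.cpp below
template<typename MapT>
void resetAll(MapT& map)
{
    // begin()/operator++ skip slots that still hold the empty key, so only live entries are visited
    for (auto& kv : map)
        kv.second = {}; // values are assignable through the mutable iterator
}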


@ -173,7 +173,7 @@ public:
}
const Lexeme& next();
const Lexeme& next(bool skipComments);
const Lexeme& next(bool skipComments, bool updatePrevLocation);
void nextline();
Lexeme lookahead();


@ -19,6 +19,7 @@ std::string format(const char* fmt, ...) LUAU_PRINTF_ATTR(1, 2);
std::string vformat(const char* fmt, va_list args);
void formatAppend(std::string& str, const char* fmt, ...) LUAU_PRINTF_ATTR(2, 3);
void vformatAppend(std::string& ret, const char* fmt, va_list args);
std::string join(const std::vector<std::string_view>& segments, std::string_view delimiter);
std::string join(const std::vector<std::string>& segments, std::string_view delimiter);


@ -9,14 +9,21 @@
LUAU_FASTFLAG(DebugLuauTimeTracing)
namespace Luau
{
namespace TimeTrace
{
double getClock();
uint32_t getClockMicroseconds();
} // namespace TimeTrace
} // namespace Luau
#if defined(LUAU_ENABLE_TIME_TRACE)
namespace Luau
{
namespace TimeTrace
{
uint32_t getClockMicroseconds();
struct Token
{
const char* name;
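Since getClock() and getClockMicroseconds() are now declared outside the LUAU_ENABLE_TIME_TRACE block, they can be called from builds without time tracing compiled in; a minimal sketch of a hypothetical caller:

#include "Luau/TimeTrace.h"

// hypothetical helper: time an arbitrary callback in seconds using the always-available clock
template<typename F>
double timeIt(F&& fn)
{
    double start = Luau::TimeTrace::getClock();
    fn();
    return Luau::TimeTrace::getClock() - start;
}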


@ -6,8 +6,6 @@
#include <limits.h>
LUAU_FASTFLAGVARIABLE(LuauParseLocationIgnoreCommentSkip, false)
namespace Luau
{
@ -349,13 +347,11 @@ void Lexer::setReadNames(bool read)
const Lexeme& Lexer::next()
{
return next(this->skipComments);
return next(this->skipComments, true);
}
const Lexeme& Lexer::next(bool skipComments)
const Lexeme& Lexer::next(bool skipComments, bool updatePrevLocation)
{
bool first = true;
// in skipComments mode we reject valid comments
do
{
@ -363,11 +359,11 @@ const Lexeme& Lexer::next(bool skipComments)
while (isSpace(peekch()))
consume();
if (!FFlag::LuauParseLocationIgnoreCommentSkip || first)
if (updatePrevLocation)
prevLocation = lexeme.location;
lexeme = readNext();
first = false;
updatePrevLocation = false;
} while (skipComments && (lexeme.type == Lexeme::Comment || lexeme.type == Lexeme::BlockComment));
return lexeme;


@ -10,6 +10,7 @@
// See docs/SyntaxChanges.md for an explanation.
LUAU_FASTINTVARIABLE(LuauRecursionLimit, 1000)
LUAU_FASTINTVARIABLE(LuauParseErrorLimit, 100)
LUAU_FASTFLAGVARIABLE(LuauParseLocationIgnoreCommentSkipInCapture, false)
namespace Luau
{
@ -165,6 +166,7 @@ Parser::Parser(const char* buffer, size_t bufferSize, AstNameTable& names, Alloc
Function top;
top.vararg = true;
functionStack.reserve(8);
functionStack.push_back(top);
nameSelf = names.addStatic("self");
@ -184,6 +186,13 @@ Parser::Parser(const char* buffer, size_t bufferSize, AstNameTable& names, Alloc
// all hot comments parsed after the first non-comment lexeme are special in that they don't affect type checking / linting mode
hotcommentHeader = false;
// preallocate some buffers that are very likely to grow anyway; this works around std::vector's inefficient growth policy for small arrays
localStack.reserve(16);
scratchStat.reserve(16);
scratchExpr.reserve(16);
scratchLocal.reserve(16);
scratchBinding.reserve(16);
}
bool Parser::blockFollow(const Lexeme& l)
@ -1420,6 +1429,11 @@ AstType* Parser::parseTypeAnnotation(TempVector<AstType*>& parts, const Location
parts.push_back(parseSimpleTypeAnnotation(/* allowPack= */ false).type);
isIntersection = true;
}
else if (c == Lexeme::Dot3)
{
report(lexer.current().location, "Unexpected '...' after type annotation");
nextLexeme();
}
else
break;
}
@ -1536,6 +1550,11 @@ AstTypeOrPack Parser::parseSimpleTypeAnnotation(bool allowPack)
prefix = name.name;
name = parseIndexName("field name", pointPosition);
}
else if (lexer.current().type == Lexeme::Dot3)
{
report(lexer.current().location, "Unexpected '...' after type name; type pack is not allowed in this context");
nextLexeme();
}
else if (name.name == "typeof")
{
Lexeme typeofBegin = lexer.current();
@ -2778,7 +2797,7 @@ void Parser::nextLexeme()
{
if (options.captureComments)
{
Lexeme::Type type = lexer.next(/* skipComments= */ false).type;
Lexeme::Type type = lexer.next(/* skipComments= */ false, true).type;
while (type == Lexeme::BrokenComment || type == Lexeme::Comment || type == Lexeme::BlockComment)
{
@ -2802,7 +2821,7 @@ void Parser::nextLexeme()
hotcomments.push_back({hotcommentHeader, lexeme.location, std::string(text + 1, text + end)});
}
type = lexer.next(/* skipComments= */ false).type;
type = lexer.next(/* skipComments= */ false, !FFlag::LuauParseLocationIgnoreCommentSkipInCapture).type;
}
}
else


@ -11,7 +11,7 @@
namespace Luau
{
static void vformatAppend(std::string& ret, const char* fmt, va_list args)
void vformatAppend(std::string& ret, const char* fmt, va_list args)
{
va_list argscopy;
va_copy(argscopy, args);


@ -26,9 +26,6 @@
#include <time.h>
LUAU_FASTFLAGVARIABLE(DebugLuauTimeTracing, false)
#if defined(LUAU_ENABLE_TIME_TRACE)
namespace Luau
{
namespace TimeTrace
@ -67,6 +64,14 @@ static double getClockTimestamp()
#endif
}
double getClock()
{
static double period = getClockPeriod();
static double start = getClockTimestamp();
return (getClockTimestamp() - start) * period;
}
uint32_t getClockMicroseconds()
{
static double period = getClockPeriod() * 1e6;
@ -74,7 +79,15 @@ uint32_t getClockMicroseconds()
return uint32_t((getClockTimestamp() - start) * period);
}
} // namespace TimeTrace
} // namespace Luau
#if defined(LUAU_ENABLE_TIME_TRACE)
namespace Luau
{
namespace TimeTrace
{
struct GlobalContext
{
GlobalContext() = default;


@ -353,6 +353,11 @@ enum LuauOpcode
// AUX: constant index
LOP_FASTCALL2K,
// FORGPREP: prepare loop variables for a generic for loop, jump to the loop backedge unconditionally
// A: target register; generic for loops assume a register layout [generator, state, index, variables...]
// D: jump offset (-32768..32767)
LOP_FORGPREP,
// Enum entry for number of opcodes, not a valid opcode by itself!
LOP__COUNT
};


@ -3,6 +3,7 @@
#include "Luau/Bytecode.h"
#include "Luau/DenseHash.h"
#include "Luau/StringUtils.h"
#include <string>
@ -80,6 +81,8 @@ public:
void pushDebugUpval(StringRef name);
uint32_t getDebugPC() const;
void addDebugRemark(const char* format, ...) LUAU_PRINTF_ATTR(2, 3);
void finalize();
enum DumpFlags
@ -88,6 +91,7 @@ public:
Dump_Lines = 1 << 1,
Dump_Source = 1 << 2,
Dump_Locals = 1 << 3,
Dump_Remarks = 1 << 4,
};
void setDumpFlags(uint32_t flags)
@ -228,6 +232,9 @@ private:
DenseHashMap<StringRef, unsigned int, StringRefHash> stringTable;
std::vector<std::pair<uint32_t, uint32_t>> debugRemarks;
std::string debugRemarkBuffer;
BytecodeEncoder* encoder = nullptr;
std::string bytecode;
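A hypothetical usage sketch of the new Dump_Remarks flag (setDumpFlags and the flag names come from the header above; Dump_Code is assumed to be the pre-existing code-dump flag):

#include "Luau/BytecodeBuilder.h"

void enableRemarkDump(Luau::BytecodeBuilder& bcb)
{
    // remarks recorded via addDebugRemark() are only kept when Dump_Remarks is requested
    bcb.setDumpFlags(Luau::BytecodeBuilder::Dump_Code | Luau::BytecodeBuilder::Dump_Remarks);
    // each remark then shows up in the per-function dump as a "REMARK <text>" line next to its instruction
}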


@ -96,6 +96,7 @@ inline bool isJumpD(LuauOpcode op)
case LOP_JUMPIFNOTLT:
case LOP_FORNPREP:
case LOP_FORNLOOP:
case LOP_FORGPREP:
case LOP_FORGLOOP:
case LOP_FORGPREP_INEXT:
case LOP_FORGLOOP_INEXT:
@ -184,6 +185,13 @@ BytecodeBuilder::BytecodeBuilder(BytecodeEncoder* encoder)
, encoder(encoder)
{
LUAU_ASSERT(stringTable.find(StringRef{"", 0}) == nullptr);
// preallocate some buffers that are very likely to grow anyway; this works around std::vector's inefficient growth policy for small arrays
insns.reserve(32);
lines.reserve(32);
constants.reserve(16);
protos.reserve(16);
functions.reserve(8);
}
uint32_t BytecodeBuilder::beginFunction(uint8_t numparams, bool isvararg)
@ -219,8 +227,8 @@ void BytecodeBuilder::endFunction(uint8_t maxstacksize, uint8_t numupvalues)
validate();
#endif
// very approximate: 4 bytes per instruction for code, 1 byte for debug line, and 1-2 bytes for aux data like constants
func.data.reserve(insns.size() * 7);
// very approximate: 4 bytes per instruction for code, 1 byte for debug line, and 1-2 bytes for aux data like constants plus overhead
func.data.reserve(32 + insns.size() * 7);
writeFunction(func.data, currentFunction);
@ -242,10 +250,15 @@ void BytecodeBuilder::endFunction(uint8_t maxstacksize, uint8_t numupvalues)
constantMap.clear();
tableShapeMap.clear();
debugRemarks.clear();
debugRemarkBuffer.clear();
}
void BytecodeBuilder::setMainFunction(uint32_t fid)
{
LUAU_ASSERT(fid < functions.size());
mainFunction = fid;
}
@ -505,9 +518,40 @@ uint32_t BytecodeBuilder::getDebugPC() const
return uint32_t(insns.size());
}
void BytecodeBuilder::addDebugRemark(const char* format, ...)
{
if ((dumpFlags & Dump_Remarks) == 0)
return;
size_t offset = debugRemarkBuffer.size();
va_list args;
va_start(args, format);
vformatAppend(debugRemarkBuffer, format, args);
va_end(args);
// we null-terminate all remarks to avoid storing remark length
debugRemarkBuffer += '\0';
debugRemarks.emplace_back(uint32_t(insns.size()), uint32_t(offset));
}
void BytecodeBuilder::finalize()
{
LUAU_ASSERT(bytecode.empty());
// preallocate space for bytecode blob
size_t capacity = 16;
for (auto& p : stringTable)
capacity += p.first.length + 2;
for (const Function& func : functions)
capacity += func.data.size();
bytecode.reserve(capacity);
// assemble final bytecode blob
bytecode = char(LBC_VERSION);
writeStringTable(bytecode);
@ -663,6 +707,8 @@ void BytecodeBuilder::writeFunction(std::string& ss, uint32_t id) const
void BytecodeBuilder::writeLineInfo(std::string& ss) const
{
LUAU_ASSERT(!lines.empty());
// this function encodes lines inside each span as an 8-bit delta to span baseline
// span is always a power of two; depending on the line info input, it may need to be as low as 1
int span = 1 << 24;
@ -693,7 +739,17 @@ void BytecodeBuilder::writeLineInfo(std::string& ss) const
}
// second pass: compute span base
std::vector<int> baseline((lines.size() - 1) / span + 1);
int baselineOne = 0;
std::vector<int> baselineScratch;
int* baseline = &baselineOne;
size_t baselineSize = (lines.size() - 1) / span + 1;
if (baselineSize > 1)
{
// avoid heap allocation for single-element baseline which is most functions (<256 lines)
baselineScratch.resize(baselineSize);
baseline = baselineScratch.data();
}
for (size_t offset = 0; offset < lines.size(); offset += span)
{
@ -725,7 +781,7 @@ void BytecodeBuilder::writeLineInfo(std::string& ss) const
int lastLine = 0;
for (size_t i = 0; i < baseline.size(); ++i)
for (size_t i = 0; i < baselineSize; ++i)
{
writeInt(ss, baseline[i] - lastLine);
lastLine = baseline[i];
@ -1214,6 +1270,11 @@ void BytecodeBuilder::validate() const
VJUMP(LUAU_INSN_D(insn));
break;
case LOP_FORGPREP:
VREG(LUAU_INSN_A(insn) + 2 + 1); // forg loop protocol: A, A+1, A+2 are used for iteration protocol; A+3, ... are loop variables
VJUMP(LUAU_INSN_D(insn));
break;
case LOP_FORGLOOP:
VREG(
LUAU_INSN_A(insn) + 2 + insns[i + 1]); // forg loop protocol: A, A+1, A+2 are used for iteration protocol; A+3, ... are loop variables
@ -1567,6 +1628,10 @@ const uint32_t* BytecodeBuilder::dumpInstruction(const uint32_t* code, std::stri
formatAppend(result, "FORNLOOP R%d %+d\n", LUAU_INSN_A(insn), LUAU_INSN_D(insn));
break;
case LOP_FORGPREP:
formatAppend(result, "FORGPREP R%d %+d\n", LUAU_INSN_A(insn), LUAU_INSN_D(insn));
break;
case LOP_FORGLOOP:
formatAppend(result, "FORGLOOP R%d %+d %d\n", LUAU_INSN_A(insn), LUAU_INSN_D(insn), *code++);
break;
@ -1665,6 +1730,7 @@ std::string BytecodeBuilder::dumpCurrentFunction() const
const uint32_t* codeEnd = insns.data() + insns.size();
int lastLine = -1;
size_t nextRemark = 0;
std::string result;
@ -1687,6 +1753,7 @@ std::string BytecodeBuilder::dumpCurrentFunction() const
while (code != codeEnd)
{
uint8_t op = LUAU_INSN_OP(*code);
uint32_t pc = uint32_t(code - insns.data());
if (op == LOP_PREPVARARGS)
{
@ -1695,9 +1762,18 @@ std::string BytecodeBuilder::dumpCurrentFunction() const
continue;
}
if (dumpFlags & Dump_Remarks)
{
while (nextRemark < debugRemarks.size() && debugRemarks[nextRemark].first == pc)
{
formatAppend(result, "REMARK %s\n", debugRemarkBuffer.c_str() + debugRemarks[nextRemark].second);
nextRemark++;
}
}
if (dumpFlags & Dump_Source)
{
int line = lines[code - insns.data()];
int line = lines[pc];
if (line > 0 && line != lastLine)
{
@ -1709,7 +1785,7 @@ std::string BytecodeBuilder::dumpCurrentFunction() const
if (dumpFlags & Dump_Lines)
{
formatAppend(result, "%d: ", lines[code - insns.data()]);
formatAppend(result, "%d: ", lines[pc]);
}
code = dumpInstruction(code, result);
@ -1722,11 +1798,11 @@ void BytecodeBuilder::setDumpSource(const std::string& source)
{
dumpSource.clear();
std::string::size_type pos = 0;
size_t pos = 0;
while (pos != std::string::npos)
{
std::string::size_type next = source.find('\n', pos);
size_t next = source.find('\n', pos);
if (next == std::string::npos)
{


@ -8,12 +8,27 @@
#include "Builtins.h"
#include "ConstantFolding.h"
#include "CostModel.h"
#include "TableShape.h"
#include "ValueTracking.h"
#include <algorithm>
#include <bitset>
#include <math.h>
#include <limits.h>
LUAU_FASTFLAGVARIABLE(LuauCompileSupportInlining, false)
LUAU_FASTFLAGVARIABLE(LuauCompileIter, false)
LUAU_FASTFLAGVARIABLE(LuauCompileIterNoReserve, false)
LUAU_FASTFLAGVARIABLE(LuauCompileIterNoPairs, false)
LUAU_FASTINTVARIABLE(LuauCompileLoopUnrollThreshold, 25)
LUAU_FASTINTVARIABLE(LuauCompileLoopUnrollThresholdMaxBoost, 300)
LUAU_FASTINTVARIABLE(LuauCompileInlineThreshold, 25)
LUAU_FASTINTVARIABLE(LuauCompileInlineThresholdMaxBoost, 300)
LUAU_FASTINTVARIABLE(LuauCompileInlineDepth, 5)
namespace Luau
{
@ -77,8 +92,12 @@ struct Compiler
, globals(AstName())
, variables(nullptr)
, constants(nullptr)
, locstants(nullptr)
, tableShapes(nullptr)
{
// preallocate some buffers that are very likely to grow anyway; this works around std::vector's inefficient growth policy for small arrays
localStack.reserve(16);
upvals.reserve(16);
}
uint8_t getLocal(AstLocal* local)
@ -138,6 +157,52 @@ struct Compiler
}
}
AstExprFunction* getFunctionExpr(AstExpr* node)
{
if (AstExprLocal* le = node->as<AstExprLocal>())
{
Variable* lv = variables.find(le->local);
if (!lv || lv->written || !lv->init)
return nullptr;
return getFunctionExpr(lv->init);
}
else if (AstExprGroup* ge = node->as<AstExprGroup>())
return getFunctionExpr(ge->expr);
else
return node->as<AstExprFunction>();
}
bool canInlineFunctionBody(AstStat* stat)
{
struct CanInlineVisitor : AstVisitor
{
bool result = true;
bool visit(AstExpr* node) override
{
// nested functions may capture function arguments, and our upval handling doesn't handle elided variables (constant)
// TODO: we could remove this case if we changed function compilation to create temporary locals for constant upvalues
// TODO: additionally we would need to change upvalue handling in compileExprFunction to handle upvalue->local migration
result = result && !node->is<AstExprFunction>();
return result;
}
bool visit(AstStat* node) override
{
// loops may need to be unrolled which can result in cost amplification
result = result && !node->is<AstStatFor>();
return result;
}
};
CanInlineVisitor canInline;
stat->visit(&canInline);
return canInline.result;
}
uint32_t compileFunction(AstExprFunction* func)
{
LUAU_TIMETRACE_SCOPE("Compiler::compileFunction", "Compiler");
@ -205,11 +270,21 @@ struct Compiler
bytecode.endFunction(uint8_t(stackSize), uint8_t(upvals.size()));
stackSize = 0;
Function& f = functions[func];
f.id = fid;
f.upvals = std::move(upvals);
f.upvals = upvals;
// record information for inlining
if (FFlag::LuauCompileSupportInlining && options.optimizationLevel >= 2 && !func->vararg && canInlineFunctionBody(func->body) &&
!getfenvUsed && !setfenvUsed)
{
f.canInline = true;
f.stackSize = stackSize;
f.costModel = modelCost(func->body, func->args.data, func->args.size);
}
upvals.clear(); // note: instead of std::move above, we copy & clear to preserve capacity for future pushes
stackSize = 0;
return fid;
}
@ -379,12 +454,183 @@ struct Compiler
}
}
bool tryCompileInlinedCall(AstExprCall* expr, AstExprFunction* func, uint8_t target, uint8_t targetCount, bool multRet, int thresholdBase,
int thresholdMaxBoost, int depthLimit)
{
Function* fi = functions.find(func);
LUAU_ASSERT(fi);
// make sure we have enough register space
if (regTop > 128 || fi->stackSize > 32)
{
bytecode.addDebugRemark("inlining failed: high register pressure");
return false;
}
// we should ideally aggregate the costs during recursive inlining, but for now simply limit the depth
if (int(inlineFrames.size()) >= depthLimit)
{
bytecode.addDebugRemark("inlining failed: too many inlined frames");
return false;
}
// compiling recursive inlining is difficult because we share constant/variable state but need to bind variables to different registers
for (InlineFrame& frame : inlineFrames)
if (frame.func == func)
{
bytecode.addDebugRemark("inlining failed: can't inline recursive calls");
return false;
}
// TODO: we can compile multret functions if all returns of the function are multret as well
if (multRet)
{
bytecode.addDebugRemark("inlining failed: can't convert fixed returns to multret");
return false;
}
// TODO: we can compile functions with mismatching arity at call site but it's more annoying
if (func->args.size != expr->args.size)
{
bytecode.addDebugRemark("inlining failed: argument count mismatch (expected %d, got %d)", int(func->args.size), int(expr->args.size));
return false;
}
// we use a dynamic cost threshold that's based on the fixed limit boosted by the cost advantage we gain due to inlining
bool varc[8] = {};
for (size_t i = 0; i < expr->args.size && i < 8; ++i)
varc[i] = isConstant(expr->args.data[i]);
int inlinedCost = computeCost(fi->costModel, varc, std::min(int(expr->args.size), 8));
int baselineCost = computeCost(fi->costModel, nullptr, 0) + 3;
int inlineProfit = (inlinedCost == 0) ? thresholdMaxBoost : std::min(thresholdMaxBoost, 100 * baselineCost / inlinedCost);
int threshold = thresholdBase * inlineProfit / 100;
if (inlinedCost > threshold)
{
bytecode.addDebugRemark("inlining failed: too expensive (cost %d, profit %.2fx)", inlinedCost, double(inlineProfit) / 100);
return false;
}
bytecode.addDebugRemark(
"inlining succeeded (cost %d, profit %.2fx, depth %d)", inlinedCost, double(inlineProfit) / 100, int(inlineFrames.size()));
compileInlinedCall(expr, func, target, targetCount);
return true;
}
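For reference, a standalone sketch of the profit/threshold arithmetic above, with illustrative numbers (the flag defaults are LuauCompileInlineThreshold = 25 and LuauCompileInlineThresholdMaxBoost = 300, per the declarations at the top of this file):

#include <algorithm>
#include <cstdio>

// mirrors the threshold computation in tryCompileInlinedCall; inputs are made up for illustration
int inlineThreshold(int inlinedCost, int baselineCost, int thresholdBase, int thresholdMaxBoost)
{
    int inlineProfit = (inlinedCost == 0) ? thresholdMaxBoost : std::min(thresholdMaxBoost, 100 * baselineCost / inlinedCost);
    return thresholdBase * inlineProfit / 100;
}

int main()
{
    // a call whose inlined body costs 10 but costs 40 when compiled as an out-of-line call
    std::printf("%d\n", inlineThreshold(10, 40, 25, 300)); // prints 75, so the cost-10 body is inlined
    return 0;
}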
void compileInlinedCall(AstExprCall* expr, AstExprFunction* func, uint8_t target, uint8_t targetCount)
{
RegScope rs(this);
size_t oldLocals = localStack.size();
// note that we push the frame early; this is needed to block recursive inline attempts
inlineFrames.push_back({func, target, targetCount});
// evaluate all arguments; note that we don't emit code for constant arguments (relying on constant folding)
for (size_t i = 0; i < func->args.size; ++i)
{
AstLocal* var = func->args.data[i];
AstExpr* arg = expr->args.data[i];
if (Variable* vv = variables.find(var); vv && vv->written)
{
// if the argument is mutated, we need to allocate a fresh register even if it's a constant
uint8_t reg = allocReg(arg, 1);
compileExprTemp(arg, reg);
pushLocal(var, reg);
}
else if (const Constant* cv = constants.find(arg); cv && cv->type != Constant::Type_Unknown)
{
// since the argument is not mutated, we can simply fold the value into the expressions that need it
locstants[var] = *cv;
}
else
{
AstExprLocal* le = arg->as<AstExprLocal>();
Variable* lv = le ? variables.find(le->local) : nullptr;
// if the argument is a local that isn't mutated, we will simply reuse the existing register
if (isExprLocalReg(arg) && (!lv || !lv->written))
{
uint8_t reg = getLocal(le->local);
pushLocal(var, reg);
}
else
{
uint8_t reg = allocReg(arg, 1);
compileExprTemp(arg, reg);
pushLocal(var, reg);
}
}
}
// fold constant values updated above into expressions in the function body
foldConstants(constants, variables, locstants, func->body);
bool usedFallthrough = false;
for (size_t i = 0; i < func->body->body.size; ++i)
{
AstStat* stat = func->body->body.data[i];
if (AstStatReturn* ret = stat->as<AstStatReturn>())
{
// Optimization: use fallthrough when compiling return at the end of the function to avoid an extra JUMP
compileInlineReturn(ret, /* fallthrough= */ true);
// TODO: This doesn't work when return is part of control flow; ideally we would track the state somehow and generalize this
usedFallthrough = true;
break;
}
else
compileStat(stat);
}
// for the fallthrough path we need to ensure we clear out target registers
if (!usedFallthrough && !allPathsEndWithReturn(func->body))
{
for (size_t i = 0; i < targetCount; ++i)
bytecode.emitABC(LOP_LOADNIL, uint8_t(target + i), 0, 0);
}
popLocals(oldLocals);
size_t returnLabel = bytecode.emitLabel();
patchJumps(expr, inlineFrames.back().returnJumps, returnLabel);
inlineFrames.pop_back();
// clean up constant state for future inlining attempts
for (size_t i = 0; i < func->args.size; ++i)
if (Constant* var = locstants.find(func->args.data[i]))
var->type = Constant::Type_Unknown;
foldConstants(constants, variables, locstants, func->body);
}
void compileExprCall(AstExprCall* expr, uint8_t target, uint8_t targetCount, bool targetTop = false, bool multRet = false)
{
LUAU_ASSERT(!targetTop || unsigned(target + targetCount) == regTop);
setDebugLine(expr); // normally compileExpr sets up line info, but compileExprCall can be called directly
// try inlining the function
if (options.optimizationLevel >= 2 && !expr->self)
{
AstExprFunction* func = getFunctionExpr(expr->func);
Function* fi = func ? functions.find(func) : nullptr;
if (fi && fi->canInline &&
tryCompileInlinedCall(expr, func, target, targetCount, multRet, FInt::LuauCompileInlineThreshold,
FInt::LuauCompileInlineThresholdMaxBoost, FInt::LuauCompileInlineDepth))
return;
if (fi && !fi->canInline)
bytecode.addDebugRemark("inlining failed: complex constructs in function body");
}
RegScope rs(this);
unsigned int regCount = std::max(unsigned(1 + expr->self + expr->args.size), unsigned(targetCount));
@ -749,7 +995,7 @@ struct Compiler
{
const Constant* c = constants.find(node);
if (!c)
if (!c || c->type == Constant::Type_Unknown)
return -1;
int cid = -1;
@ -1384,27 +1630,29 @@ struct Compiler
{
RegScope rs(this);
// note: cv may be invalidated by compileExpr* so we stop using it before calling compile recursively
const Constant* cv = constants.find(expr->index);
if (cv && cv->type == Constant::Type_Number && double(int(cv->valueNumber)) == cv->valueNumber && cv->valueNumber >= 1 &&
cv->valueNumber <= 256)
if (cv && cv->type == Constant::Type_Number && cv->valueNumber >= 1 && cv->valueNumber <= 256 &&
double(int(cv->valueNumber)) == cv->valueNumber)
{
uint8_t rt = compileExprAuto(expr->expr, rs);
uint8_t i = uint8_t(int(cv->valueNumber) - 1);
uint8_t rt = compileExprAuto(expr->expr, rs);
setDebugLine(expr->index);
bytecode.emitABC(LOP_GETTABLEN, target, rt, i);
}
else if (cv && cv->type == Constant::Type_String)
{
uint8_t rt = compileExprAuto(expr->expr, rs);
BytecodeBuilder::StringRef iname = sref(cv->getString());
int32_t cid = bytecode.addConstantString(iname);
if (cid < 0)
CompileError::raise(expr->location, "Exceeded constant limit; simplify the code to compile");
uint8_t rt = compileExprAuto(expr->expr, rs);
setDebugLine(expr->index);
bytecode.emitABC(LOP_GETTABLEKS, target, rt, uint8_t(BytecodeBuilder::getStringHash(iname)));
@ -1550,8 +1798,9 @@ struct Compiler
}
else if (AstExprLocal* expr = node->as<AstExprLocal>())
{
if (expr->upvalue)
if (FFlag::LuauCompileSupportInlining ? !isExprLocalReg(expr) : expr->upvalue)
{
LUAU_ASSERT(expr->upvalue);
uint8_t uid = getUpval(expr->local);
bytecode.emitABC(LOP_GETUPVAL, target, uid, 0);
@ -1639,12 +1888,12 @@ struct Compiler
// initializes target..target+targetCount-1 range using expressions from the list
// if list has fewer expressions, and last expression is a call, we assume the call returns the rest of the values
// if list has fewer expressions, and last expression isn't a call, we fill the rest with nil
// assumes target register range can be clobbered and is at the top of the register space
void compileExprListTop(const AstArray<AstExpr*>& list, uint8_t target, uint8_t targetCount)
// assumes target register range can be clobbered and is at the top of the register space if targetTop = true
void compileExprListTemp(const AstArray<AstExpr*>& list, uint8_t target, uint8_t targetCount, bool targetTop)
{
// we assume that target range is at the top of the register space and can be clobbered
// this is what allows us to compile the last call expression - if it's a call - using targetTop=true
LUAU_ASSERT(unsigned(target + targetCount) == regTop);
LUAU_ASSERT(!targetTop || unsigned(target + targetCount) == regTop);
if (list.size == targetCount)
{
@ -1672,7 +1921,7 @@ struct Compiler
if (AstExprCall* expr = last->as<AstExprCall>())
{
compileExprCall(expr, uint8_t(target + list.size - 1), uint8_t(targetCount - (list.size - 1)), /* targetTop= */ true);
compileExprCall(expr, uint8_t(target + list.size - 1), uint8_t(targetCount - (list.size - 1)), targetTop);
}
else if (AstExprVarargs* expr = last->as<AstExprVarargs>())
{
@ -1754,8 +2003,10 @@ struct Compiler
if (AstExprLocal* expr = node->as<AstExprLocal>())
{
if (expr->upvalue)
if (FFlag::LuauCompileSupportInlining ? !isExprLocalReg(expr) : expr->upvalue)
{
LUAU_ASSERT(expr->upvalue);
LValue result = {LValue::Kind_Upvalue};
result.upval = getUpval(expr->local);
result.location = node->location;
@ -1862,7 +2113,7 @@ struct Compiler
bool isExprLocalReg(AstExpr* expr)
{
AstExprLocal* le = expr->as<AstExprLocal>();
if (!le || le->upvalue)
if (!le || (!FFlag::LuauCompileSupportInlining && le->upvalue))
return false;
Local* l = locals.find(le->local);
@ -2069,6 +2320,23 @@ struct Compiler
loops.pop_back();
}
void compileInlineReturn(AstStatReturn* stat, bool fallthrough)
{
setDebugLine(stat); // normally compileStat sets up line info, but compileInlineReturn can be called directly
InlineFrame frame = inlineFrames.back();
compileExprListTemp(stat->list, frame.target, frame.targetCount, /* targetTop= */ false);
if (!fallthrough)
{
size_t jumpLabel = bytecode.emitLabel();
bytecode.emitAD(LOP_JUMP, 0, 0);
inlineFrames.back().returnJumps.push_back(jumpLabel);
}
}
void compileStatReturn(AstStatReturn* stat)
{
RegScope rs(this);
@ -2127,16 +2395,137 @@ struct Compiler
// note: allocReg in this case allocates into parent block register - note that we don't have RegScope here
uint8_t vars = allocReg(stat, unsigned(stat->vars.size));
compileExprListTop(stat->values, vars, uint8_t(stat->vars.size));
compileExprListTemp(stat->values, vars, uint8_t(stat->vars.size), /* targetTop= */ true);
for (size_t i = 0; i < stat->vars.size; ++i)
pushLocal(stat->vars.data[i], uint8_t(vars + i));
}
int getConstantShort(AstExpr* expr)
{
const Constant* c = constants.find(expr);
if (c && c->type == Constant::Type_Number)
{
double n = c->valueNumber;
if (n >= -32767 && n <= 32767 && double(int(n)) == n)
return int(n);
}
return INT_MIN;
}
bool canUnrollForBody(AstStatFor* stat)
{
struct CanUnrollVisitor : AstVisitor
{
bool result = true;
bool visit(AstExpr* node) override
{
// functions may capture loop variable, and our upval handling doesn't handle elided variables (constant)
// TODO: we could remove this case if we changed function compilation to create temporary locals for constant upvalues
result = result && !node->is<AstExprFunction>();
return result;
}
bool visit(AstStat* node) override
{
// while we can easily unroll nested loops, our cost model doesn't take unrolling into account so this can result in code explosion
// we also avoid continue/break since they introduce control flow across iterations
result = result && !node->is<AstStatFor>() && !node->is<AstStatContinue>() && !node->is<AstStatBreak>();
return result;
}
};
CanUnrollVisitor canUnroll;
stat->body->visit(&canUnroll);
return canUnroll.result;
}
bool tryCompileUnrolledFor(AstStatFor* stat, int thresholdBase, int thresholdMaxBoost)
{
int from = getConstantShort(stat->from);
int to = getConstantShort(stat->to);
int step = stat->step ? getConstantShort(stat->step) : 1;
// check that limits are reasonably small and trip count can be computed
if (from == INT_MIN || to == INT_MIN || step == INT_MIN || step == 0 || (step < 0 && to > from) || (step > 0 && to < from))
{
bytecode.addDebugRemark("loop unroll failed: invalid iteration count");
return false;
}
if (!canUnrollForBody(stat))
{
bytecode.addDebugRemark("loop unroll failed: unsupported loop body");
return false;
}
if (Variable* lv = variables.find(stat->var); lv && lv->written)
{
bytecode.addDebugRemark("loop unroll failed: mutable loop variable");
return false;
}
int tripCount = (to - from) / step + 1;
if (tripCount > thresholdBase)
{
bytecode.addDebugRemark("loop unroll failed: too many iterations (%d)", tripCount);
return false;
}
AstLocal* var = stat->var;
uint64_t costModel = modelCost(stat->body, &var, 1);
// we use a dynamic cost threshold that's based on the fixed limit boosted by the cost advantage we gain due to unrolling
bool varc = true;
int unrolledCost = computeCost(costModel, &varc, 1) * tripCount;
int baselineCost = (computeCost(costModel, nullptr, 0) + 1) * tripCount;
int unrollProfit = (unrolledCost == 0) ? thresholdMaxBoost : std::min(thresholdMaxBoost, 100 * baselineCost / unrolledCost);
int threshold = thresholdBase * unrollProfit / 100;
if (unrolledCost > threshold)
{
bytecode.addDebugRemark(
"loop unroll failed: too expensive (iterations %d, cost %d, profit %.2fx)", tripCount, unrolledCost, double(unrollProfit) / 100);
return false;
}
bytecode.addDebugRemark("loop unroll succeeded (iterations %d, cost %d, profit %.2fx)", tripCount, unrolledCost, double(unrollProfit) / 100);
for (int i = from; step > 0 ? i <= to : i >= to; i += step)
{
// we need to re-fold constants in the loop body with the new value; this reuses computed constant values elsewhere in the tree
locstants[var].type = Constant::Type_Number;
locstants[var].valueNumber = i;
foldConstants(constants, variables, locstants, stat);
compileStat(stat->body);
}
// clean up fold state in case we need to recompile - normally we compile the loop body once, but due to inlining we may need to do it again
locstants[var].type = Constant::Type_Unknown;
foldConstants(constants, variables, locstants, stat);
return true;
}
void compileStatFor(AstStatFor* stat)
{
RegScope rs(this);
// Optimization: small loops can be unrolled when it is profitable
if (options.optimizationLevel >= 2 && isConstant(stat->to) && isConstant(stat->from) && (!stat->step || isConstant(stat->step)))
if (tryCompileUnrolledFor(stat, FInt::LuauCompileLoopUnrollThreshold, FInt::LuauCompileLoopUnrollThresholdMaxBoost))
return;
size_t oldLocals = localStack.size();
size_t oldJumps = loopJumps.size();
@ -2210,12 +2599,17 @@ struct Compiler
uint8_t regs = allocReg(stat, 3);
// this puts initial values of (generator, state, index) into the loop registers
compileExprListTop(stat->values, regs, 3);
compileExprListTemp(stat->values, regs, 3, /* targetTop= */ true);
// for the general case, we will execute a CALL for every iteration that needs to evaluate "variables... = generator(state, index)"
// this requires at least extra 3 stack slots after index
// note that these stack slots overlap with the variables so we only need to reserve them to make sure stack frame is large enough
reserveReg(stat, 3);
// we don't need this because the extra stack space is just for calling the function with a loop protocol which is similar to calling
// metamethods - it should fit into the extra stack reservation
if (!FFlag::LuauCompileIterNoReserve)
{
// for the general case, we will execute a CALL for every iteration that needs to evaluate "variables... = generator(state, index)"
// this requires at least extra 3 stack slots after index
// note that these stack slots overlap with the variables so we only need to reserve them to make sure stack frame is large enough
reserveReg(stat, 3);
}
// note that we reserve at least 2 variables; this allows our fast path to assume that we need 2 variables instead of 1 or 2
uint8_t vars = allocReg(stat, std::max(unsigned(stat->vars.size), 2u));
@ -2224,7 +2618,7 @@ struct Compiler
// Optimization: when we iterate through pairs/ipairs, we generate special bytecode that optimizes the traversal using internal iteration index.
// These instructions dynamically check if generator is equal to next/inext and bail out.
// They assume that the generator produces 2 variables, which is why we allocate at least 2 above (see vars assignment)
LuauOpcode skipOp = LOP_JUMP;
LuauOpcode skipOp = FFlag::LuauCompileIter ? LOP_FORGPREP : LOP_JUMP;
LuauOpcode loopOp = LOP_FORGLOOP;
if (options.optimizationLevel >= 1 && stat->vars.size <= 2)
@ -2241,7 +2635,7 @@ struct Compiler
else if (builtin.isGlobal("pairs")) // for .. in pairs(t)
{
skipOp = LOP_FORGPREP_NEXT;
loopOp = LOP_FORGLOOP_NEXT;
loopOp = FFlag::LuauCompileIterNoPairs ? LOP_FORGLOOP : LOP_FORGLOOP_NEXT;
}
}
else if (stat->values.size == 2)
@ -2251,7 +2645,7 @@ struct Compiler
if (builtin.isGlobal("next")) // for .. in next,t
{
skipOp = LOP_FORGPREP_NEXT;
loopOp = LOP_FORGLOOP_NEXT;
loopOp = FFlag::LuauCompileIterNoPairs ? LOP_FORGLOOP : LOP_FORGLOOP_NEXT;
}
}
}
@ -2388,10 +2782,10 @@ struct Compiler
// compute values into temporaries
uint8_t regs = allocReg(stat, unsigned(stat->vars.size));
compileExprListTop(stat->values, regs, uint8_t(stat->vars.size));
compileExprListTemp(stat->values, regs, uint8_t(stat->vars.size), /* targetTop= */ true);
// assign variables that have associated values; note that if we have fewer values than variables, we'll assign nil because compileExprListTop
// will generate nils
// assign variables that have associated values; note that if we have fewer values than variables, we'll assign nil because
// compileExprListTemp will generate nils
for (size_t i = 0; i < stat->vars.size; ++i)
{
setDebugLine(stat->vars.data[i]);
@ -2549,7 +2943,10 @@ struct Compiler
}
else if (AstStatReturn* stat = node->as<AstStatReturn>())
{
compileStatReturn(stat);
if (options.optimizationLevel >= 2 && !inlineFrames.empty())
compileInlineReturn(stat, /* fallthrough= */ false);
else
compileStatReturn(stat);
}
else if (AstStatExpr* stat = node->as<AstStatExpr>())
{
@ -2826,6 +3223,8 @@ struct Compiler
: self(self)
, functions(functions)
{
// preallocate the result; this works around std::vector's inefficient growth policy for small arrays
functions.reserve(16);
}
bool visit(AstExprFunction* node) override
@ -2941,6 +3340,10 @@ struct Compiler
{
uint32_t id;
std::vector<AstLocal*> upvals;
uint64_t costModel = 0;
unsigned int stackSize = 0;
bool canInline = false;
};
struct Local
@ -2970,6 +3373,16 @@ struct Compiler
AstExpr* untilCondition;
};
struct InlineFrame
{
AstExprFunction* func;
uint8_t target;
uint8_t targetCount;
std::vector<size_t> returnJumps;
};
BytecodeBuilder& bytecode;
CompileOptions options;
@ -2979,6 +3392,7 @@ struct Compiler
DenseHashMap<AstName, Global> globals;
DenseHashMap<AstLocal*, Variable> variables;
DenseHashMap<AstExpr*, Constant> constants;
DenseHashMap<AstLocal*, Constant> locstants;
DenseHashMap<AstExprTable*, TableShape> tableShapes;
unsigned int regTop = 0;
@ -2991,6 +3405,7 @@ struct Compiler
std::vector<AstLocal*> upvals;
std::vector<LoopJump> loopJumps;
std::vector<Loop> loops;
std::vector<InlineFrame> inlineFrames;
};
void compileOrThrow(BytecodeBuilder& bytecode, AstStatBlock* root, const AstNameTable& names, const CompileOptions& options)
@ -3008,7 +3423,7 @@ void compileOrThrow(BytecodeBuilder& bytecode, AstStatBlock* root, const AstName
if (options.optimizationLevel >= 1)
{
// this pass analyzes constantness of expressions
foldConstants(compiler.constants, compiler.variables, root);
foldConstants(compiler.constants, compiler.variables, compiler.locstants, root);
// this pass analyzes table assignments to estimate table shapes for initially empty tables
predictTableShapes(compiler.tableShapes, root);


@ -3,6 +3,8 @@
#include <math.h>
LUAU_FASTFLAG(LuauCompileSupportInlining)
namespace Luau
{
namespace Compile
@ -191,13 +193,13 @@ struct ConstantVisitor : AstVisitor
{
DenseHashMap<AstExpr*, Constant>& constants;
DenseHashMap<AstLocal*, Variable>& variables;
DenseHashMap<AstLocal*, Constant>& locals;
DenseHashMap<AstLocal*, Constant> locals;
ConstantVisitor(DenseHashMap<AstExpr*, Constant>& constants, DenseHashMap<AstLocal*, Variable>& variables)
ConstantVisitor(
DenseHashMap<AstExpr*, Constant>& constants, DenseHashMap<AstLocal*, Variable>& variables, DenseHashMap<AstLocal*, Constant>& locals)
: constants(constants)
, variables(variables)
, locals(nullptr)
, locals(locals)
{
}
@ -290,7 +292,8 @@ struct ConstantVisitor : AstVisitor
Constant la = analyze(expr->left);
Constant ra = analyze(expr->right);
if (la.type != Constant::Type_Unknown && ra.type != Constant::Type_Unknown)
// note: ra doesn't need to be constant to fold and/or
if (la.type != Constant::Type_Unknown)
foldBinary(result, expr->op, la, ra);
}
else if (AstExprTypeAssertion* expr = node->as<AstExprTypeAssertion>())
@ -313,12 +316,35 @@ struct ConstantVisitor : AstVisitor
LUAU_ASSERT(!"Unknown expression type");
}
if (result.type != Constant::Type_Unknown)
constants[node] = result;
recordConstant(constants, node, result);
return result;
}
template<typename T>
void recordConstant(DenseHashMap<T, Constant>& map, T key, const Constant& value)
{
if (value.type != Constant::Type_Unknown)
map[key] = value;
else if (!FFlag::LuauCompileSupportInlining)
;
else if (Constant* old = map.find(key))
old->type = Constant::Type_Unknown;
}
void recordValue(AstLocal* local, const Constant& value)
{
// note: we rely on trackValues to have been run before us
Variable* v = variables.find(local);
LUAU_ASSERT(v);
if (!v->written)
{
v->constant = (value.type != Constant::Type_Unknown);
recordConstant(locals, local, value);
}
}
bool visit(AstExpr* node) override
{
// note: we short-circuit the visitor traversal through any expression trees by returning false
@ -335,18 +361,7 @@ struct ConstantVisitor : AstVisitor
{
Constant arg = analyze(node->values.data[i]);
if (arg.type != Constant::Type_Unknown)
{
// note: we rely on trackValues to have been run before us
Variable* v = variables.find(node->vars.data[i]);
LUAU_ASSERT(v);
if (!v->written)
{
locals[node->vars.data[i]] = arg;
v->constant = true;
}
}
recordValue(node->vars.data[i], arg);
}
if (node->vars.size > node->values.size)
@ -360,15 +375,8 @@ struct ConstantVisitor : AstVisitor
{
for (size_t i = node->values.size; i < node->vars.size; ++i)
{
// note: we rely on trackValues to have been run before us
Variable* v = variables.find(node->vars.data[i]);
LUAU_ASSERT(v);
if (!v->written)
{
locals[node->vars.data[i]].type = Constant::Type_Nil;
v->constant = true;
}
Constant nil = {Constant::Type_Nil};
recordValue(node->vars.data[i], nil);
}
}
}
@ -384,9 +392,10 @@ struct ConstantVisitor : AstVisitor
}
};
void foldConstants(DenseHashMap<AstExpr*, Constant>& constants, DenseHashMap<AstLocal*, Variable>& variables, AstNode* root)
void foldConstants(DenseHashMap<AstExpr*, Constant>& constants, DenseHashMap<AstLocal*, Variable>& variables,
DenseHashMap<AstLocal*, Constant>& locals, AstNode* root)
{
ConstantVisitor visitor{constants, variables};
ConstantVisitor visitor{constants, variables, locals};
root->visit(&visitor);
}


@ -42,7 +42,8 @@ struct Constant
}
};
void foldConstants(DenseHashMap<AstExpr*, Constant>& constants, DenseHashMap<AstLocal*, Variable>& variables, AstNode* root);
void foldConstants(DenseHashMap<AstExpr*, Constant>& constants, DenseHashMap<AstLocal*, Variable>& variables,
DenseHashMap<AstLocal*, Constant>& locals, AstNode* root);
} // namespace Compile
} // namespace Luau


@ -0,0 +1,258 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "CostModel.h"
#include "Luau/Common.h"
#include "Luau/DenseHash.h"
namespace Luau
{
namespace Compile
{
inline uint64_t parallelAddSat(uint64_t x, uint64_t y)
{
uint64_t s = x + y;
uint64_t m = s & 0x8080808080808080ull; // saturation mask
return (s ^ m) | (m - (m >> 7));
}
struct Cost
{
static const uint64_t kLiteral = ~0ull;
// cost model: 8 bytes, where first byte is the baseline cost, and the next 7 bytes are discounts for when variable #i is constant
uint64_t model;
// constant mask: 8-byte 0xff mask; equal to all ff's for literals, for variables only byte #i (1+) is set to align with model
uint64_t constant;
Cost(int cost = 0, uint64_t constant = 0)
: model(cost < 0x7f ? cost : 0x7f)
, constant(constant)
{
}
Cost operator+(const Cost& other) const
{
Cost result;
result.model = parallelAddSat(model, other.model);
return result;
}
Cost& operator+=(const Cost& other)
{
model = parallelAddSat(model, other.model);
constant = 0;
return *this;
}
static Cost fold(const Cost& x, const Cost& y)
{
uint64_t newmodel = parallelAddSat(x.model, y.model);
uint64_t newconstant = x.constant & y.constant;
// the extra cost for folding is 1; the discount is 1 for the variable that is shared by x&y (or whichever one is used in x/y if the other is
// literal)
uint64_t extra = (newconstant == kLiteral) ? 0 : (1 | (0x0101010101010101ull & newconstant));
Cost result;
result.model = parallelAddSat(newmodel, extra);
result.constant = newconstant;
return result;
}
};
struct CostVisitor : AstVisitor
{
DenseHashMap<AstLocal*, uint64_t> vars;
Cost result;
CostVisitor()
: vars(nullptr)
{
}
Cost model(AstExpr* node)
{
if (AstExprGroup* expr = node->as<AstExprGroup>())
{
return model(expr->expr);
}
else if (node->is<AstExprConstantNil>() || node->is<AstExprConstantBool>() || node->is<AstExprConstantNumber>() ||
node->is<AstExprConstantString>())
{
return Cost(0, Cost::kLiteral);
}
else if (AstExprLocal* expr = node->as<AstExprLocal>())
{
const uint64_t* i = vars.find(expr->local);
return Cost(0, i ? *i : 0); // locals typically don't require extra instructions to compute
}
else if (node->is<AstExprGlobal>())
{
return 1;
}
else if (node->is<AstExprVarargs>())
{
return 3;
}
else if (AstExprCall* expr = node->as<AstExprCall>())
{
Cost cost = 3;
cost += model(expr->func);
for (size_t i = 0; i < expr->args.size; ++i)
{
Cost ac = model(expr->args.data[i]);
// for constants/locals we still need to copy them to the argument list
cost += ac.model == 0 ? Cost(1) : ac;
}
return cost;
}
else if (AstExprIndexName* expr = node->as<AstExprIndexName>())
{
return model(expr->expr) + 1;
}
else if (AstExprIndexExpr* expr = node->as<AstExprIndexExpr>())
{
return model(expr->expr) + model(expr->index) + 1;
}
else if (AstExprFunction* expr = node->as<AstExprFunction>())
{
return 10; // high baseline cost due to allocation
}
else if (AstExprTable* expr = node->as<AstExprTable>())
{
Cost cost = 10; // high baseline cost due to allocation
for (size_t i = 0; i < expr->items.size; ++i)
{
const AstExprTable::Item& item = expr->items.data[i];
if (item.key)
cost += model(item.key);
cost += model(item.value);
cost += 1;
}
return cost;
}
else if (AstExprUnary* expr = node->as<AstExprUnary>())
{
return Cost::fold(model(expr->expr), Cost(0, Cost::kLiteral));
}
else if (AstExprBinary* expr = node->as<AstExprBinary>())
{
return Cost::fold(model(expr->left), model(expr->right));
}
else if (AstExprTypeAssertion* expr = node->as<AstExprTypeAssertion>())
{
return model(expr->expr);
}
else if (AstExprIfElse* expr = node->as<AstExprIfElse>())
{
return model(expr->condition) + model(expr->trueExpr) + model(expr->falseExpr) + 2;
}
else
{
LUAU_ASSERT(!"Unknown expression type");
return {};
}
}
void assign(AstExpr* expr)
{
// variable assignments reset variable mask, so that further uses of this variable aren't discounted
// this doesn't work perfectly with backwards control flow like loops, but is good enough for a single pass
if (AstExprLocal* lv = expr->as<AstExprLocal>())
if (uint64_t* i = vars.find(lv->local))
*i = 0;
}
bool visit(AstExpr* node) override
{
// note: we short-circuit the visitor traversal through any expression trees by returning false
// recursive traversal is happening inside model() which makes it easier to get the resulting value of the subexpression
result += model(node);
return false;
}
bool visit(AstStat* node) override
{
if (node->is<AstStatIf>())
result += 2;
else if (node->is<AstStatWhile>() || node->is<AstStatRepeat>() || node->is<AstStatFor>() || node->is<AstStatForIn>())
result += 2;
else if (node->is<AstStatBreak>() || node->is<AstStatContinue>())
result += 1;
return true;
}
bool visit(AstStatLocal* node) override
{
for (size_t i = 0; i < node->values.size; ++i)
{
Cost arg = model(node->values.data[i]);
// propagate constant mask from expression through variables
if (arg.constant && i < node->vars.size)
vars[node->vars.data[i]] = arg.constant;
result += arg;
}
return false;
}
bool visit(AstStatAssign* node) override
{
for (size_t i = 0; i < node->vars.size; ++i)
assign(node->vars.data[i]);
return true;
}
bool visit(AstStatCompoundAssign* node) override
{
assign(node->var);
// if lhs is not a local, setting it requires an extra table operation
result += node->var->is<AstExprLocal>() ? 1 : 2;
return true;
}
};
uint64_t modelCost(AstNode* root, AstLocal* const* vars, size_t varCount)
{
CostVisitor visitor;
for (size_t i = 0; i < varCount && i < 7; ++i)
visitor.vars[vars[i]] = 0xffull << (i * 8 + 8);
root->visit(&visitor);
return visitor.result.model;
}
int computeCost(uint64_t model, const bool* varsConst, size_t varCount)
{
int cost = int(model & 0x7f);
// don't apply discounts to what is likely a saturated sum
if (cost == 0x7f)
return cost;
for (size_t i = 0; i < varCount && i < 7; ++i)
cost -= int((model >> (i * 8 + 8)) & 0x7f) * varsConst[i];
return cost;
}
} // namespace Compile
} // namespace Luau
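A standalone sketch of the packed cost arithmetic above (parallelAddSat and computeCost mirror the definitions in this file; the model values in main are illustrative):

#include <cassert>
#include <cstddef>
#include <cstdint>

static uint64_t parallelAddSat(uint64_t x, uint64_t y)
{
    uint64_t s = x + y;                     // per-byte sums; no cross-byte carry since each byte stays <= 0x7f
    uint64_t m = s & 0x8080808080808080ull; // bytes whose sum exceeded 0x7f
    return (s ^ m) | (m - (m >> 7));        // clamp those bytes to 0x7f
}

static int computeCost(uint64_t model, const bool* varsConst, std::size_t varCount)
{
    int cost = int(model & 0x7f);
    if (cost == 0x7f)
        return cost; // likely a saturated sum, don't apply discounts
    for (std::size_t i = 0; i < varCount && i < 7; ++i)
        cost -= int((model >> (i * 8 + 8)) & 0x7f) * varsConst[i];
    return cost;
}

int main()
{
    // byte 0 holds the baseline cost, byte 1 the discount for variable #1 being constant
    uint64_t big = parallelAddSat(0x0310ull, 0x0275ull);
    assert(big == 0x057full); // baselines 0x10 + 0x75 saturate at 0x7f, discounts 3 + 2 add normally

    uint64_t small = parallelAddSat(0x0310ull, 0x0205ull);
    assert(computeCost(small, nullptr, 0) == 0x15); // baseline only: 0x10 + 0x05
    bool varc[1] = {true};
    assert(computeCost(small, varc, 1) == 0x15 - 5); // a constant argument earns the 3 + 2 discount
    return 0;
}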


@ -0,0 +1,18 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Ast.h"
namespace Luau
{
namespace Compile
{
// cost model: 8 bytes, where first byte is the baseline cost, and the next 7 bytes are discounts for when variable #i is constant
uint64_t modelCost(AstNode* root, AstLocal* const* vars, size_t varCount);
// cost is computed as B - sum(Di * Ci), where B is baseline cost, Di is the discount for each variable and Ci is 1 when variable #i is constant
int computeCost(uint64_t model, const bool* varsConst, size_t varCount);
} // namespace Compile
} // namespace Luau


@ -299,7 +299,7 @@ LUA_API uintptr_t lua_encodepointer(lua_State* L, uintptr_t p);
LUA_API double lua_clock();
LUA_API void lua_setuserdatadtor(lua_State* L, int tag, void (*dtor)(void*));
LUA_API void lua_setuserdatadtor(lua_State* L, int tag, void (*dtor)(lua_State*, void*));
LUA_API void lua_clonefunction(lua_State* L, int idx);
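A hypothetical usage sketch of the updated destructor signature (the tag value and FILE* payload are illustrative, not from the commit):

#include "lua.h"
#include <cstdio>

// userdata destructors now receive the owning lua_State in addition to the data pointer
static void closeFileHandle(lua_State* L, void* ud)
{
    (void)L; // available for VM interaction if needed
    std::fclose(*static_cast<FILE**>(ud));
}

void registerFileTag(lua_State* L)
{
    lua_setuserdatadtor(L, /* tag */ 1, closeFileHandle);
}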


@ -14,6 +14,8 @@
#include <string.h>
LUAU_FASTFLAG(LuauGcWorkTrackFix)
const char* lua_ident = "$Lua: Lua 5.1.4 Copyright (C) 1994-2008 Lua.org, PUC-Rio $\n"
"$Authors: R. Ierusalimschy, L. H. de Figueiredo & W. Celes $\n"
"$URL: www.lua.org $\n";
@ -1050,6 +1052,7 @@ int lua_gc(lua_State* L, int what, int data)
{
size_t prevthreshold = g->GCthreshold;
size_t amount = (cast_to(size_t, data) << 10);
ptrdiff_t oldcredit = g->gcstate == GCSpause ? 0 : g->GCthreshold - g->totalbytes;
// temporarily adjust the threshold so that we can perform GC work
if (amount <= g->totalbytes)
@ -1069,9 +1072,9 @@ int lua_gc(lua_State* L, int what, int data)
while (g->GCthreshold <= g->totalbytes)
{
luaC_step(L, false);
size_t stepsize = luaC_step(L, false);
actualwork += g->gcstepsize;
actualwork += FFlag::LuauGcWorkTrackFix ? stepsize : g->gcstepsize;
if (g->gcstate == GCSpause)
{ /* end of cycle? */
@ -1107,11 +1110,20 @@ int lua_gc(lua_State* L, int what, int data)
// if cycle hasn't finished, advance threshold forward for the amount of extra work performed
if (g->gcstate != GCSpause)
{
// if a new cycle was triggered by explicit step, we ignore old threshold as that shows an incorrect 'credit' of GC work
if (waspaused)
g->GCthreshold = g->totalbytes + actualwork;
if (FFlag::LuauGcWorkTrackFix)
{
// if a new cycle was triggered by explicit step, old 'credit' of GC work is 0
ptrdiff_t newthreshold = g->totalbytes + actualwork + oldcredit;
g->GCthreshold = newthreshold < 0 ? 0 : newthreshold;
}
else
g->GCthreshold = prevthreshold + actualwork;
{
// if a new cycle was triggered by explicit step, we ignore old threshold as that shows an incorrect 'credit' of GC work
if (waspaused)
g->GCthreshold = g->totalbytes + actualwork;
else
g->GCthreshold = prevthreshold + actualwork;
}
}
break;
}
@ -1258,7 +1270,7 @@ const char* lua_setupvalue(lua_State* L, int funcindex, int n)
L->top--;
setobj(L, val, L->top);
luaC_barrier(L, clvalue(fi), L->top);
luaC_upvalbarrier(L, NULL, val);
luaC_upvalbarrier(L, cast_to(UpVal*, NULL), val);
}
return name;
}
@ -1311,7 +1323,7 @@ void lua_unref(lua_State* L, int ref)
return;
}
void lua_setuserdatadtor(lua_State* L, int tag, void (*dtor)(void*))
void lua_setuserdatadtor(lua_State* L, int tag, void (*dtor)(lua_State*, void*))
{
api_check(L, unsigned(tag) < LUA_UTAG_LIMIT);
L->global->udatagc[tag] = dtor;


@ -15,6 +15,8 @@
#include <intrin.h>
#endif
LUAU_FASTFLAGVARIABLE(LuauFixBuiltinsStackLimit, false)
// luauF functions implement FASTCALL instruction that performs a direct execution of some builtin functions from the VM
// The rule of thumb is that FASTCALL functions can not call user code, yield, fail, or reallocate stack.
// If types of the arguments mismatch, luauF_* needs to return -1 and the execution will fall back to the usual call path
@ -1003,7 +1005,7 @@ static int luauF_tunpack(lua_State* L, StkId res, TValue* arg0, int nresults, St
else if (nparams == 3 && ttisnumber(args) && ttisnumber(args + 1) && nvalue(args) == 1.0)
n = int(nvalue(args + 1));
if (n >= 0 && n <= t->sizearray && cast_int(L->stack_last - res) >= n)
if (n >= 0 && n <= t->sizearray && cast_int(L->stack_last - res) >= n && (!FFlag::LuauFixBuiltinsStackLimit || n + nparams <= LUAI_MAXCSTACK))
{
TValue* array = t->array;
for (int i = 0; i < n; ++i)


@ -14,6 +14,6 @@ LUAI_FUNC UpVal* luaF_findupval(lua_State* L, StkId level);
LUAI_FUNC void luaF_close(lua_State* L, StkId level);
LUAI_FUNC void luaF_freeproto(lua_State* L, Proto* f, struct lua_Page* page);
LUAI_FUNC void luaF_freeclosure(lua_State* L, Closure* c, struct lua_Page* page);
void luaF_unlinkupval(UpVal* uv);
LUAI_FUNC void luaF_unlinkupval(UpVal* uv);
LUAI_FUNC void luaF_freeupval(lua_State* L, UpVal* uv, struct lua_Page* page);
LUAI_FUNC const LocVar* luaF_getlocal(const Proto* func, int local_number, int pc);


@ -13,9 +13,10 @@
#include <string.h>
#define GC_SWEEPMAX 40
#define GC_SWEEPCOST 10
#define GC_SWEEPPAGESTEPCOST 4
LUAU_FASTFLAGVARIABLE(LuauGcWorkTrackFix, false)
LUAU_FASTFLAGVARIABLE(LuauGcSweepCostFix, false)
#define GC_SWEEPPAGESTEPCOST (FFlag::LuauGcSweepCostFix ? 16 : 4)
#define GC_INTERRUPT(state) \
{ \
@ -64,7 +65,7 @@ static void recordGcStateStep(global_State* g, int startgcstate, double seconds,
case GCSpropagate:
case GCSpropagateagain:
g->gcmetrics.currcycle.marktime += seconds;
g->gcmetrics.currcycle.markrequests += g->gcstepsize;
g->gcmetrics.currcycle.markwork += work;
if (assist)
g->gcmetrics.currcycle.markassisttime += seconds;
@ -74,7 +75,7 @@ static void recordGcStateStep(global_State* g, int startgcstate, double seconds,
break;
case GCSsweep:
g->gcmetrics.currcycle.sweeptime += seconds;
g->gcmetrics.currcycle.sweeprequests += g->gcstepsize;
g->gcmetrics.currcycle.sweepwork += work;
if (assist)
g->gcmetrics.currcycle.sweepassisttime += seconds;
@ -87,13 +88,11 @@ static void recordGcStateStep(global_State* g, int startgcstate, double seconds,
{
g->gcmetrics.stepassisttimeacc += seconds;
g->gcmetrics.currcycle.assistwork += work;
g->gcmetrics.currcycle.assistrequests += g->gcstepsize;
}
else
{
g->gcmetrics.stepexplicittimeacc += seconds;
g->gcmetrics.currcycle.explicitwork += work;
g->gcmetrics.currcycle.explicitrequests += g->gcstepsize;
}
}
@ -878,11 +877,11 @@ static size_t getheaptrigger(global_State* g, size_t heapgoal)
return heaptrigger < int64_t(g->totalbytes) ? g->totalbytes : (heaptrigger > int64_t(heapgoal) ? heapgoal : size_t(heaptrigger));
}
void luaC_step(lua_State* L, bool assist)
size_t luaC_step(lua_State* L, bool assist)
{
global_State* g = L->global;
int lim = (g->gcstepsize / 100) * g->gcstepmul; /* how much to work */
int lim = FFlag::LuauGcWorkTrackFix ? g->gcstepsize * g->gcstepmul / 100 : (g->gcstepsize / 100) * g->gcstepmul; /* how much to work */
LUAU_ASSERT(g->totalbytes >= g->GCthreshold);
size_t debt = g->totalbytes - g->GCthreshold;
@ -902,12 +901,13 @@ void luaC_step(lua_State* L, bool assist)
int lastgcstate = g->gcstate;
size_t work = gcstep(L, lim);
(void)work;
#ifdef LUAI_GCMETRICS
recordGcStateStep(g, lastgcstate, lua_clock() - lasttimestamp, assist, work);
#endif
size_t actualstepsize = work * 100 / g->gcstepmul;
// at the end of the last cycle
if (g->gcstate == GCSpause)
{
@ -927,14 +927,16 @@ void luaC_step(lua_State* L, bool assist)
}
else
{
g->GCthreshold = g->totalbytes + g->gcstepsize;
g->GCthreshold = g->totalbytes + (FFlag::LuauGcWorkTrackFix ? actualstepsize : g->gcstepsize);
// compensate if GC is "behind schedule" (has some debt to pay)
if (g->GCthreshold > debt)
if (FFlag::LuauGcWorkTrackFix ? g->GCthreshold >= debt : g->GCthreshold > debt)
g->GCthreshold -= debt;
}
GC_INTERRUPT(lastgcstate);
return actualstepsize;
}
void luaC_fullgc(lua_State* L)
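
A minimal sketch (only the two formulas visible in the hunk above, nothing beyond them) of the step accounting that luaC_step now reports, assuming gcstepmul is a percentage as in the VM: the step limit is derived from the nominal step size, and the returned value converts the work actually performed back into bytes so the next GC threshold tracks real progress rather than the nominal gcstepsize.

#include <cstddef>

// step limit under LuauGcWorkTrackFix: scale before dividing so gcstepsize/100
// is not truncated first
static size_t gc_step_limit(size_t gcstepsize, size_t gcstepmul)
{
    return gcstepsize * gcstepmul / 100;
}

// inverse mapping used for the actualstepsize value returned by luaC_step
static size_t gc_actual_step_size(size_t work, size_t gcstepmul)
{
    return work * 100 / gcstepmul;
}
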

View File

@ -120,7 +120,7 @@
#define luaC_upvalbarrier(L, uv, tv) \
{ \
if (iscollectable(tv) && iswhite(gcvalue(tv)) && (!(uv) || ((UpVal*)uv)->v != &((UpVal*)uv)->u.value)) \
if (iscollectable(tv) && iswhite(gcvalue(tv)) && (!(uv) || (uv)->v != &(uv)->u.value)) \
luaC_barrierupval(L, gcvalue(tv)); \
}
@ -133,7 +133,7 @@
#define luaC_init(L, o, tt) luaC_initobj(L, cast_to(GCObject*, (o)), tt)
LUAI_FUNC void luaC_freeall(lua_State* L);
LUAI_FUNC void luaC_step(lua_State* L, bool assist);
LUAI_FUNC size_t luaC_step(lua_State* L, bool assist);
LUAI_FUNC void luaC_fullgc(lua_State* L);
LUAI_FUNC void luaC_initobj(lua_State* L, GCObject* o, uint8_t tt);
LUAI_FUNC void luaC_initupval(lua_State* L, UpVal* uv);

View File

@ -106,7 +106,7 @@ struct GCCycleMetrics
double markassisttime = 0.0;
double markmaxexplicittime = 0.0;
size_t markexplicitsteps = 0;
size_t markrequests = 0;
size_t markwork = 0;
double atomicstarttimestamp = 0.0;
size_t atomicstarttotalsizebytes = 0;
@ -122,10 +122,7 @@ struct GCCycleMetrics
double sweepassisttime = 0.0;
double sweepmaxexplicittime = 0.0;
size_t sweepexplicitsteps = 0;
size_t sweeprequests = 0;
size_t assistrequests = 0;
size_t explicitrequests = 0;
size_t sweepwork = 0;
size_t assistwork = 0;
size_t explicitwork = 0;
@ -203,7 +200,7 @@ typedef struct global_State
uint64_t rngstate; /* PCG random number generator state */
uint64_t ptrenckey[4]; /* pointer encoding key for display */
void (*udatagc[LUA_UTAG_LIMIT])(void*); /* for each userdata tag, a gc callback to be called immediately before freeing memory */
void (*udatagc[LUA_UTAG_LIMIT])(lua_State*, void*); /* for each userdata tag, a gc callback to be called immediately before freeing memory */
lua_Callbacks cb;

View File

@ -33,9 +33,6 @@
#include <string.h>
LUAU_FASTFLAGVARIABLE(LuauTableRehashRework, false)
LUAU_FASTFLAGVARIABLE(LuauTableNewBoundary, false)
// max size of both array and hash part is 2^MAXBITS
#define MAXBITS 26
#define MAXSIZE (1 << MAXBITS)
@ -390,6 +387,8 @@ static void resize(lua_State* L, Table* t, int nasize, int nhsize)
setarrayvector(L, t, nasize);
/* create new hash part with appropriate size */
setnodevector(L, t, nhsize);
/* used for the migration check at the end */
LuaNode* nnew = t->node;
if (nasize < oldasize)
{ /* array part must shrink? */
t->sizearray = nasize;
@ -398,57 +397,51 @@ static void resize(lua_State* L, Table* t, int nasize, int nhsize)
{
if (!ttisnil(&t->array[i]))
{
if (FFlag::LuauTableRehashRework)
{
TValue ok;
setnvalue(&ok, cast_num(i + 1));
setobjt2t(L, newkey(L, t, &ok), &t->array[i]);
}
else
{
setobjt2t(L, luaH_setnum(L, t, i + 1), &t->array[i]);
}
TValue ok;
setnvalue(&ok, cast_num(i + 1));
setobjt2t(L, newkey(L, t, &ok), &t->array[i]);
}
}
/* shrink array */
luaM_reallocarray(L, t->array, oldasize, nasize, TValue, t->memcat);
}
/* used for the migration check at the end */
TValue* anew = t->array;
/* re-insert elements from hash part */
if (FFlag::LuauTableRehashRework)
for (int i = twoto(oldhsize) - 1; i >= 0; i--)
{
for (int i = twoto(oldhsize) - 1; i >= 0; i--)
LuaNode* old = nold + i;
if (!ttisnil(gval(old)))
{
LuaNode* old = nold + i;
if (!ttisnil(gval(old)))
{
TValue ok;
getnodekey(L, &ok, old);
setobjt2t(L, arrayornewkey(L, t, &ok), gval(old));
}
}
}
else
{
for (int i = twoto(oldhsize) - 1; i >= 0; i--)
{
LuaNode* old = nold + i;
if (!ttisnil(gval(old)))
{
TValue ok;
getnodekey(L, &ok, old);
setobjt2t(L, luaH_set(L, t, &ok), gval(old));
}
TValue ok;
getnodekey(L, &ok, old);
setobjt2t(L, arrayornewkey(L, t, &ok), gval(old));
}
}
/* make sure we haven't recursively rehashed during element migration */
LUAU_ASSERT(nnew == t->node);
LUAU_ASSERT(anew == t->array);
if (nold != dummynode)
luaM_freearray(L, nold, twoto(oldhsize), LuaNode, t->memcat); /* free old array */
}
static int adjustasize(Table* t, int size, const TValue* ek)
{
bool tbound = t->node != dummynode || size < t->sizearray;
int ekindex = ek && ttisnumber(ek) ? arrayindex(nvalue(ek)) : -1;
/* move the array size up until the boundary is guaranteed to be inside the array part */
while (size + 1 == ekindex || (tbound && !ttisnil(luaH_getnum(t, size + 1))))
size++;
return size;
}
void luaH_resizearray(lua_State* L, Table* t, int nasize)
{
int nsize = (t->node == dummynode) ? 0 : sizenode(t);
resize(L, t, nasize, nsize);
int asize = adjustasize(t, nasize, NULL);
resize(L, t, asize, nsize);
}
void luaH_resizehash(lua_State* L, Table* t, int nhsize)
@ -470,21 +463,11 @@ static void rehash(lua_State* L, Table* t, const TValue* ek)
totaluse++;
/* compute new size for array part */
int na = computesizes(nums, &nasize);
int nh = totaluse - na;
/* enforce the boundary invariant; for performance, only do hash lookups if we must */
if (FFlag::LuauTableNewBoundary)
{
bool tbound = t->node != dummynode || nasize < t->sizearray;
int ekindex = ttisnumber(ek) ? arrayindex(nvalue(ek)) : -1;
/* move the array size up until the boundary is guaranteed to be inside the array part */
while (nasize + 1 == ekindex || (tbound && !ttisnil(luaH_getnum(t, nasize + 1))))
{
nasize++;
na++;
}
}
nasize = adjustasize(t, nasize, ek);
/* resize the table to new computed sizes */
LUAU_ASSERT(na <= totaluse);
resize(L, t, nasize, totaluse - na);
resize(L, t, nasize, nh);
}
/*
@ -544,11 +527,11 @@ static LuaNode* getfreepos(Table* t)
static TValue* newkey(lua_State* L, Table* t, const TValue* key)
{
/* enforce boundary invariant */
if (FFlag::LuauTableNewBoundary && ttisnumber(key) && nvalue(key) == t->sizearray + 1)
if (ttisnumber(key) && nvalue(key) == t->sizearray + 1)
{
rehash(L, t, key); /* grow table */
// after rehash, numeric keys might be located in the new array part, but won't be found in the node part
/* after rehash, numeric keys might be located in the new array part, but won't be found in the node part */
return arrayornewkey(L, t, key);
}
@ -560,15 +543,8 @@ static TValue* newkey(lua_State* L, Table* t, const TValue* key)
{ /* cannot find a free place? */
rehash(L, t, key); /* grow table */
if (!FFlag::LuauTableRehashRework)
{
return luaH_set(L, t, key); /* re-insert key into grown table */
}
else
{
// after rehash, numeric keys might be located in the new array part, but won't be found in the node part
return arrayornewkey(L, t, key);
}
/* after rehash, numeric keys might be located in the new array part, but won't be found in the node part */
return arrayornewkey(L, t, key);
}
LUAU_ASSERT(n != dummynode);
TValue mk;
@ -733,37 +709,6 @@ TValue* luaH_setstr(lua_State* L, Table* t, TString* key)
}
}
static LUAU_NOINLINE int unbound_search(Table* t, unsigned int j)
{
LUAU_ASSERT(!FFlag::LuauTableNewBoundary);
unsigned int i = j; /* i is zero or a present index */
j++;
/* find `i' and `j' such that i is present and j is not */
while (!ttisnil(luaH_getnum(t, j)))
{
i = j;
j *= 2;
if (j > cast_to(unsigned int, INT_MAX))
{ /* overflow? */
/* table was built with bad purposes: resort to linear search */
i = 1;
while (!ttisnil(luaH_getnum(t, i)))
i++;
return i - 1;
}
}
/* now do a binary search between them */
while (j - i > 1)
{
unsigned int m = (i + j) / 2;
if (ttisnil(luaH_getnum(t, m)))
j = m;
else
i = m;
}
return i;
}
static int updateaboundary(Table* t, int boundary)
{
if (boundary < t->sizearray && ttisnil(&t->array[boundary - 1]))
@ -820,17 +765,12 @@ int luaH_getn(Table* t)
maybesetaboundary(t, boundary);
return boundary;
}
else if (FFlag::LuauTableNewBoundary)
else
{
/* validate boundary invariant */
LUAU_ASSERT(t->node == dummynode || ttisnil(luaH_getnum(t, j + 1)));
return j;
}
/* else must find a boundary in hash part */
else if (t->node == dummynode) /* hash part is empty? */
return j; /* that is easy... */
else
return unbound_search(t, j);
}
Table* luaH_clone(lua_State* L, Table* tt)
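
The unbound_search fallback can be deleted because resize/rehash now maintain the boundary invariant through the new adjustasize helper: the array part is grown until element size+1 is guaranteed to be nil (or to land inside the array), so luaH_getn never has to probe the hash part. A simplified standalone model of that adjustment, not the VM's Table (the real helper also consults the hash part and the key being inserted):

#include <optional>
#include <vector>

using Slot = std::optional<int>; // nullopt models a nil slot; elems[i] stands for t[i+1]

// mirrors the loop in adjustasize above: grow `size` while t[size+1] is present,
// so the length boundary always falls inside the array part
static size_t adjust_array_size(const std::vector<Slot>& elems, size_t size)
{
    while (size < elems.size() && elems[size].has_value())
        size++;
    return size;
}
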

View File

@ -199,9 +199,9 @@ static int tmove(lua_State* L)
int tt = !lua_isnoneornil(L, 5) ? 5 : 1; /* destination table */
luaL_checktype(L, tt, LUA_TTABLE);
void (*telemetrycb)(lua_State* L, int f, int e, int t, int nf, int nt) = lua_table_move_telemetry;
void (*telemetrycb)(lua_State * L, int f, int e, int t, int nf, int nt) = lua_table_move_telemetry;
if (DFFlag::LuauTableMoveTelemetry2 && telemetrycb)
if (DFFlag::LuauTableMoveTelemetry2 && telemetrycb && e >= f)
{
int nf = lua_objlen(L, 1);
int nt = lua_objlen(L, tt);

View File

@ -37,6 +37,8 @@ const char* const luaT_eventname[] = {
"__newindex",
"__mode",
"__namecall",
"__call",
"__iter",
"__eq",
@ -54,13 +56,13 @@ const char* const luaT_eventname[] = {
"__lt",
"__le",
"__concat",
"__call",
"__type",
};
// clang-format on
static_assert(sizeof(luaT_typenames) / sizeof(luaT_typenames[0]) == LUA_T_COUNT, "luaT_typenames size mismatch");
static_assert(sizeof(luaT_eventname) / sizeof(luaT_eventname[0]) == TM_N, "luaT_eventname size mismatch");
static_assert(TM_EQ < 8, "fasttm optimization stores a bitfield with metamethods in a byte");
void luaT_init(lua_State* L)
{

View File

@ -16,6 +16,8 @@ typedef enum
TM_NEWINDEX,
TM_MODE,
TM_NAMECALL,
TM_CALL,
TM_ITER,
TM_EQ, /* last tag method with `fast' access */
@ -33,7 +35,6 @@ typedef enum
TM_LT,
TM_LE,
TM_CONCAT,
TM_CALL,
TM_TYPE,
TM_N /* number of elements in the enum */
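
TM_CALL moves up and TM_ITER is inserted ahead of TM_EQ because, per the new static_assert in ltm.cpp, the fasttm optimization keeps a per-metatable "metamethod known absent" bitfield in a single byte, so every event that needs the fast negative lookup must have an enum value below 8. A tiny sketch of such a cache byte, reusing the event names above purely for illustration (this is not the VM's Table or fasttm macro):

#include <cstdint>

// order mirrors the reordered enum above; all fast-access events must stay < 8
enum FastTM { TM_INDEX, TM_NEWINDEX, TM_MODE, TM_NAMECALL, TM_CALL, TM_ITER, TM_EQ };

struct MetatableCache
{
    uint8_t absent = 0; // bit i set => metamethod i known to be missing

    bool known_absent(FastTM event) const { return (absent >> event) & 1u; }
    void mark_absent(FastTM event) { absent = uint8_t(absent | (1u << event)); }
};
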

View File

@ -22,14 +22,21 @@ Udata* luaU_newudata(lua_State* L, size_t s, int tag)
void luaU_freeudata(lua_State* L, Udata* u, lua_Page* page)
{
void (*dtor)(void*) = nullptr;
if (u->tag < LUA_UTAG_LIMIT)
{
void (*dtor)(lua_State*, void*) = nullptr;
dtor = L->global->udatagc[u->tag];
if (dtor)
dtor(L, u->data);
}
else if (u->tag == UTAG_IDTOR)
{
void (*dtor)(void*) = nullptr;
memcpy(&dtor, &u->data + u->len - sizeof(dtor), sizeof(dtor));
if (dtor)
dtor(u->data);
}
if (dtor)
dtor(u->data);
luaM_freegco(L, u, sizeudata(u->len), u->memcat, page);
}
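
With udatagc now declared as void(lua_State*, void*), tagged destructors receive the owning state, while the inline UTAG_IDTOR destructor embedded in the userdata payload keeps the old void(void*) shape. A hedged sketch of a destructor written against the new tagged signature; how it ends up in global_State::udatagc for its tag (normally via the public C API) is assumed here rather than taken from this diff.

#include <cstdio>

struct lua_State; // provided by lua.h in a real embedder

struct FileHandle
{
    std::FILE* f;
};

// matches the new callback shape: the state is now available to the destructor,
// e.g. for allocator bookkeeping or telemetry hooks owned by the embedder
static void filehandle_dtor(lua_State* L, void* ud)
{
    (void)L;
    FileHandle* h = static_cast<FileHandle*>(ud);
    if (h->f)
        std::fclose(h->f);
}
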

View File

@ -16,7 +16,10 @@
#include <string.h>
LUAU_FASTFLAG(LuauTableNewBoundary)
LUAU_FASTFLAGVARIABLE(LuauIter, false)
LUAU_DYNAMIC_FASTFLAGVARIABLE(LuauIterCallTelemetry, false)
void (*lua_iter_call_telemetry)(lua_State* L);
// Disable c99-designator to avoid the warning in CGOTO dispatch table
#ifdef __clang__
@ -110,7 +113,7 @@ LUAU_FASTFLAG(LuauTableNewBoundary)
VM_DISPATCH_OP(LOP_FORGLOOP_NEXT), VM_DISPATCH_OP(LOP_GETVARARGS), VM_DISPATCH_OP(LOP_DUPCLOSURE), VM_DISPATCH_OP(LOP_PREPVARARGS), \
VM_DISPATCH_OP(LOP_LOADKX), VM_DISPATCH_OP(LOP_JUMPX), VM_DISPATCH_OP(LOP_FASTCALL), VM_DISPATCH_OP(LOP_COVERAGE), \
VM_DISPATCH_OP(LOP_CAPTURE), VM_DISPATCH_OP(LOP_JUMPIFEQK), VM_DISPATCH_OP(LOP_JUMPIFNOTEQK), VM_DISPATCH_OP(LOP_FASTCALL1), \
VM_DISPATCH_OP(LOP_FASTCALL2), VM_DISPATCH_OP(LOP_FASTCALL2K),
VM_DISPATCH_OP(LOP_FASTCALL2), VM_DISPATCH_OP(LOP_FASTCALL2K), VM_DISPATCH_OP(LOP_FORGPREP),
#if defined(__GNUC__) || defined(__clang__)
#define VM_USE_CGOTO 1
@ -150,8 +153,20 @@ LUAU_NOINLINE static void luau_prepareFORN(lua_State* L, StkId plimit, StkId pst
LUAU_NOINLINE static bool luau_loopFORG(lua_State* L, int a, int c)
{
// note: it's safe to push arguments past top for complicated reasons (see top of the file)
StkId ra = &L->base[a];
LUAU_ASSERT(ra + 6 <= L->top);
LUAU_ASSERT(ra + 3 <= L->top);
if (DFFlag::LuauIterCallTelemetry)
{
/* TODO: we might be able to stop supporting this depending on whether it's used in practice */
void (*telemetrycb)(lua_State* L) = lua_iter_call_telemetry;
if (telemetrycb && ttistable(ra) && fasttm(L, hvalue(ra)->metatable, TM_CALL))
telemetrycb(L);
if (telemetrycb && ttisuserdata(ra) && fasttm(L, uvalue(ra)->metatable, TM_CALL))
telemetrycb(L);
}
setobjs2s(L, ra + 3 + 2, ra + 2);
setobjs2s(L, ra + 3 + 1, ra + 1);
@ -2204,20 +2219,149 @@ static void luau_execute(lua_State* L)
}
}
VM_CASE(LOP_FORGPREP)
{
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
if (ttisfunction(ra))
{
/* will be called during FORGLOOP */
}
else if (FFlag::LuauIter)
{
Table* mt = ttistable(ra) ? hvalue(ra)->metatable : ttisuserdata(ra) ? uvalue(ra)->metatable : cast_to(Table*, NULL);
if (const TValue* fn = fasttm(L, mt, TM_ITER))
{
setobj2s(L, ra + 1, ra);
setobj2s(L, ra, fn);
L->top = ra + 2; /* func + self arg */
LUAU_ASSERT(L->top <= L->stack_last);
VM_PROTECT(luaD_call(L, ra, 3));
L->top = L->ci->top;
}
else if (fasttm(L, mt, TM_CALL))
{
/* table or userdata with __call, will be called during FORGLOOP */
/* TODO: we might be able to stop supporting this depending on whether it's used in practice */
}
else if (ttistable(ra))
{
/* set up registers for builtin iteration */
setobj2s(L, ra + 1, ra);
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(0)));
setnilvalue(ra);
}
else
{
VM_PROTECT(luaG_typeerror(L, ra, "iterate over"));
}
}
pc += LUAU_INSN_D(insn);
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
VM_NEXT();
}
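
When LuauIter is enabled, the new LOP_FORGPREP case above decides up front how a generic for-in loop will iterate: plain functions are left for FORGLOOP, an __iter metamethod is invoked immediately to produce the iterator triple, a __call metamethod keeps working for now (telemetry-tracked), plain tables switch to the builtin protocol (nil in the function slot, the table, and a lightuserdata cursor of 0), and anything else raises "attempt to iterate over". A compact decision sketch with hypothetical names, descriptive only and not reusing VM internals:

enum class IterKind { Function, Table, Userdata, Other };

enum class ForgPrep
{
    CallIteratorInLoop,    // plain function: generic protocol, handled by FORGLOOP
    InvokeIterMetamethod,  // __iter present: call it now, its results seed the loop
    DeferCallMetamethod,   // __call present: deferred to FORGLOOP (telemetry-tracked)
    BuiltinTableIteration, // plain table: nil function slot + lightuserdata cursor 0
    TypeError              // everything else: "attempt to iterate over a ... value"
};

static ForgPrep classify(IterKind kind, bool has_iter_mm, bool has_call_mm)
{
    bool has_metatable_host = kind == IterKind::Table || kind == IterKind::Userdata;

    if (kind == IterKind::Function)
        return ForgPrep::CallIteratorInLoop;
    if (has_metatable_host && has_iter_mm)
        return ForgPrep::InvokeIterMetamethod;
    if (has_metatable_host && has_call_mm)
        return ForgPrep::DeferCallMetamethod;
    if (kind == IterKind::Table)
        return ForgPrep::BuiltinTableIteration;
    return ForgPrep::TypeError;
}
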
VM_CASE(LOP_FORGLOOP)
{
VM_INTERRUPT();
Instruction insn = *pc++;
StkId ra = VM_REG(LUAU_INSN_A(insn));
uint32_t aux = *pc;
// note: this is a slow generic path, fast-path is FORGLOOP_INEXT/NEXT
bool stop;
VM_PROTECT(stop = luau_loopFORG(L, LUAU_INSN_A(insn), aux));
if (!FFlag::LuauIter)
{
bool stop;
VM_PROTECT(stop = luau_loopFORG(L, LUAU_INSN_A(insn), aux));
// note that we need to increment pc by 1 to exit the loop since we need to skip over aux
pc += stop ? 1 : LUAU_INSN_D(insn);
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
VM_NEXT();
// note that we need to increment pc by 1 to exit the loop since we need to skip over aux
pc += stop ? 1 : LUAU_INSN_D(insn);
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
VM_NEXT();
}
// fast-path: builtin table iteration
if (ttisnil(ra) && ttistable(ra + 1) && ttislightuserdata(ra + 2))
{
Table* h = hvalue(ra + 1);
int index = int(reinterpret_cast<uintptr_t>(pvalue(ra + 2)));
int sizearray = h->sizearray;
int sizenode = 1 << h->lsizenode;
// clear extra variables since we might have more than two
if (LUAU_UNLIKELY(aux > 2))
for (int i = 2; i < int(aux); ++i)
setnilvalue(ra + 3 + i);
// first we advance index through the array portion
while (unsigned(index) < unsigned(sizearray))
{
if (!ttisnil(&h->array[index]))
{
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
setnvalue(ra + 3, double(index + 1));
setobj2s(L, ra + 4, &h->array[index]);
pc += LUAU_INSN_D(insn);
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
VM_NEXT();
}
index++;
}
// then we advance index through the hash portion
while (unsigned(index - sizearray) < unsigned(sizenode))
{
LuaNode* n = &h->node[index - sizearray];
if (!ttisnil(gval(n)))
{
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
getnodekey(L, ra + 3, n);
setobj2s(L, ra + 4, gval(n));
pc += LUAU_INSN_D(insn);
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
VM_NEXT();
}
index++;
}
// fallthrough to exit
pc++;
VM_NEXT();
}
else
{
// note: it's safe to push arguments past top for complicated reasons (see top of the file)
setobjs2s(L, ra + 3 + 2, ra + 2);
setobjs2s(L, ra + 3 + 1, ra + 1);
setobjs2s(L, ra + 3, ra);
L->top = ra + 3 + 3; /* func + 2 args (state and index) */
LUAU_ASSERT(L->top <= L->stack_last);
VM_PROTECT(luaD_call(L, ra + 3, aux));
L->top = L->ci->top;
// recompute ra since stack might have been reallocated
ra = VM_REG(LUAU_INSN_A(insn));
// copy first variable back into the iteration index
setobjs2s(L, ra + 2, ra + 3);
// note that we need to increment pc by 1 to exit the loop since we need to skip over aux
pc += ttisnil(ra + 3) ? 1 : LUAU_INSN_D(insn);
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
VM_NEXT();
}
}
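
The builtin fast path in LOP_FORGLOOP above keeps a single unsigned cursor in the lightuserdata register: values below sizearray walk the array part (where slot i holds key i+1), and values from sizearray onward walk the hash nodes at offset index - sizearray, skipping nil slots in both. A simplified standalone model of that traversal, not the VM's Table:

#include <optional>
#include <utility>
#include <vector>

struct ModelTable
{
    std::vector<std::optional<int>> array;                 // array part; nullopt = nil
    std::vector<std::optional<std::pair<int, int>>> nodes; // hash part as (key, value)
};

// fills key/value and the cursor to store back when a populated slot exists at or
// after `index`; returns false once the traversal is complete and the loop exits
static bool table_next(const ModelTable& t, unsigned index, int& key, int& value, unsigned& nextindex)
{
    unsigned sizearray = unsigned(t.array.size());

    while (index < sizearray)
    {
        if (t.array[index])
        {
            key = int(index + 1); // array slot `index` holds t[index + 1]
            value = *t.array[index];
            nextindex = index + 1;
            return true;
        }
        index++;
    }
    while (index - sizearray < t.nodes.size())
    {
        if (t.nodes[index - sizearray])
        {
            key = t.nodes[index - sizearray]->first;
            value = t.nodes[index - sizearray]->second;
            nextindex = index + 1;
            return true;
        }
        index++;
    }
    return false;
}
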
VM_CASE(LOP_FORGPREP_INEXT)
@ -2228,8 +2372,15 @@ static void luau_execute(lua_State* L)
// fast-path: ipairs/inext
if (cl->env->safeenv && ttistable(ra + 1) && ttisnumber(ra + 2) && nvalue(ra + 2) == 0.0)
{
if (FFlag::LuauIter)
setnilvalue(ra);
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(0)));
}
else if (FFlag::LuauIter && !ttisfunction(ra))
{
VM_PROTECT(luaG_typeerror(L, ra, "iterate over"));
}
pc += LUAU_INSN_D(insn);
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
@ -2268,23 +2419,9 @@ static void luau_execute(lua_State* L)
VM_NEXT();
}
}
else if (FFlag::LuauTableNewBoundary || (h->lsizenode == 0 && ttisnil(gval(h->node))))
{
// fallthrough to exit
VM_NEXT();
}
else
{
// the table has a hash part; index + 1 may appear in it in which case we need to iterate through the hash portion as well
const TValue* val = luaH_getnum(h, index + 1);
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
setnvalue(ra + 3, double(index + 1));
setobj2s(L, ra + 4, val);
// note that nil elements inside the array terminate the traversal
pc += ttisnil(ra + 4) ? 0 : LUAU_INSN_D(insn);
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
// fallthrough to exit
VM_NEXT();
}
}
@ -2308,8 +2445,15 @@ static void luau_execute(lua_State* L)
// fast-path: pairs/next
if (cl->env->safeenv && ttistable(ra + 1) && ttisnil(ra + 2))
{
if (FFlag::LuauIter)
setnilvalue(ra);
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(0)));
}
else if (FFlag::LuauIter && !ttisfunction(ra))
{
VM_PROTECT(luaG_typeerror(L, ra, "iterate over"));
}
pc += LUAU_INSN_D(insn);
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
@ -2704,7 +2848,7 @@ static void luau_execute(lua_State* L)
{
VM_PROTECT_PC();
int n = f(L, ra, arg, nresults, nullptr, nparams);
int n = f(L, ra, arg, nresults, NULL, nparams);
if (n >= 0)
{