v0.3.0+luau526
This commit is contained in:
parent
4e923b679b
commit
ba1f6ddd7f
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "luau0-src"
|
||||
version = "0.2.2+luau521"
|
||||
version = "0.3.0+luau526"
|
||||
authors = ["Aleksandr Orlenko <zxteam@protonmail.com>"]
|
||||
edition = "2018"
|
||||
repository = "https://github.com/khvzak/luau-src-rs"
|
||||
|
|
|
@ -313,7 +313,7 @@ template<typename T>
|
|||
struct AstArray
|
||||
{
|
||||
T* data;
|
||||
std::size_t size;
|
||||
size_t size;
|
||||
|
||||
const T* begin() const
|
||||
{
|
||||
|
|
|
@ -32,6 +32,7 @@ class DenseHashTable
|
|||
{
|
||||
public:
|
||||
class const_iterator;
|
||||
class iterator;
|
||||
|
||||
DenseHashTable(const Key& empty_key, size_t buckets = 0)
|
||||
: count(0)
|
||||
|
@ -43,7 +44,7 @@ public:
|
|||
// don't move this to initializer list! this works around an MSVC codegen issue on AMD CPUs:
|
||||
// https://developercommunity.visualstudio.com/t/stdvector-constructor-from-size-t-is-25-times-slow/1546547
|
||||
if (buckets)
|
||||
data.resize(buckets, ItemInterface::create(empty_key));
|
||||
resize_data<Item>(buckets);
|
||||
}
|
||||
|
||||
void clear()
|
||||
|
@ -125,7 +126,7 @@ public:
|
|||
if (data.empty() && data.capacity() >= newsize)
|
||||
{
|
||||
LUAU_ASSERT(count == 0);
|
||||
data.resize(newsize, ItemInterface::create(empty_key));
|
||||
resize_data<Item>(newsize);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -169,6 +170,21 @@ public:
|
|||
return const_iterator(this, data.size());
|
||||
}
|
||||
|
||||
// Returns a mutable iterator positioned at the first slot whose key differs from empty_key,
// or at data.size() when every slot still holds the empty key.
iterator begin()
{
    size_t slot = 0;

    for (; slot < data.size(); ++slot)
    {
        // stop at the first occupied slot
        if (!eq(ItemInterface::getKey(data[slot]), empty_key))
            break;
    }

    return iterator(this, slot);
}
|
||||
|
||||
// Returns the past-the-end mutable iterator: its index equals data.size(), which is where operator++ stops advancing.
iterator end()
|
||||
{
|
||||
return iterator(this, data.size());
|
||||
}
|
||||
|
||||
size_t size() const
|
||||
{
|
||||
return count;
|
||||
|
@ -233,7 +249,82 @@ public:
|
|||
size_t index;
|
||||
};
|
||||
|
||||
class iterator
|
||||
{
|
||||
public:
|
||||
iterator()
|
||||
: set(0)
|
||||
, index(0)
|
||||
{
|
||||
}
|
||||
|
||||
iterator(DenseHashTable<Key, Item, MutableItem, ItemInterface, Hash, Eq>* set, size_t index)
|
||||
: set(set)
|
||||
, index(index)
|
||||
{
|
||||
}
|
||||
|
||||
MutableItem& operator*() const
|
||||
{
|
||||
return *reinterpret_cast<MutableItem*>(&set->data[index]);
|
||||
}
|
||||
|
||||
MutableItem* operator->() const
|
||||
{
|
||||
return reinterpret_cast<MutableItem*>(&set->data[index]);
|
||||
}
|
||||
|
||||
bool operator==(const iterator& other) const
|
||||
{
|
||||
return set == other.set && index == other.index;
|
||||
}
|
||||
|
||||
bool operator!=(const iterator& other) const
|
||||
{
|
||||
return set != other.set || index != other.index;
|
||||
}
|
||||
|
||||
iterator& operator++()
|
||||
{
|
||||
size_t size = set->data.size();
|
||||
|
||||
do
|
||||
{
|
||||
index++;
|
||||
} while (index < size && set->eq(ItemInterface::getKey(set->data[index]), set->empty_key));
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
iterator operator++(int)
|
||||
{
|
||||
iterator res = *this;
|
||||
++*this;
|
||||
return res;
|
||||
}
|
||||
|
||||
private:
|
||||
DenseHashTable<Key, Item, MutableItem, ItemInterface, Hash, Eq>* set;
|
||||
size_t index;
|
||||
};
|
||||
|
||||
private:
|
||||
template<typename T>
|
||||
void resize_data(size_t count, typename std::enable_if_t<std::is_copy_assignable_v<T>>* dummy = nullptr)
|
||||
{
|
||||
data.resize(count, ItemInterface::create(empty_key));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void resize_data(size_t count, typename std::enable_if_t<!std::is_copy_assignable_v<T>>* dummy = nullptr)
|
||||
{
|
||||
size_t size = data.size();
|
||||
data.resize(count);
|
||||
|
||||
for (size_t i = size; i < count; i++)
|
||||
data[i].first = empty_key;
|
||||
}
|
||||
|
||||
std::vector<Item> data;
|
||||
size_t count;
|
||||
Key empty_key;
|
||||
|
@ -290,6 +381,7 @@ class DenseHashSet
|
|||
|
||||
public:
|
||||
typedef typename Impl::const_iterator const_iterator;
|
||||
typedef typename Impl::iterator iterator;
|
||||
|
||||
DenseHashSet(const Key& empty_key, size_t buckets = 0)
|
||||
: impl(empty_key, buckets)
|
||||
|
@ -336,6 +428,16 @@ public:
|
|||
{
|
||||
return impl.end();
|
||||
}
|
||||
|
||||
// Mutable overload: returns impl.begin() unchanged.
iterator begin()
|
||||
{
|
||||
return impl.begin();
|
||||
}
|
||||
|
||||
// Mutable overload: returns impl.end() unchanged.
iterator end()
|
||||
{
|
||||
return impl.end();
|
||||
}
|
||||
};
|
||||
|
||||
// This is a faster alternative of unordered_map, but it does not implement the same interface (i.e. it does not support erasing and has
|
||||
|
@ -348,6 +450,7 @@ class DenseHashMap
|
|||
|
||||
public:
|
||||
typedef typename Impl::const_iterator const_iterator;
|
||||
typedef typename Impl::iterator iterator;
|
||||
|
||||
DenseHashMap(const Key& empty_key, size_t buckets = 0)
|
||||
: impl(empty_key, buckets)
|
||||
|
@ -401,10 +504,21 @@ public:
|
|||
{
|
||||
return impl.begin();
|
||||
}
|
||||
|
||||
// Read-only overload: returns the const past-the-end iterator of the underlying impl.
const_iterator end() const
|
||||
{
|
||||
return impl.end();
|
||||
}
|
||||
|
||||
// Mutable overload: returns impl.begin() unchanged.
iterator begin()
|
||||
{
|
||||
return impl.begin();
|
||||
}
|
||||
|
||||
// Mutable overload: returns impl.end() unchanged.
iterator end()
|
||||
{
|
||||
return impl.end();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Luau
|
||||
|
|
|
@ -173,7 +173,7 @@ public:
|
|||
}
|
||||
|
||||
const Lexeme& next();
|
||||
const Lexeme& next(bool skipComments);
|
||||
const Lexeme& next(bool skipComments, bool updatePrevLocation);
|
||||
void nextline();
|
||||
|
||||
Lexeme lookahead();
|
||||
|
|
|
@ -19,6 +19,7 @@ std::string format(const char* fmt, ...) LUAU_PRINTF_ATTR(1, 2);
|
|||
std::string vformat(const char* fmt, va_list args);
|
||||
|
||||
void formatAppend(std::string& str, const char* fmt, ...) LUAU_PRINTF_ATTR(2, 3);
|
||||
void vformatAppend(std::string& ret, const char* fmt, va_list args);
|
||||
|
||||
std::string join(const std::vector<std::string_view>& segments, std::string_view delimiter);
|
||||
std::string join(const std::vector<std::string>& segments, std::string_view delimiter);
|
||||
|
|
|
@ -9,14 +9,21 @@
|
|||
|
||||
LUAU_FASTFLAG(DebugLuauTimeTracing)
|
||||
|
||||
// Clock helpers declared ahead of the LUAU_ENABLE_TIME_TRACE-guarded section that follows.
namespace Luau
|
||||
{
|
||||
namespace TimeTrace
|
||||
{
|
||||
// Time elapsed since the first call to getClock(), as a double (presumably seconds - confirm against getClockPeriod).
double getClock();
|
||||
// Elapsed time truncated to a 32-bit integer (microseconds, per the name).
uint32_t getClockMicroseconds();
|
||||
} // namespace TimeTrace
|
||||
} // namespace Luau
|
||||
|
||||
#if defined(LUAU_ENABLE_TIME_TRACE)
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace TimeTrace
|
||||
{
|
||||
uint32_t getClockMicroseconds();
|
||||
|
||||
struct Token
|
||||
{
|
||||
const char* name;
|
||||
|
|
|
@ -6,8 +6,6 @@
|
|||
|
||||
#include <limits.h>
|
||||
|
||||
LUAU_FASTFLAGVARIABLE(LuauParseLocationIgnoreCommentSkip, false)
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
|
||||
|
@ -349,13 +347,11 @@ void Lexer::setReadNames(bool read)
|
|||
|
||||
const Lexeme& Lexer::next()
|
||||
{
|
||||
return next(this->skipComments);
|
||||
return next(this->skipComments, true);
|
||||
}
|
||||
|
||||
const Lexeme& Lexer::next(bool skipComments)
|
||||
const Lexeme& Lexer::next(bool skipComments, bool updatePrevLocation)
|
||||
{
|
||||
bool first = true;
|
||||
|
||||
// in skipComments mode we reject valid comments
|
||||
do
|
||||
{
|
||||
|
@ -363,11 +359,11 @@ const Lexeme& Lexer::next(bool skipComments)
|
|||
while (isSpace(peekch()))
|
||||
consume();
|
||||
|
||||
if (!FFlag::LuauParseLocationIgnoreCommentSkip || first)
|
||||
if (updatePrevLocation)
|
||||
prevLocation = lexeme.location;
|
||||
|
||||
lexeme = readNext();
|
||||
first = false;
|
||||
updatePrevLocation = false;
|
||||
} while (skipComments && (lexeme.type == Lexeme::Comment || lexeme.type == Lexeme::BlockComment));
|
||||
|
||||
return lexeme;
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
// See docs/SyntaxChanges.md for an explanation.
|
||||
LUAU_FASTINTVARIABLE(LuauRecursionLimit, 1000)
|
||||
LUAU_FASTINTVARIABLE(LuauParseErrorLimit, 100)
|
||||
LUAU_FASTFLAGVARIABLE(LuauParseLocationIgnoreCommentSkipInCapture, false)
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
|
@ -165,6 +166,7 @@ Parser::Parser(const char* buffer, size_t bufferSize, AstNameTable& names, Alloc
|
|||
Function top;
|
||||
top.vararg = true;
|
||||
|
||||
functionStack.reserve(8);
|
||||
functionStack.push_back(top);
|
||||
|
||||
nameSelf = names.addStatic("self");
|
||||
|
@ -184,6 +186,13 @@ Parser::Parser(const char* buffer, size_t bufferSize, AstNameTable& names, Alloc
|
|||
|
||||
// all hot comments parsed after the first non-comment lexeme are special in that they don't affect type checking / linting mode
|
||||
hotcommentHeader = false;
|
||||
|
||||
// preallocate some buffers that are very likely to grow anyway; this works around std::vector's inefficient growth policy for small arrays
|
||||
localStack.reserve(16);
|
||||
scratchStat.reserve(16);
|
||||
scratchExpr.reserve(16);
|
||||
scratchLocal.reserve(16);
|
||||
scratchBinding.reserve(16);
|
||||
}
|
||||
|
||||
bool Parser::blockFollow(const Lexeme& l)
|
||||
|
@ -1420,6 +1429,11 @@ AstType* Parser::parseTypeAnnotation(TempVector<AstType*>& parts, const Location
|
|||
parts.push_back(parseSimpleTypeAnnotation(/* allowPack= */ false).type);
|
||||
isIntersection = true;
|
||||
}
|
||||
else if (c == Lexeme::Dot3)
|
||||
{
|
||||
report(lexer.current().location, "Unexpected '...' after type annotation");
|
||||
nextLexeme();
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
@ -1536,6 +1550,11 @@ AstTypeOrPack Parser::parseSimpleTypeAnnotation(bool allowPack)
|
|||
prefix = name.name;
|
||||
name = parseIndexName("field name", pointPosition);
|
||||
}
|
||||
else if (lexer.current().type == Lexeme::Dot3)
|
||||
{
|
||||
report(lexer.current().location, "Unexpected '...' after type name; type pack is not allowed in this context");
|
||||
nextLexeme();
|
||||
}
|
||||
else if (name.name == "typeof")
|
||||
{
|
||||
Lexeme typeofBegin = lexer.current();
|
||||
|
@ -2778,7 +2797,7 @@ void Parser::nextLexeme()
|
|||
{
|
||||
if (options.captureComments)
|
||||
{
|
||||
Lexeme::Type type = lexer.next(/* skipComments= */ false).type;
|
||||
Lexeme::Type type = lexer.next(/* skipComments= */ false, true).type;
|
||||
|
||||
while (type == Lexeme::BrokenComment || type == Lexeme::Comment || type == Lexeme::BlockComment)
|
||||
{
|
||||
|
@ -2802,7 +2821,7 @@ void Parser::nextLexeme()
|
|||
hotcomments.push_back({hotcommentHeader, lexeme.location, std::string(text + 1, text + end)});
|
||||
}
|
||||
|
||||
type = lexer.next(/* skipComments= */ false).type;
|
||||
type = lexer.next(/* skipComments= */ false, !FFlag::LuauParseLocationIgnoreCommentSkipInCapture).type;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
namespace Luau
|
||||
{
|
||||
|
||||
static void vformatAppend(std::string& ret, const char* fmt, va_list args)
|
||||
void vformatAppend(std::string& ret, const char* fmt, va_list args)
|
||||
{
|
||||
va_list argscopy;
|
||||
va_copy(argscopy, args);
|
||||
|
|
|
@ -26,9 +26,6 @@
|
|||
#include <time.h>
|
||||
|
||||
LUAU_FASTFLAGVARIABLE(DebugLuauTimeTracing, false)
|
||||
|
||||
#if defined(LUAU_ENABLE_TIME_TRACE)
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace TimeTrace
|
||||
|
@ -67,6 +64,14 @@ static double getClockTimestamp()
|
|||
#endif
|
||||
}
|
||||
|
||||
double getClock()
|
||||
{
|
||||
static double period = getClockPeriod();
|
||||
static double start = getClockTimestamp();
|
||||
|
||||
return (getClockTimestamp() - start) * period;
|
||||
}
|
||||
|
||||
uint32_t getClockMicroseconds()
|
||||
{
|
||||
static double period = getClockPeriod() * 1e6;
|
||||
|
@ -74,7 +79,15 @@ uint32_t getClockMicroseconds()
|
|||
|
||||
return uint32_t((getClockTimestamp() - start) * period);
|
||||
}
|
||||
} // namespace TimeTrace
|
||||
} // namespace Luau
|
||||
|
||||
#if defined(LUAU_ENABLE_TIME_TRACE)
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace TimeTrace
|
||||
{
|
||||
struct GlobalContext
|
||||
{
|
||||
GlobalContext() = default;
|
||||
|
|
|
@ -353,6 +353,11 @@ enum LuauOpcode
|
|||
// AUX: constant index
|
||||
LOP_FASTCALL2K,
|
||||
|
||||
// FORGPREP: prepare loop variables for a generic for loop, jump to the loop backedge unconditionally
|
||||
// A: target register; generic for loops assume a register layout [generator, state, index, variables...]
|
||||
// D: jump offset (-32768..32767)
|
||||
LOP_FORGPREP,
|
||||
|
||||
// Enum entry for number of opcodes, not a valid opcode by itself!
|
||||
LOP__COUNT
|
||||
};
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include "Luau/Bytecode.h"
|
||||
#include "Luau/DenseHash.h"
|
||||
#include "Luau/StringUtils.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
|
@ -80,6 +81,8 @@ public:
|
|||
void pushDebugUpval(StringRef name);
|
||||
uint32_t getDebugPC() const;
|
||||
|
||||
void addDebugRemark(const char* format, ...) LUAU_PRINTF_ATTR(2, 3);
|
||||
|
||||
void finalize();
|
||||
|
||||
enum DumpFlags
|
||||
|
@ -88,6 +91,7 @@ public:
|
|||
Dump_Lines = 1 << 1,
|
||||
Dump_Source = 1 << 2,
|
||||
Dump_Locals = 1 << 3,
|
||||
Dump_Remarks = 1 << 4,
|
||||
};
|
||||
|
||||
void setDumpFlags(uint32_t flags)
|
||||
|
@ -228,6 +232,9 @@ private:
|
|||
|
||||
DenseHashMap<StringRef, unsigned int, StringRefHash> stringTable;
|
||||
|
||||
std::vector<std::pair<uint32_t, uint32_t>> debugRemarks;
|
||||
std::string debugRemarkBuffer;
|
||||
|
||||
BytecodeEncoder* encoder = nullptr;
|
||||
std::string bytecode;
|
||||
|
||||
|
|
|
@ -96,6 +96,7 @@ inline bool isJumpD(LuauOpcode op)
|
|||
case LOP_JUMPIFNOTLT:
|
||||
case LOP_FORNPREP:
|
||||
case LOP_FORNLOOP:
|
||||
case LOP_FORGPREP:
|
||||
case LOP_FORGLOOP:
|
||||
case LOP_FORGPREP_INEXT:
|
||||
case LOP_FORGLOOP_INEXT:
|
||||
|
@ -184,6 +185,13 @@ BytecodeBuilder::BytecodeBuilder(BytecodeEncoder* encoder)
|
|||
, encoder(encoder)
|
||||
{
|
||||
LUAU_ASSERT(stringTable.find(StringRef{"", 0}) == nullptr);
|
||||
|
||||
// preallocate some buffers that are very likely to grow anyway; this works around std::vector's inefficient growth policy for small arrays
|
||||
insns.reserve(32);
|
||||
lines.reserve(32);
|
||||
constants.reserve(16);
|
||||
protos.reserve(16);
|
||||
functions.reserve(8);
|
||||
}
|
||||
|
||||
uint32_t BytecodeBuilder::beginFunction(uint8_t numparams, bool isvararg)
|
||||
|
@ -219,8 +227,8 @@ void BytecodeBuilder::endFunction(uint8_t maxstacksize, uint8_t numupvalues)
|
|||
validate();
|
||||
#endif
|
||||
|
||||
// very approximate: 4 bytes per instruction for code, 1 byte for debug line, and 1-2 bytes for aux data like constants
|
||||
func.data.reserve(insns.size() * 7);
|
||||
// very approximate: 4 bytes per instruction for code, 1 byte for debug line, and 1-2 bytes for aux data like constants plus overhead
|
||||
func.data.reserve(32 + insns.size() * 7);
|
||||
|
||||
writeFunction(func.data, currentFunction);
|
||||
|
||||
|
@ -242,10 +250,15 @@ void BytecodeBuilder::endFunction(uint8_t maxstacksize, uint8_t numupvalues)
|
|||
|
||||
constantMap.clear();
|
||||
tableShapeMap.clear();
|
||||
|
||||
debugRemarks.clear();
|
||||
debugRemarkBuffer.clear();
|
||||
}
|
||||
|
||||
// Records `fid` as the main function; asserts that it indexes an entry of `functions`.
void BytecodeBuilder::setMainFunction(uint32_t fid)
|
||||
{
|
||||
LUAU_ASSERT(fid < functions.size());
|
||||
|
||||
mainFunction = fid;
|
||||
}
|
||||
|
||||
|
@ -505,9 +518,40 @@ uint32_t BytecodeBuilder::getDebugPC() const
|
|||
return uint32_t(insns.size());
|
||||
}
|
||||
|
||||
void BytecodeBuilder::addDebugRemark(const char* format, ...)
|
||||
{
|
||||
if ((dumpFlags & Dump_Remarks) == 0)
|
||||
return;
|
||||
|
||||
size_t offset = debugRemarkBuffer.size();
|
||||
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
vformatAppend(debugRemarkBuffer, format, args);
|
||||
va_end(args);
|
||||
|
||||
// we null-terminate all remarks to avoid storing remark length
|
||||
debugRemarkBuffer += '\0';
|
||||
|
||||
debugRemarks.emplace_back(uint32_t(insns.size()), uint32_t(offset));
|
||||
}
|
||||
|
||||
void BytecodeBuilder::finalize()
|
||||
{
|
||||
LUAU_ASSERT(bytecode.empty());
|
||||
|
||||
// preallocate space for bytecode blob
|
||||
size_t capacity = 16;
|
||||
|
||||
for (auto& p : stringTable)
|
||||
capacity += p.first.length + 2;
|
||||
|
||||
for (const Function& func : functions)
|
||||
capacity += func.data.size();
|
||||
|
||||
bytecode.reserve(capacity);
|
||||
|
||||
// assemble final bytecode blob
|
||||
bytecode = char(LBC_VERSION);
|
||||
|
||||
writeStringTable(bytecode);
|
||||
|
@ -663,6 +707,8 @@ void BytecodeBuilder::writeFunction(std::string& ss, uint32_t id) const
|
|||
|
||||
void BytecodeBuilder::writeLineInfo(std::string& ss) const
|
||||
{
|
||||
LUAU_ASSERT(!lines.empty());
|
||||
|
||||
// this function encodes lines inside each span as a 8-bit delta to span baseline
|
||||
// span is always a power of two; depending on the line info input, it may need to be as low as 1
|
||||
int span = 1 << 24;
|
||||
|
@ -693,7 +739,17 @@ void BytecodeBuilder::writeLineInfo(std::string& ss) const
|
|||
}
|
||||
|
||||
// second pass: compute span base
|
||||
std::vector<int> baseline((lines.size() - 1) / span + 1);
|
||||
int baselineOne = 0;
|
||||
std::vector<int> baselineScratch;
|
||||
int* baseline = &baselineOne;
|
||||
size_t baselineSize = (lines.size() - 1) / span + 1;
|
||||
|
||||
if (baselineSize > 1)
|
||||
{
|
||||
// avoid heap allocation for single-element baseline which is most functions (<256 lines)
|
||||
baselineScratch.resize(baselineSize);
|
||||
baseline = baselineScratch.data();
|
||||
}
|
||||
|
||||
for (size_t offset = 0; offset < lines.size(); offset += span)
|
||||
{
|
||||
|
@ -725,7 +781,7 @@ void BytecodeBuilder::writeLineInfo(std::string& ss) const
|
|||
|
||||
int lastLine = 0;
|
||||
|
||||
for (size_t i = 0; i < baseline.size(); ++i)
|
||||
for (size_t i = 0; i < baselineSize; ++i)
|
||||
{
|
||||
writeInt(ss, baseline[i] - lastLine);
|
||||
lastLine = baseline[i];
|
||||
|
@ -1214,6 +1270,11 @@ void BytecodeBuilder::validate() const
|
|||
VJUMP(LUAU_INSN_D(insn));
|
||||
break;
|
||||
|
||||
case LOP_FORGPREP:
|
||||
VREG(LUAU_INSN_A(insn) + 2 + 1); // forg loop protocol: A, A+1, A+2 are used for iteration protocol; A+3, ... are loop variables
|
||||
VJUMP(LUAU_INSN_D(insn));
|
||||
break;
|
||||
|
||||
case LOP_FORGLOOP:
|
||||
VREG(
|
||||
LUAU_INSN_A(insn) + 2 + insns[i + 1]); // forg loop protocol: A, A+1, A+2 are used for iteration protocol; A+3, ... are loop variables
|
||||
|
@ -1567,6 +1628,10 @@ const uint32_t* BytecodeBuilder::dumpInstruction(const uint32_t* code, std::stri
|
|||
formatAppend(result, "FORNLOOP R%d %+d\n", LUAU_INSN_A(insn), LUAU_INSN_D(insn));
|
||||
break;
|
||||
|
||||
case LOP_FORGPREP:
|
||||
formatAppend(result, "FORGPREP R%d %+d\n", LUAU_INSN_A(insn), LUAU_INSN_D(insn));
|
||||
break;
|
||||
|
||||
case LOP_FORGLOOP:
|
||||
formatAppend(result, "FORGLOOP R%d %+d %d\n", LUAU_INSN_A(insn), LUAU_INSN_D(insn), *code++);
|
||||
break;
|
||||
|
@ -1665,6 +1730,7 @@ std::string BytecodeBuilder::dumpCurrentFunction() const
|
|||
const uint32_t* codeEnd = insns.data() + insns.size();
|
||||
|
||||
int lastLine = -1;
|
||||
size_t nextRemark = 0;
|
||||
|
||||
std::string result;
|
||||
|
||||
|
@ -1687,6 +1753,7 @@ std::string BytecodeBuilder::dumpCurrentFunction() const
|
|||
while (code != codeEnd)
|
||||
{
|
||||
uint8_t op = LUAU_INSN_OP(*code);
|
||||
uint32_t pc = uint32_t(code - insns.data());
|
||||
|
||||
if (op == LOP_PREPVARARGS)
|
||||
{
|
||||
|
@ -1695,9 +1762,18 @@ std::string BytecodeBuilder::dumpCurrentFunction() const
|
|||
continue;
|
||||
}
|
||||
|
||||
if (dumpFlags & Dump_Remarks)
|
||||
{
|
||||
while (nextRemark < debugRemarks.size() && debugRemarks[nextRemark].first == pc)
|
||||
{
|
||||
formatAppend(result, "REMARK %s\n", debugRemarkBuffer.c_str() + debugRemarks[nextRemark].second);
|
||||
nextRemark++;
|
||||
}
|
||||
}
|
||||
|
||||
if (dumpFlags & Dump_Source)
|
||||
{
|
||||
int line = lines[code - insns.data()];
|
||||
int line = lines[pc];
|
||||
|
||||
if (line > 0 && line != lastLine)
|
||||
{
|
||||
|
@ -1709,7 +1785,7 @@ std::string BytecodeBuilder::dumpCurrentFunction() const
|
|||
|
||||
if (dumpFlags & Dump_Lines)
|
||||
{
|
||||
formatAppend(result, "%d: ", lines[code - insns.data()]);
|
||||
formatAppend(result, "%d: ", lines[pc]);
|
||||
}
|
||||
|
||||
code = dumpInstruction(code, result);
|
||||
|
@ -1722,11 +1798,11 @@ void BytecodeBuilder::setDumpSource(const std::string& source)
|
|||
{
|
||||
dumpSource.clear();
|
||||
|
||||
std::string::size_type pos = 0;
|
||||
size_t pos = 0;
|
||||
|
||||
while (pos != std::string::npos)
|
||||
{
|
||||
std::string::size_type next = source.find('\n', pos);
|
||||
size_t next = source.find('\n', pos);
|
||||
|
||||
if (next == std::string::npos)
|
||||
{
|
||||
|
|
|
@ -8,12 +8,27 @@
|
|||
|
||||
#include "Builtins.h"
|
||||
#include "ConstantFolding.h"
|
||||
#include "CostModel.h"
|
||||
#include "TableShape.h"
|
||||
#include "ValueTracking.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <bitset>
|
||||
#include <math.h>
|
||||
#include <limits.h>
|
||||
|
||||
LUAU_FASTFLAGVARIABLE(LuauCompileSupportInlining, false)
|
||||
|
||||
LUAU_FASTFLAGVARIABLE(LuauCompileIter, false)
|
||||
LUAU_FASTFLAGVARIABLE(LuauCompileIterNoReserve, false)
|
||||
LUAU_FASTFLAGVARIABLE(LuauCompileIterNoPairs, false)
|
||||
|
||||
LUAU_FASTINTVARIABLE(LuauCompileLoopUnrollThreshold, 25)
|
||||
LUAU_FASTINTVARIABLE(LuauCompileLoopUnrollThresholdMaxBoost, 300)
|
||||
|
||||
LUAU_FASTINTVARIABLE(LuauCompileInlineThreshold, 25)
|
||||
LUAU_FASTINTVARIABLE(LuauCompileInlineThresholdMaxBoost, 300)
|
||||
LUAU_FASTINTVARIABLE(LuauCompileInlineDepth, 5)
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
|
@ -77,8 +92,12 @@ struct Compiler
|
|||
, globals(AstName())
|
||||
, variables(nullptr)
|
||||
, constants(nullptr)
|
||||
, locstants(nullptr)
|
||||
, tableShapes(nullptr)
|
||||
{
|
||||
// preallocate some buffers that are very likely to grow anyway; this works around std::vector's inefficient growth policy for small arrays
|
||||
localStack.reserve(16);
|
||||
upvals.reserve(16);
|
||||
}
|
||||
|
||||
uint8_t getLocal(AstLocal* local)
|
||||
|
@ -138,6 +157,52 @@ struct Compiler
|
|||
}
|
||||
}
|
||||
|
||||
// Resolves `node` to the function expression it stands for, or nullptr when there is none.
// AstExprGroup wrappers are unwrapped, and a reference to a local is followed to the local's initializer
// as long as the local is tracked in `variables`, is never written and has an initializer.
AstExprFunction* getFunctionExpr(AstExpr* node)
{
    for (;;)
    {
        if (AstExprLocal* localRef = node->as<AstExprLocal>())
        {
            Variable* info = variables.find(localRef->local);

            if (info && !info->written && info->init)
            {
                node = info->init;
                continue;
            }

            return nullptr;
        }

        if (AstExprGroup* group = node->as<AstExprGroup>())
        {
            node = group->expr;
            continue;
        }

        return node->as<AstExprFunction>();
    }
}
|
||||
|
||||
// Reports whether the statement tree rooted at `stat` is free of constructs that block inlining:
// nested function expressions and AstStatFor loops. The traversal stops descending once one is found.
bool canInlineFunctionBody(AstStat* stat)
{
    struct InlineBlockerScan : AstVisitor
    {
        bool result = true;

        bool visit(AstExpr* node) override
        {
            // a nested function may capture arguments of the inlined function, and upvalue handling can't deal with
            // variables that were elided because they are constant
            // TODO: this restriction could go away if function compilation created temporary locals for constant upvalues
            // TODO: compileExprFunction would additionally need to handle upvalue->local migration
            if (result && node->is<AstExprFunction>())
                result = false;

            return result;
        }

        bool visit(AstStat* node) override
        {
            // a loop may get unrolled, which would amplify the cost of the inlined body
            if (result && node->is<AstStatFor>())
                result = false;

            return result;
        }
    };

    InlineBlockerScan scan;
    stat->visit(&scan);

    return scan.result;
}
|
||||
|
||||
uint32_t compileFunction(AstExprFunction* func)
|
||||
{
|
||||
LUAU_TIMETRACE_SCOPE("Compiler::compileFunction", "Compiler");
|
||||
|
@ -205,11 +270,21 @@ struct Compiler
|
|||
|
||||
bytecode.endFunction(uint8_t(stackSize), uint8_t(upvals.size()));
|
||||
|
||||
stackSize = 0;
|
||||
|
||||
Function& f = functions[func];
|
||||
f.id = fid;
|
||||
f.upvals = std::move(upvals);
|
||||
f.upvals = upvals;
|
||||
|
||||
// record information for inlining
|
||||
if (FFlag::LuauCompileSupportInlining && options.optimizationLevel >= 2 && !func->vararg && canInlineFunctionBody(func->body) &&
|
||||
!getfenvUsed && !setfenvUsed)
|
||||
{
|
||||
f.canInline = true;
|
||||
f.stackSize = stackSize;
|
||||
f.costModel = modelCost(func->body, func->args.data, func->args.size);
|
||||
}
|
||||
|
||||
upvals.clear(); // note: instead of std::move above, we copy & clear to preserve capacity for future pushes
|
||||
stackSize = 0;
|
||||
|
||||
return fid;
|
||||
}
|
||||
|
@ -379,12 +454,183 @@ struct Compiler
|
|||
}
|
||||
}
|
||||
|
||||
// Tries to compile the call `expr` to `func` as an inlined copy of the function body.
// Returns false, after recording a debug remark with the reason, when the call is rejected; the caller is then
// expected to emit a regular call. Returns true after the body has been emitted through compileInlinedCall.
//   target/targetCount - registers that receive the results
//   multRet            - the call site wants a variable number of results (not supported, always rejected)
//   thresholdBase      - base cost limit; scaled by the estimated profit, which is capped at thresholdMaxBoost (percent)
//   depthLimit         - maximum number of inline frames that may be active at once
bool tryCompileInlinedCall(AstExprCall* expr, AstExprFunction* func, uint8_t target, uint8_t targetCount, bool multRet, int thresholdBase,
    int thresholdMaxBoost, int depthLimit)
{
    Function* callee = functions.find(func);
    LUAU_ASSERT(callee);

    // the inlined body shares the register file with the caller, so reject when either side is already heavy
    const bool registerPressure = regTop > 128 || callee->stackSize > 32;

    if (registerPressure)
    {
        bytecode.addDebugRemark("inlining failed: high register pressure");
        return false;
    }

    // ideally costs would be aggregated across nested inlining; for now only the nesting depth is limited
    const int activeFrames = int(inlineFrames.size());

    if (activeFrames >= depthLimit)
    {
        bytecode.addDebugRemark("inlining failed: too many inlined frames");
        return false;
    }

    // recursive inlining is hard to compile: constant/variable state is shared, yet variables would need
    // to be bound to different registers
    bool alreadyInlining = false;

    for (const InlineFrame& active : inlineFrames)
    {
        if (active.func == func)
        {
            alreadyInlining = true;
            break;
        }
    }

    if (alreadyInlining)
    {
        bytecode.addDebugRemark("inlining failed: can't inline recursive calls");
        return false;
    }

    // TODO: multret call sites could be supported if every return of the function were multret as well
    if (multRet)
    {
        bytecode.addDebugRemark("inlining failed: can't convert fixed returns to multret");
        return false;
    }

    // TODO: mismatching arity at the call site could be supported, but it is more annoying to compile
    if (func->args.size != expr->args.size)
    {
        bytecode.addDebugRemark("inlining failed: argument count mismatch (expected %d, got %d)", int(func->args.size), int(expr->args.size));
        return false;
    }

    // the cost threshold is dynamic: the fixed limit is boosted by the advantage gained from constant arguments;
    // only the first 8 arguments are tracked
    bool constantArgs[8] = {};

    for (size_t i = 0; i < expr->args.size && i < 8; ++i)
        constantArgs[i] = isConstant(expr->args.data[i]);

    const int trackedArgs = std::min(int(expr->args.size), 8);

    int inlinedCost = computeCost(callee->costModel, constantArgs, trackedArgs);
    int baselineCost = computeCost(callee->costModel, nullptr, 0) + 3;

    // profit is expressed in percent; a body that costs nothing gets the maximum boost
    int inlineProfit = thresholdMaxBoost;

    if (inlinedCost != 0)
        inlineProfit = std::min(thresholdMaxBoost, 100 * baselineCost / inlinedCost);

    int threshold = thresholdBase * inlineProfit / 100;

    if (inlinedCost > threshold)
    {
        bytecode.addDebugRemark("inlining failed: too expensive (cost %d, profit %.2fx)", inlinedCost, double(inlineProfit) / 100);
        return false;
    }

    bytecode.addDebugRemark(
        "inlining succeeded (cost %d, profit %.2fx, depth %d)", inlinedCost, double(inlineProfit) / 100, int(inlineFrames.size()));

    compileInlinedCall(expr, func, target, targetCount);
    return true;
}
|
||||
|
||||
// Emits the body of `func` in place of the call `expr`, leaving results in registers [target, target + targetCount).
// Steps, in order: push an inline frame, bind every parameter (fresh register, folded constant, or the register
// of an unmodified local), fold constants into the body, compile the statements, nil-fill the targets when the
// body can fall off its end, patch return jumps to the end label, then reset the temporary constant state.
void compileInlinedCall(AstExprCall* expr, AstExprFunction* func, uint8_t target, uint8_t targetCount)
{
    RegScope rs(this);

    const size_t savedLocals = localStack.size();

    // the frame is pushed before anything is compiled so that nested attempts to inline `func` are rejected
    inlineFrames.push_back({func, target, targetCount});

    // evaluates `value` into a newly allocated register and binds `param` to it
    auto bindToFreshRegister = [&](AstLocal* param, AstExpr* value) {
        uint8_t reg = allocReg(value, 1);
        compileExprTemp(value, reg);
        pushLocal(param, reg);
    };

    // bind all parameters; no code is emitted for constant arguments since constant folding takes care of them
    for (size_t i = 0; i < func->args.size; ++i)
    {
        AstLocal* param = func->args.data[i];
        AstExpr* value = expr->args.data[i];

        // a parameter that the body mutates needs a register of its own even when the argument is constant
        Variable* paramInfo = variables.find(param);

        if (paramInfo && paramInfo->written)
        {
            bindToFreshRegister(param, value);
            continue;
        }

        // an unmodified parameter with a known constant argument is folded straight into the body
        const Constant* folded = constants.find(value);

        if (folded && folded->type != Constant::Type_Unknown)
        {
            locstants[param] = *folded;
            continue;
        }

        AstExprLocal* localArg = value->as<AstExprLocal>();
        Variable* localInfo = localArg ? variables.find(localArg->local) : nullptr;

        // an argument that is a register-backed local which is never mutated can share its register
        const bool reuseRegister = isExprLocalReg(value) && (!localInfo || !localInfo->written);

        if (reuseRegister)
            pushLocal(param, getLocal(localArg->local));
        else
            bindToFreshRegister(param, value);
    }

    // propagate the parameter constants recorded above into the expressions of the body
    foldConstants(constants, variables, locstants, func->body);

    bool usedFallthrough = false;

    for (size_t i = 0; i < func->body->body.size; ++i)
    {
        AstStat* stat = func->body->body.data[i];
        AstStatReturn* ret = stat->as<AstStatReturn>();

        if (!ret)
        {
            compileStat(stat);
            continue;
        }

        // Optimization: a top-level return ends the body, so it can fall through instead of emitting an extra JUMP
        compileInlineReturn(ret, /* fallthrough= */ true);
        // TODO: this doesn't cover returns nested in control flow; ideally that state would be tracked and generalized
        usedFallthrough = true;
        break;
    }

    // when execution can reach the end of the body without returning, the target registers must be cleared
    if (!usedFallthrough && !allPathsEndWithReturn(func->body))
    {
        for (size_t i = 0; i < targetCount; ++i)
            bytecode.emitABC(LOP_LOADNIL, uint8_t(target + i), 0, 0);
    }

    popLocals(savedLocals);

    // every return compiled inside the body jumps here
    size_t returnLabel = bytecode.emitLabel();
    patchJumps(expr, inlineFrames.back().returnJumps, returnLabel);

    inlineFrames.pop_back();

    // reset parameter constants so that later inlining attempts of the same function start from a clean state
    for (size_t i = 0; i < func->args.size; ++i)
    {
        Constant* paramConstant = locstants.find(func->args.data[i]);

        if (paramConstant)
            paramConstant->type = Constant::Type_Unknown;
    }

    foldConstants(constants, variables, locstants, func->body);
}
|
||||
|
||||
void compileExprCall(AstExprCall* expr, uint8_t target, uint8_t targetCount, bool targetTop = false, bool multRet = false)
|
||||
{
|
||||
LUAU_ASSERT(!targetTop || unsigned(target + targetCount) == regTop);
|
||||
|
||||
setDebugLine(expr); // normally compileExpr sets up line info, but compileExprCall can be called directly
|
||||
|
||||
// try inlining the function
|
||||
if (options.optimizationLevel >= 2 && !expr->self)
|
||||
{
|
||||
AstExprFunction* func = getFunctionExpr(expr->func);
|
||||
Function* fi = func ? functions.find(func) : nullptr;
|
||||
|
||||
if (fi && fi->canInline &&
|
||||
tryCompileInlinedCall(expr, func, target, targetCount, multRet, FInt::LuauCompileInlineThreshold,
|
||||
FInt::LuauCompileInlineThresholdMaxBoost, FInt::LuauCompileInlineDepth))
|
||||
return;
|
||||
|
||||
if (fi && !fi->canInline)
|
||||
bytecode.addDebugRemark("inlining failed: complex constructs in function body");
|
||||
}
|
||||
|
||||
RegScope rs(this);
|
||||
|
||||
unsigned int regCount = std::max(unsigned(1 + expr->self + expr->args.size), unsigned(targetCount));
|
||||
|
@ -749,7 +995,7 @@ struct Compiler
|
|||
{
|
||||
const Constant* c = constants.find(node);
|
||||
|
||||
if (!c)
|
||||
if (!c || c->type == Constant::Type_Unknown)
|
||||
return -1;
|
||||
|
||||
int cid = -1;
|
||||
|
@ -1384,27 +1630,29 @@ struct Compiler
|
|||
{
|
||||
RegScope rs(this);
|
||||
|
||||
// note: cv may be invalidated by compileExpr* so we stop using it before calling compile recursively
|
||||
const Constant* cv = constants.find(expr->index);
|
||||
|
||||
if (cv && cv->type == Constant::Type_Number && double(int(cv->valueNumber)) == cv->valueNumber && cv->valueNumber >= 1 &&
|
||||
cv->valueNumber <= 256)
|
||||
if (cv && cv->type == Constant::Type_Number && cv->valueNumber >= 1 && cv->valueNumber <= 256 &&
|
||||
double(int(cv->valueNumber)) == cv->valueNumber)
|
||||
{
|
||||
uint8_t rt = compileExprAuto(expr->expr, rs);
|
||||
uint8_t i = uint8_t(int(cv->valueNumber) - 1);
|
||||
|
||||
uint8_t rt = compileExprAuto(expr->expr, rs);
|
||||
|
||||
setDebugLine(expr->index);
|
||||
|
||||
bytecode.emitABC(LOP_GETTABLEN, target, rt, i);
|
||||
}
|
||||
else if (cv && cv->type == Constant::Type_String)
|
||||
{
|
||||
uint8_t rt = compileExprAuto(expr->expr, rs);
|
||||
|
||||
BytecodeBuilder::StringRef iname = sref(cv->getString());
|
||||
int32_t cid = bytecode.addConstantString(iname);
|
||||
if (cid < 0)
|
||||
CompileError::raise(expr->location, "Exceeded constant limit; simplify the code to compile");
|
||||
|
||||
uint8_t rt = compileExprAuto(expr->expr, rs);
|
||||
|
||||
setDebugLine(expr->index);
|
||||
|
||||
bytecode.emitABC(LOP_GETTABLEKS, target, rt, uint8_t(BytecodeBuilder::getStringHash(iname)));
|
||||
|
@ -1550,8 +1798,9 @@ struct Compiler
|
|||
}
|
||||
else if (AstExprLocal* expr = node->as<AstExprLocal>())
|
||||
{
|
||||
if (expr->upvalue)
|
||||
if (FFlag::LuauCompileSupportInlining ? !isExprLocalReg(expr) : expr->upvalue)
|
||||
{
|
||||
LUAU_ASSERT(expr->upvalue);
|
||||
uint8_t uid = getUpval(expr->local);
|
||||
|
||||
bytecode.emitABC(LOP_GETUPVAL, target, uid, 0);
|
||||
|
@ -1639,12 +1888,12 @@ struct Compiler
|
|||
// initializes target..target+targetCount-1 range using expressions from the list
|
||||
// if list has fewer expressions, and last expression is a call, we assume the call returns the rest of the values
|
||||
// if list has fewer expressions, and last expression isn't a call, we fill the rest with nil
|
||||
// assumes target register range can be clobbered and is at the top of the register space
|
||||
void compileExprListTop(const AstArray<AstExpr*>& list, uint8_t target, uint8_t targetCount)
|
||||
// assumes target register range can be clobbered and is at the top of the register space if targetTop = true
|
||||
void compileExprListTemp(const AstArray<AstExpr*>& list, uint8_t target, uint8_t targetCount, bool targetTop)
|
||||
{
|
||||
// we assume that target range is at the top of the register space and can be clobbered
|
||||
// this is what allows us to compile the last call expression - if it's a call - using targetTop=true
|
||||
LUAU_ASSERT(unsigned(target + targetCount) == regTop);
|
||||
LUAU_ASSERT(!targetTop || unsigned(target + targetCount) == regTop);
|
||||
|
||||
if (list.size == targetCount)
|
||||
{
|
||||
|
@ -1672,7 +1921,7 @@ struct Compiler
|
|||
|
||||
if (AstExprCall* expr = last->as<AstExprCall>())
|
||||
{
|
||||
compileExprCall(expr, uint8_t(target + list.size - 1), uint8_t(targetCount - (list.size - 1)), /* targetTop= */ true);
|
||||
compileExprCall(expr, uint8_t(target + list.size - 1), uint8_t(targetCount - (list.size - 1)), targetTop);
|
||||
}
|
||||
else if (AstExprVarargs* expr = last->as<AstExprVarargs>())
|
||||
{
|
||||
|
@ -1754,8 +2003,10 @@ struct Compiler
|
|||
|
||||
if (AstExprLocal* expr = node->as<AstExprLocal>())
|
||||
{
|
||||
if (expr->upvalue)
|
||||
if (FFlag::LuauCompileSupportInlining ? !isExprLocalReg(expr) : expr->upvalue)
|
||||
{
|
||||
LUAU_ASSERT(expr->upvalue);
|
||||
|
||||
LValue result = {LValue::Kind_Upvalue};
|
||||
result.upval = getUpval(expr->local);
|
||||
result.location = node->location;
|
||||
|
@ -1862,7 +2113,7 @@ struct Compiler
|
|||
bool isExprLocalReg(AstExpr* expr)
|
||||
{
|
||||
AstExprLocal* le = expr->as<AstExprLocal>();
|
||||
if (!le || le->upvalue)
|
||||
if (!le || (!FFlag::LuauCompileSupportInlining && le->upvalue))
|
||||
return false;
|
||||
|
||||
Local* l = locals.find(le->local);
|
||||
|
@ -2069,6 +2320,23 @@ struct Compiler
|
|||
loops.pop_back();
|
||||
}
|
||||
|
||||
// Compiles a `return` statement that appears inside a function body being inlined.
// The returned values are written into the target register range of the innermost inline frame
// instead of emitting a real return.
//  stat        - the return statement to compile
//  fallthrough - when true no jump is emitted after the values are stored (execution simply continues);
//                when false a JUMP is emitted and recorded in the frame's returnJumps so it can be patched later
void compileInlineReturn(AstStatReturn* stat, bool fallthrough)
{
    LUAU_ASSERT(!inlineFrames.empty());

    setDebugLine(stat); // normally compileStat sets up line info, but compileInlineReturn can be called directly

    // only the target range is needed here; read it by value instead of copying the whole frame (which owns a vector)
    // note: no reference into inlineFrames is kept across the call below because compiling the expressions can
    // go through compileExprCall, which may push/pop further inline frames
    const uint8_t target = inlineFrames.back().target;
    const uint8_t targetCount = inlineFrames.back().targetCount;

    compileExprListTemp(stat->list, target, targetCount, /* targetTop= */ false);

    if (!fallthrough)
    {
        size_t jumpLabel = bytecode.emitLabel();
        bytecode.emitAD(LOP_JUMP, 0, 0);

        inlineFrames.back().returnJumps.push_back(jumpLabel);
    }
}
|
||||
|
||||
void compileStatReturn(AstStatReturn* stat)
|
||||
{
|
||||
RegScope rs(this);
|
||||
|
@ -2127,16 +2395,137 @@ struct Compiler
|
|||
// note: allocReg in this case allocates into parent block register - note that we don't have RegScope here
|
||||
uint8_t vars = allocReg(stat, unsigned(stat->vars.size));
|
||||
|
||||
compileExprListTop(stat->values, vars, uint8_t(stat->vars.size));
|
||||
compileExprListTemp(stat->values, vars, uint8_t(stat->vars.size), /* targetTop= */ true);
|
||||
|
||||
for (size_t i = 0; i < stat->vars.size; ++i)
|
||||
pushLocal(stat->vars.data[i], uint8_t(vars + i));
|
||||
}
|
||||
|
||||
// Returns the value of expr as an int when it folded to a numeric constant that is an exact integer
// in [-32767, 32767]; returns INT_MIN as the "not available" sentinel otherwise.
int getConstantShort(AstExpr* expr)
{
    const Constant* folded = constants.find(expr);

    if (!folded || folded->type != Constant::Type_Number)
        return INT_MIN;

    const double value = folded->valueNumber;

    // the range test runs first so the int conversion below is only evaluated for values that fit
    const bool fitsRange = value >= -32767 && value <= 32767;

    if (fitsRange && double(int(value)) == value)
        return int(value);

    return INT_MIN;
}
|
||||
|
||||
// Scans the body of a numeric for loop and reports whether it only contains constructs the unroller supports.
// Returns false as soon as a function expression, a nested numeric for loop, a continue or a break is found.
bool canUnrollForBody(AstStatFor* stat)
{
    struct BodyScanner : AstVisitor
    {
        bool supported = true;

        bool visit(AstExpr* node) override
        {
            // functions may capture loop variable, and our upval handling doesn't handle elided variables (constant)
            // TODO: we could remove this case if we changed function compilation to create temporary locals for constant upvalues
            if (node->is<AstExprFunction>())
                supported = false;

            // returning false once the body is rejected stops descending into this subtree
            return supported;
        }

        bool visit(AstStat* node) override
        {
            // while we can easily unroll nested loops, our cost model doesn't take unrolling into account so this can result in code explosion
            // we also avoid continue/break since they introduce control flow across iterations
            const bool blocksUnroll = node->is<AstStatFor>() || node->is<AstStatContinue>() || node->is<AstStatBreak>();

            if (blocksUnroll)
                supported = false;

            return supported;
        }
    };

    BodyScanner scanner;
    stat->body->visit(&scanner);

    return scanner.supported;
}
|
||||
|
||||
// Attempts to compile a numeric for loop as straight-line copies of its body, one per iteration.
//  stat              - the loop to unroll
//  thresholdBase     - upper limit on the trip count, and the base of the cost budget
//  thresholdMaxBoost - cap (in percent) on how much the cost budget may be scaled by the estimated profit
// Returns true when the unrolled body was emitted; otherwise records a debug remark and returns false
// so the caller can compile the loop the regular way.
bool tryCompileUnrolledFor(AstStatFor* stat, int thresholdBase, int thresholdMaxBoost)
{
    const int from = getConstantShort(stat->from);
    const int to = getConstantShort(stat->to);
    const int step = stat->step ? getConstantShort(stat->step) : 1;

    // check that limits are reasonably small and trip count can be computed
    // (INT_MIN is what getConstantShort returns when a bound is not a small integer constant)
    const bool boundsKnown = from != INT_MIN && to != INT_MIN && step != INT_MIN;
    const bool wrongDirection = (step < 0 && to > from) || (step > 0 && to < from);

    if (!boundsKnown || step == 0 || wrongDirection)
    {
        bytecode.addDebugRemark("loop unroll failed: invalid iteration count");
        return false;
    }

    if (!canUnrollForBody(stat))
    {
        bytecode.addDebugRemark("loop unroll failed: unsupported loop body");
        return false;
    }

    Variable* loopVar = variables.find(stat->var);

    if (loopVar && loopVar->written)
    {
        bytecode.addDebugRemark("loop unroll failed: mutable loop variable");
        return false;
    }

    const int tripCount = (to - from) / step + 1;

    if (tripCount > thresholdBase)
    {
        bytecode.addDebugRemark("loop unroll failed: too many iterations (%d)", tripCount);
        return false;
    }

    AstLocal* var = stat->var;
    const uint64_t costModel = modelCost(stat->body, &var, 1);

    // we use a dynamic cost threshold that's based on the fixed limit boosted by the cost advantage we gain due to unrolling
    bool varc = true;
    const int unrolledCost = computeCost(costModel, &varc, 1) * tripCount;
    const int baselineCost = (computeCost(costModel, nullptr, 0) + 1) * tripCount;

    int unrollProfit = thresholdMaxBoost;
    if (unrolledCost != 0)
        unrollProfit = std::min(thresholdMaxBoost, 100 * baselineCost / unrolledCost);

    const int threshold = thresholdBase * unrollProfit / 100;
    const double profitRatio = double(unrollProfit) / 100;

    if (unrolledCost > threshold)
    {
        bytecode.addDebugRemark(
            "loop unroll failed: too expensive (iterations %d, cost %d, profit %.2fx)", tripCount, unrolledCost, profitRatio);
        return false;
    }

    bytecode.addDebugRemark("loop unroll succeeded (iterations %d, cost %d, profit %.2fx)", tripCount, unrolledCost, profitRatio);

    for (int i = from; step > 0 ? i <= to : i >= to; i += step)
    {
        // we need to re-fold constants in the loop body with the new value; this reuses computed constant values elsewhere in the tree
        {
            // the reference is confined to this scope so it is never used after foldConstants touches the map
            Constant& iterValue = locstants[var];
            iterValue.type = Constant::Type_Number;
            iterValue.valueNumber = i;
        }

        foldConstants(constants, variables, locstants, stat);

        compileStat(stat->body);
    }

    // clean up fold state in case we need to recompile - normally we compile the loop body once, but due to inlining we may need to do it again
    locstants[var].type = Constant::Type_Unknown;

    foldConstants(constants, variables, locstants, stat);

    return true;
}
|
||||
|
||||
void compileStatFor(AstStatFor* stat)
|
||||
{
|
||||
RegScope rs(this);
|
||||
|
||||
// Optimization: small loops can be unrolled when it is profitable
|
||||
if (options.optimizationLevel >= 2 && isConstant(stat->to) && isConstant(stat->from) && (!stat->step || isConstant(stat->step)))
|
||||
if (tryCompileUnrolledFor(stat, FInt::LuauCompileLoopUnrollThreshold, FInt::LuauCompileLoopUnrollThresholdMaxBoost))
|
||||
return;
|
||||
|
||||
size_t oldLocals = localStack.size();
|
||||
size_t oldJumps = loopJumps.size();
|
||||
|
||||
|
@ -2210,12 +2599,17 @@ struct Compiler
|
|||
uint8_t regs = allocReg(stat, 3);
|
||||
|
||||
// this puts initial values of (generator, state, index) into the loop registers
|
||||
compileExprListTop(stat->values, regs, 3);
|
||||
compileExprListTemp(stat->values, regs, 3, /* targetTop= */ true);
|
||||
|
||||
// for the general case, we will execute a CALL for every iteration that needs to evaluate "variables... = generator(state, index)"
|
||||
// this requires at least extra 3 stack slots after index
|
||||
// note that these stack slots overlap with the variables so we only need to reserve them to make sure stack frame is large enough
|
||||
reserveReg(stat, 3);
|
||||
// we don't need this because the extra stack space is just for calling the function with a loop protocol which is similar to calling
|
||||
// metamethods - it should fit into the extra stack reservation
|
||||
if (!FFlag::LuauCompileIterNoReserve)
|
||||
{
|
||||
// for the general case, we will execute a CALL for every iteration that needs to evaluate "variables... = generator(state, index)"
|
||||
// this requires at least extra 3 stack slots after index
|
||||
// note that these stack slots overlap with the variables so we only need to reserve them to make sure stack frame is large enough
|
||||
reserveReg(stat, 3);
|
||||
}
|
||||
|
||||
// note that we reserve at least 2 variables; this allows our fast path to assume that we need 2 variables instead of 1 or 2
|
||||
uint8_t vars = allocReg(stat, std::max(unsigned(stat->vars.size), 2u));
|
||||
|
@ -2224,7 +2618,7 @@ struct Compiler
|
|||
// Optimization: when we iterate through pairs/ipairs, we generate special bytecode that optimizes the traversal using internal iteration
|
||||
// index. These instructions dynamically check if generator is equal to next/inext and bail out. They assume that the generator produces 2
|
||||
// variables, which is why we allocate at least 2 above (see vars assignment)
|
||||
LuauOpcode skipOp = LOP_JUMP;
|
||||
LuauOpcode skipOp = FFlag::LuauCompileIter ? LOP_FORGPREP : LOP_JUMP;
|
||||
LuauOpcode loopOp = LOP_FORGLOOP;
|
||||
|
||||
if (options.optimizationLevel >= 1 && stat->vars.size <= 2)
|
||||
|
@ -2241,7 +2635,7 @@ struct Compiler
|
|||
else if (builtin.isGlobal("pairs")) // for .. in pairs(t)
|
||||
{
|
||||
skipOp = LOP_FORGPREP_NEXT;
|
||||
loopOp = LOP_FORGLOOP_NEXT;
|
||||
loopOp = FFlag::LuauCompileIterNoPairs ? LOP_FORGLOOP : LOP_FORGLOOP_NEXT;
|
||||
}
|
||||
}
|
||||
else if (stat->values.size == 2)
|
||||
|
@ -2251,7 +2645,7 @@ struct Compiler
|
|||
if (builtin.isGlobal("next")) // for .. in next,t
|
||||
{
|
||||
skipOp = LOP_FORGPREP_NEXT;
|
||||
loopOp = LOP_FORGLOOP_NEXT;
|
||||
loopOp = FFlag::LuauCompileIterNoPairs ? LOP_FORGLOOP : LOP_FORGLOOP_NEXT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2388,10 +2782,10 @@ struct Compiler
|
|||
// compute values into temporaries
|
||||
uint8_t regs = allocReg(stat, unsigned(stat->vars.size));
|
||||
|
||||
compileExprListTop(stat->values, regs, uint8_t(stat->vars.size));
|
||||
compileExprListTemp(stat->values, regs, uint8_t(stat->vars.size), /* targetTop= */ true);
|
||||
|
||||
// assign variables that have associated values; note that if we have fewer values than variables, we'll assign nil because compileExprListTop
|
||||
// will generate nils
|
||||
// assign variables that have associated values; note that if we have fewer values than variables, we'll assign nil because
|
||||
// compileExprListTemp will generate nils
|
||||
for (size_t i = 0; i < stat->vars.size; ++i)
|
||||
{
|
||||
setDebugLine(stat->vars.data[i]);
|
||||
|
@ -2549,7 +2943,10 @@ struct Compiler
|
|||
}
|
||||
else if (AstStatReturn* stat = node->as<AstStatReturn>())
|
||||
{
|
||||
compileStatReturn(stat);
|
||||
if (options.optimizationLevel >= 2 && !inlineFrames.empty())
|
||||
compileInlineReturn(stat, /* fallthrough= */ false);
|
||||
else
|
||||
compileStatReturn(stat);
|
||||
}
|
||||
else if (AstStatExpr* stat = node->as<AstStatExpr>())
|
||||
{
|
||||
|
@ -2826,6 +3223,8 @@ struct Compiler
|
|||
: self(self)
|
||||
, functions(functions)
|
||||
{
|
||||
// preallocate the result; this works around std::vector's inefficient growth policy for small arrays
|
||||
functions.reserve(16);
|
||||
}
|
||||
|
||||
bool visit(AstExprFunction* node) override
|
||||
|
@ -2941,6 +3340,10 @@ struct Compiler
|
|||
{
|
||||
uint32_t id;
|
||||
std::vector<AstLocal*> upvals;
|
||||
|
||||
uint64_t costModel = 0;
|
||||
unsigned int stackSize = 0;
|
||||
bool canInline = false;
|
||||
};
|
||||
|
||||
struct Local
|
||||
|
@ -2970,6 +3373,16 @@ struct Compiler
|
|||
AstExpr* untilCondition;
|
||||
};
|
||||
|
||||
// Per-call state kept on the inlineFrames stack while the body of an inlined call is being compiled.
struct InlineFrame
|
||||
{
|
||||
// presumably the function whose body is being inlined for this frame - TODO confirm against the code that pushes frames
AstExprFunction* func;
|
||||
|
||||
// first register of the range that receives the values of inlined `return` statements
uint8_t target;
|
||||
// number of registers in that range
uint8_t targetCount;
|
||||
|
||||
// labels of the JUMP instructions emitted for non-fallthrough returns; patched to the return label once the body is compiled
std::vector<size_t> returnJumps;
|
||||
};
|
||||
|
||||
BytecodeBuilder& bytecode;
|
||||
|
||||
CompileOptions options;
|
||||
|
@ -2979,6 +3392,7 @@ struct Compiler
|
|||
DenseHashMap<AstName, Global> globals;
|
||||
DenseHashMap<AstLocal*, Variable> variables;
|
||||
DenseHashMap<AstExpr*, Constant> constants;
|
||||
DenseHashMap<AstLocal*, Constant> locstants;
|
||||
DenseHashMap<AstExprTable*, TableShape> tableShapes;
|
||||
|
||||
unsigned int regTop = 0;
|
||||
|
@ -2991,6 +3405,7 @@ struct Compiler
|
|||
std::vector<AstLocal*> upvals;
|
||||
std::vector<LoopJump> loopJumps;
|
||||
std::vector<Loop> loops;
|
||||
std::vector<InlineFrame> inlineFrames;
|
||||
};
|
||||
|
||||
void compileOrThrow(BytecodeBuilder& bytecode, AstStatBlock* root, const AstNameTable& names, const CompileOptions& options)
|
||||
|
@ -3008,7 +3423,7 @@ void compileOrThrow(BytecodeBuilder& bytecode, AstStatBlock* root, const AstName
|
|||
if (options.optimizationLevel >= 1)
|
||||
{
|
||||
// this pass analyzes constantness of expressions
|
||||
foldConstants(compiler.constants, compiler.variables, root);
|
||||
foldConstants(compiler.constants, compiler.variables, compiler.locstants, root);
|
||||
|
||||
// this pass analyzes table assignments to estimate table shapes for initially empty tables
|
||||
predictTableShapes(compiler.tableShapes, root);
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
#include <math.h>
|
||||
|
||||
LUAU_FASTFLAG(LuauCompileSupportInlining)
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace Compile
|
||||
|
@ -191,13 +193,13 @@ struct ConstantVisitor : AstVisitor
|
|||
{
|
||||
DenseHashMap<AstExpr*, Constant>& constants;
|
||||
DenseHashMap<AstLocal*, Variable>& variables;
|
||||
DenseHashMap<AstLocal*, Constant>& locals;
|
||||
|
||||
DenseHashMap<AstLocal*, Constant> locals;
|
||||
|
||||
ConstantVisitor(DenseHashMap<AstExpr*, Constant>& constants, DenseHashMap<AstLocal*, Variable>& variables)
|
||||
ConstantVisitor(
|
||||
DenseHashMap<AstExpr*, Constant>& constants, DenseHashMap<AstLocal*, Variable>& variables, DenseHashMap<AstLocal*, Constant>& locals)
|
||||
: constants(constants)
|
||||
, variables(variables)
|
||||
, locals(nullptr)
|
||||
, locals(locals)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -290,7 +292,8 @@ struct ConstantVisitor : AstVisitor
|
|||
Constant la = analyze(expr->left);
|
||||
Constant ra = analyze(expr->right);
|
||||
|
||||
if (la.type != Constant::Type_Unknown && ra.type != Constant::Type_Unknown)
|
||||
// note: ra doesn't need to be constant to fold and/or
|
||||
if (la.type != Constant::Type_Unknown)
|
||||
foldBinary(result, expr->op, la, ra);
|
||||
}
|
||||
else if (AstExprTypeAssertion* expr = node->as<AstExprTypeAssertion>())
|
||||
|
@ -313,12 +316,35 @@ struct ConstantVisitor : AstVisitor
|
|||
LUAU_ASSERT(!"Unknown expression type");
|
||||
}
|
||||
|
||||
if (result.type != Constant::Type_Unknown)
|
||||
constants[node] = result;
|
||||
recordConstant(constants, node, result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void recordConstant(DenseHashMap<T, Constant>& map, T key, const Constant& value)
|
||||
{
|
||||
if (value.type != Constant::Type_Unknown)
|
||||
map[key] = value;
|
||||
else if (!FFlag::LuauCompileSupportInlining)
|
||||
;
|
||||
else if (Constant* old = map.find(key))
|
||||
old->type = Constant::Type_Unknown;
|
||||
}
|
||||
|
||||
// Records the folded initial value of a local.
// Locals that are written after initialization are left untouched; for the rest the variable's
// constant flag is updated and the value is stored in (or invalidated from) the locals map.
void recordValue(AstLocal* local, const Constant& value)
{
    // note: we rely on trackValues to have been run before us
    Variable* info = variables.find(local);
    LUAU_ASSERT(info);

    if (info->written)
        return;

    const bool known = value.type != Constant::Type_Unknown;

    info->constant = known;
    recordConstant(locals, local, value);
}
|
||||
|
||||
bool visit(AstExpr* node) override
|
||||
{
|
||||
// note: we short-circuit the visitor traversal through any expression trees by returning false
|
||||
|
@ -335,18 +361,7 @@ struct ConstantVisitor : AstVisitor
|
|||
{
|
||||
Constant arg = analyze(node->values.data[i]);
|
||||
|
||||
if (arg.type != Constant::Type_Unknown)
|
||||
{
|
||||
// note: we rely on trackValues to have been run before us
|
||||
Variable* v = variables.find(node->vars.data[i]);
|
||||
LUAU_ASSERT(v);
|
||||
|
||||
if (!v->written)
|
||||
{
|
||||
locals[node->vars.data[i]] = arg;
|
||||
v->constant = true;
|
||||
}
|
||||
}
|
||||
recordValue(node->vars.data[i], arg);
|
||||
}
|
||||
|
||||
if (node->vars.size > node->values.size)
|
||||
|
@ -360,15 +375,8 @@ struct ConstantVisitor : AstVisitor
|
|||
{
|
||||
for (size_t i = node->values.size; i < node->vars.size; ++i)
|
||||
{
|
||||
// note: we rely on trackValues to have been run before us
|
||||
Variable* v = variables.find(node->vars.data[i]);
|
||||
LUAU_ASSERT(v);
|
||||
|
||||
if (!v->written)
|
||||
{
|
||||
locals[node->vars.data[i]].type = Constant::Type_Nil;
|
||||
v->constant = true;
|
||||
}
|
||||
Constant nil = {Constant::Type_Nil};
|
||||
recordValue(node->vars.data[i], nil);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -384,9 +392,10 @@ struct ConstantVisitor : AstVisitor
|
|||
}
|
||||
};
|
||||
|
||||
void foldConstants(DenseHashMap<AstExpr*, Constant>& constants, DenseHashMap<AstLocal*, Variable>& variables, AstNode* root)
|
||||
void foldConstants(DenseHashMap<AstExpr*, Constant>& constants, DenseHashMap<AstLocal*, Variable>& variables,
|
||||
DenseHashMap<AstLocal*, Constant>& locals, AstNode* root)
|
||||
{
|
||||
ConstantVisitor visitor{constants, variables};
|
||||
ConstantVisitor visitor{constants, variables, locals};
|
||||
root->visit(&visitor);
|
||||
}
|
||||
|
||||
|
|
|
@ -42,7 +42,8 @@ struct Constant
|
|||
}
|
||||
};
|
||||
|
||||
void foldConstants(DenseHashMap<AstExpr*, Constant>& constants, DenseHashMap<AstLocal*, Variable>& variables, AstNode* root);
|
||||
void foldConstants(DenseHashMap<AstExpr*, Constant>& constants, DenseHashMap<AstLocal*, Variable>& variables,
|
||||
DenseHashMap<AstLocal*, Constant>& locals, AstNode* root);
|
||||
|
||||
} // namespace Compile
|
||||
} // namespace Luau
|
||||
|
|
|
@ -0,0 +1,258 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#include "CostModel.h"
|
||||
|
||||
#include "Luau/Common.h"
|
||||
#include "Luau/DenseHash.h"
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace Compile
|
||||
{
|
||||
|
||||
// Adds x and y as eight independent byte lanes; every lane whose sum has bit 7 set is clamped to 0x7f.
// Assumes each input lane is at most 0x7f so that a lane sum cannot carry into its neighbour.
inline uint64_t parallelAddSat(uint64_t x, uint64_t y)
{
    const uint64_t sum = x + y;

    // lanes that reached the high bit need saturation
    const uint64_t overflowed = sum & 0x8080808080808080ull;

    // 0x80 - 0x01 == 0x7f in each overflowed lane, 0 everywhere else
    const uint64_t clamp = overflowed - (overflowed >> 7);

    // drop the high bit of overflowed lanes, then force their low 7 bits to all ones
    return (sum ^ overflowed) | clamp;
}
|
||||
|
||||
struct Cost
|
||||
{
|
||||
static const uint64_t kLiteral = ~0ull;
|
||||
|
||||
// cost model: 8 bytes, where first byte is the baseline cost, and the next 7 bytes are discounts for when variable #i is constant
|
||||
uint64_t model;
|
||||
// constant mask: 8-byte 0xff mask; equal to all ff's for literals, for variables only byte #i (1+) is set to align with model
|
||||
uint64_t constant;
|
||||
|
||||
Cost(int cost = 0, uint64_t constant = 0)
|
||||
: model(cost < 0x7f ? cost : 0x7f)
|
||||
, constant(constant)
|
||||
{
|
||||
}
|
||||
|
||||
Cost operator+(const Cost& other) const
|
||||
{
|
||||
Cost result;
|
||||
result.model = parallelAddSat(model, other.model);
|
||||
return result;
|
||||
}
|
||||
|
||||
Cost& operator+=(const Cost& other)
|
||||
{
|
||||
model = parallelAddSat(model, other.model);
|
||||
constant = 0;
|
||||
return *this;
|
||||
}
|
||||
|
||||
static Cost fold(const Cost& x, const Cost& y)
|
||||
{
|
||||
uint64_t newmodel = parallelAddSat(x.model, y.model);
|
||||
uint64_t newconstant = x.constant & y.constant;
|
||||
|
||||
// the extra cost for folding is 1; the discount is 1 for the variable that is shared by x&y (or whichever one is used in x/y if the other is
|
||||
// literal)
|
||||
uint64_t extra = (newconstant == kLiteral) ? 0 : (1 | (0x0101010101010101ull & newconstant));
|
||||
|
||||
Cost result;
|
||||
result.model = parallelAddSat(newmodel, extra);
|
||||
result.constant = newconstant;
|
||||
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
struct CostVisitor : AstVisitor
|
||||
{
|
||||
DenseHashMap<AstLocal*, uint64_t> vars;
|
||||
Cost result;
|
||||
|
||||
CostVisitor()
|
||||
: vars(nullptr)
|
||||
{
|
||||
}
|
||||
|
||||
Cost model(AstExpr* node)
|
||||
{
|
||||
if (AstExprGroup* expr = node->as<AstExprGroup>())
|
||||
{
|
||||
return model(expr->expr);
|
||||
}
|
||||
else if (node->is<AstExprConstantNil>() || node->is<AstExprConstantBool>() || node->is<AstExprConstantNumber>() ||
|
||||
node->is<AstExprConstantString>())
|
||||
{
|
||||
return Cost(0, Cost::kLiteral);
|
||||
}
|
||||
else if (AstExprLocal* expr = node->as<AstExprLocal>())
|
||||
{
|
||||
const uint64_t* i = vars.find(expr->local);
|
||||
|
||||
return Cost(0, i ? *i : 0); // locals typically don't require extra instructions to compute
|
||||
}
|
||||
else if (node->is<AstExprGlobal>())
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
else if (node->is<AstExprVarargs>())
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
else if (AstExprCall* expr = node->as<AstExprCall>())
|
||||
{
|
||||
Cost cost = 3;
|
||||
cost += model(expr->func);
|
||||
|
||||
for (size_t i = 0; i < expr->args.size; ++i)
|
||||
{
|
||||
Cost ac = model(expr->args.data[i]);
|
||||
// for constants/locals we still need to copy them to the argument list
|
||||
cost += ac.model == 0 ? Cost(1) : ac;
|
||||
}
|
||||
|
||||
return cost;
|
||||
}
|
||||
else if (AstExprIndexName* expr = node->as<AstExprIndexName>())
|
||||
{
|
||||
return model(expr->expr) + 1;
|
||||
}
|
||||
else if (AstExprIndexExpr* expr = node->as<AstExprIndexExpr>())
|
||||
{
|
||||
return model(expr->expr) + model(expr->index) + 1;
|
||||
}
|
||||
else if (AstExprFunction* expr = node->as<AstExprFunction>())
|
||||
{
|
||||
return 10; // high baseline cost due to allocation
|
||||
}
|
||||
else if (AstExprTable* expr = node->as<AstExprTable>())
|
||||
{
|
||||
Cost cost = 10; // high baseline cost due to allocation
|
||||
|
||||
for (size_t i = 0; i < expr->items.size; ++i)
|
||||
{
|
||||
const AstExprTable::Item& item = expr->items.data[i];
|
||||
|
||||
if (item.key)
|
||||
cost += model(item.key);
|
||||
|
||||
cost += model(item.value);
|
||||
cost += 1;
|
||||
}
|
||||
|
||||
return cost;
|
||||
}
|
||||
else if (AstExprUnary* expr = node->as<AstExprUnary>())
|
||||
{
|
||||
return Cost::fold(model(expr->expr), Cost(0, Cost::kLiteral));
|
||||
}
|
||||
else if (AstExprBinary* expr = node->as<AstExprBinary>())
|
||||
{
|
||||
return Cost::fold(model(expr->left), model(expr->right));
|
||||
}
|
||||
else if (AstExprTypeAssertion* expr = node->as<AstExprTypeAssertion>())
|
||||
{
|
||||
return model(expr->expr);
|
||||
}
|
||||
else if (AstExprIfElse* expr = node->as<AstExprIfElse>())
|
||||
{
|
||||
return model(expr->condition) + model(expr->trueExpr) + model(expr->falseExpr) + 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
LUAU_ASSERT(!"Unknown expression type");
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
void assign(AstExpr* expr)
|
||||
{
|
||||
// variable assignments reset variable mask, so that further uses of this variable aren't discounted
|
||||
// this doesn't work perfectly with backwards control flow like loops, but is good enough for a single pass
|
||||
if (AstExprLocal* lv = expr->as<AstExprLocal>())
|
||||
if (uint64_t* i = vars.find(lv->local))
|
||||
*i = 0;
|
||||
}
|
||||
|
||||
bool visit(AstExpr* node) override
|
||||
{
|
||||
// note: we short-circuit the visitor traversal through any expression trees by returning false
|
||||
// recursive traversal is happening inside model() which makes it easier to get the resulting value of the subexpression
|
||||
result += model(node);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool visit(AstStat* node) override
|
||||
{
|
||||
if (node->is<AstStatIf>())
|
||||
result += 2;
|
||||
else if (node->is<AstStatWhile>() || node->is<AstStatRepeat>() || node->is<AstStatFor>() || node->is<AstStatForIn>())
|
||||
result += 2;
|
||||
else if (node->is<AstStatBreak>() || node->is<AstStatContinue>())
|
||||
result += 1;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool visit(AstStatLocal* node) override
|
||||
{
|
||||
for (size_t i = 0; i < node->values.size; ++i)
|
||||
{
|
||||
Cost arg = model(node->values.data[i]);
|
||||
|
||||
// propagate constant mask from expression through variables
|
||||
if (arg.constant && i < node->vars.size)
|
||||
vars[node->vars.data[i]] = arg.constant;
|
||||
|
||||
result += arg;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool visit(AstStatAssign* node) override
|
||||
{
|
||||
for (size_t i = 0; i < node->vars.size; ++i)
|
||||
assign(node->vars.data[i]);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool visit(AstStatCompoundAssign* node) override
|
||||
{
|
||||
assign(node->var);
|
||||
|
||||
// if lhs is not a local, setting it requires an extra table operation
|
||||
result += node->var->is<AstExprLocal>() ? 1 : 2;
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
// Computes the packed cost model for the tree under root.
// Up to the first 7 entries of vars are tracked; variable #i is given the constant mask for byte lane i + 1,
// which lines up with the discount lanes of the returned model. Entries past the 7th are ignored.
uint64_t modelCost(AstNode* root, AstLocal* const* vars, size_t varCount)
{
    CostVisitor visitor;

    const size_t tracked = varCount < 7 ? varCount : 7;

    for (size_t slot = 0; slot < tracked; ++slot)
    {
        // lane 0 holds the baseline cost, so variable lanes start at byte 1
        const size_t shift = slot * 8 + 8;

        visitor.vars[vars[slot]] = 0xffull << shift;
    }

    root->visit(&visitor);

    return visitor.result.model;
}
|
||||
|
||||
// Evaluates a packed cost model: the baseline (low 7 bits of byte 0) minus the discount of every variable
// flagged constant in varsConst. At most 7 variables are considered. A baseline of 0x7f is returned as-is.
int computeCost(uint64_t model, const bool* varsConst, size_t varCount)
{
    const int baseline = int(model & 0x7f);

    // don't apply discounts to what is likely a saturated sum
    if (baseline == 0x7f)
        return baseline;

    const size_t considered = varCount < 7 ? varCount : 7;

    int discount = 0;

    for (size_t slot = 0; slot < considered; ++slot)
    {
        if (varsConst[slot])
            discount += int((model >> (slot * 8 + 8)) & 0x7f);
    }

    return baseline - discount;
}
|
||||
|
||||
} // namespace Compile
|
||||
} // namespace Luau
|
|
@ -0,0 +1,18 @@
|
|||
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
||||
#pragma once
|
||||
|
||||
#include "Luau/Ast.h"
|
||||
|
||||
namespace Luau
|
||||
{
|
||||
namespace Compile
|
||||
{
|
||||
|
||||
// cost model: 8 bytes, where first byte is the baseline cost, and the next 7 bytes are discounts for when variable #i is constant
|
||||
uint64_t modelCost(AstNode* root, AstLocal* const* vars, size_t varCount);
|
||||
|
||||
// cost is computed as B - sum(Di * Ci), where B is baseline cost, Di is the discount for each variable and Ci is 1 when variable #i is constant
|
||||
int computeCost(uint64_t model, const bool* varsConst, size_t varCount);
|
||||
|
||||
} // namespace Compile
|
||||
} // namespace Luau
|
|
@ -299,7 +299,7 @@ LUA_API uintptr_t lua_encodepointer(lua_State* L, uintptr_t p);
|
|||
|
||||
LUA_API double lua_clock();
|
||||
|
||||
LUA_API void lua_setuserdatadtor(lua_State* L, int tag, void (*dtor)(void*));
|
||||
LUA_API void lua_setuserdatadtor(lua_State* L, int tag, void (*dtor)(lua_State*, void*));
|
||||
|
||||
LUA_API void lua_clonefunction(lua_State* L, int idx);
|
||||
|
||||
|
|
|
@ -14,6 +14,8 @@
|
|||
|
||||
#include <string.h>
|
||||
|
||||
LUAU_FASTFLAG(LuauGcWorkTrackFix)
|
||||
|
||||
const char* lua_ident = "$Lua: Lua 5.1.4 Copyright (C) 1994-2008 Lua.org, PUC-Rio $\n"
|
||||
"$Authors: R. Ierusalimschy, L. H. de Figueiredo & W. Celes $\n"
|
||||
"$URL: www.lua.org $\n";
|
||||
|
@ -1050,6 +1052,7 @@ int lua_gc(lua_State* L, int what, int data)
|
|||
{
|
||||
size_t prevthreshold = g->GCthreshold;
|
||||
size_t amount = (cast_to(size_t, data) << 10);
|
||||
ptrdiff_t oldcredit = g->gcstate == GCSpause ? 0 : g->GCthreshold - g->totalbytes;
|
||||
|
||||
// temporarily adjust the threshold so that we can perform GC work
|
||||
if (amount <= g->totalbytes)
|
||||
|
@ -1069,9 +1072,9 @@ int lua_gc(lua_State* L, int what, int data)
|
|||
|
||||
while (g->GCthreshold <= g->totalbytes)
|
||||
{
|
||||
luaC_step(L, false);
|
||||
size_t stepsize = luaC_step(L, false);
|
||||
|
||||
actualwork += g->gcstepsize;
|
||||
actualwork += FFlag::LuauGcWorkTrackFix ? stepsize : g->gcstepsize;
|
||||
|
||||
if (g->gcstate == GCSpause)
|
||||
{ /* end of cycle? */
|
||||
|
@ -1107,11 +1110,20 @@ int lua_gc(lua_State* L, int what, int data)
|
|||
// if cycle hasn't finished, advance threshold forward for the amount of extra work performed
|
||||
if (g->gcstate != GCSpause)
|
||||
{
|
||||
// if a new cycle was triggered by explicit step, we ignore old threshold as that shows an incorrect 'credit' of GC work
|
||||
if (waspaused)
|
||||
g->GCthreshold = g->totalbytes + actualwork;
|
||||
if (FFlag::LuauGcWorkTrackFix)
|
||||
{
|
||||
// if a new cycle was triggered by explicit step, old 'credit' of GC work is 0
|
||||
ptrdiff_t newthreshold = g->totalbytes + actualwork + oldcredit;
|
||||
g->GCthreshold = newthreshold < 0 ? 0 : newthreshold;
|
||||
}
|
||||
else
|
||||
g->GCthreshold = prevthreshold + actualwork;
|
||||
{
|
||||
// if a new cycle was triggered by explicit step, we ignore old threshold as that shows an incorrect 'credit' of GC work
|
||||
if (waspaused)
|
||||
g->GCthreshold = g->totalbytes + actualwork;
|
||||
else
|
||||
g->GCthreshold = prevthreshold + actualwork;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -1258,7 +1270,7 @@ const char* lua_setupvalue(lua_State* L, int funcindex, int n)
|
|||
L->top--;
|
||||
setobj(L, val, L->top);
|
||||
luaC_barrier(L, clvalue(fi), L->top);
|
||||
luaC_upvalbarrier(L, NULL, val);
|
||||
luaC_upvalbarrier(L, cast_to(UpVal*, NULL), val);
|
||||
}
|
||||
return name;
|
||||
}
|
||||
|
@ -1311,7 +1323,7 @@ void lua_unref(lua_State* L, int ref)
|
|||
return;
|
||||
}
|
||||
|
||||
void lua_setuserdatadtor(lua_State* L, int tag, void (*dtor)(void*))
|
||||
void lua_setuserdatadtor(lua_State* L, int tag, void (*dtor)(lua_State*, void*))
|
||||
{
|
||||
api_check(L, unsigned(tag) < LUA_UTAG_LIMIT);
|
||||
L->global->udatagc[tag] = dtor;
|
||||
|
|
|
@ -15,6 +15,8 @@
|
|||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
LUAU_FASTFLAGVARIABLE(LuauFixBuiltinsStackLimit, false)
|
||||
|
||||
// luauF functions implement FASTCALL instruction that performs a direct execution of some builtin functions from the VM
|
||||
// The rule of thumb is that FASTCALL functions can not call user code, yield, fail, or reallocate stack.
|
||||
// If types of the arguments mismatch, luauF_* needs to return -1 and the execution will fall back to the usual call path
|
||||
|
@ -1003,7 +1005,7 @@ static int luauF_tunpack(lua_State* L, StkId res, TValue* arg0, int nresults, St
|
|||
else if (nparams == 3 && ttisnumber(args) && ttisnumber(args + 1) && nvalue(args) == 1.0)
|
||||
n = int(nvalue(args + 1));
|
||||
|
||||
if (n >= 0 && n <= t->sizearray && cast_int(L->stack_last - res) >= n)
|
||||
if (n >= 0 && n <= t->sizearray && cast_int(L->stack_last - res) >= n && (!FFlag::LuauFixBuiltinsStackLimit || n + nparams <= LUAI_MAXCSTACK))
|
||||
{
|
||||
TValue* array = t->array;
|
||||
for (int i = 0; i < n; ++i)
|
||||
|
|
|
@ -14,6 +14,6 @@ LUAI_FUNC UpVal* luaF_findupval(lua_State* L, StkId level);
|
|||
LUAI_FUNC void luaF_close(lua_State* L, StkId level);
|
||||
LUAI_FUNC void luaF_freeproto(lua_State* L, Proto* f, struct lua_Page* page);
|
||||
LUAI_FUNC void luaF_freeclosure(lua_State* L, Closure* c, struct lua_Page* page);
|
||||
void luaF_unlinkupval(UpVal* uv);
|
||||
LUAI_FUNC void luaF_unlinkupval(UpVal* uv);
|
||||
LUAI_FUNC void luaF_freeupval(lua_State* L, UpVal* uv, struct lua_Page* page);
|
||||
LUAI_FUNC const LocVar* luaF_getlocal(const Proto* func, int local_number, int pc);
|
||||
|
|
|
@ -13,9 +13,10 @@
|
|||
|
||||
#include <string.h>
|
||||
|
||||
#define GC_SWEEPMAX 40
|
||||
#define GC_SWEEPCOST 10
|
||||
#define GC_SWEEPPAGESTEPCOST 4
|
||||
LUAU_FASTFLAGVARIABLE(LuauGcWorkTrackFix, false)
|
||||
LUAU_FASTFLAGVARIABLE(LuauGcSweepCostFix, false)
|
||||
|
||||
#define GC_SWEEPPAGESTEPCOST (FFlag::LuauGcSweepCostFix ? 16 : 4)
|
||||
|
||||
#define GC_INTERRUPT(state) \
|
||||
{ \
|
||||
|
@ -64,7 +65,7 @@ static void recordGcStateStep(global_State* g, int startgcstate, double seconds,
|
|||
case GCSpropagate:
|
||||
case GCSpropagateagain:
|
||||
g->gcmetrics.currcycle.marktime += seconds;
|
||||
g->gcmetrics.currcycle.markrequests += g->gcstepsize;
|
||||
g->gcmetrics.currcycle.markwork += work;
|
||||
|
||||
if (assist)
|
||||
g->gcmetrics.currcycle.markassisttime += seconds;
|
||||
|
@ -74,7 +75,7 @@ static void recordGcStateStep(global_State* g, int startgcstate, double seconds,
|
|||
break;
|
||||
case GCSsweep:
|
||||
g->gcmetrics.currcycle.sweeptime += seconds;
|
||||
g->gcmetrics.currcycle.sweeprequests += g->gcstepsize;
|
||||
g->gcmetrics.currcycle.sweepwork += work;
|
||||
|
||||
if (assist)
|
||||
g->gcmetrics.currcycle.sweepassisttime += seconds;
|
||||
|
@ -87,13 +88,11 @@ static void recordGcStateStep(global_State* g, int startgcstate, double seconds,
|
|||
{
|
||||
g->gcmetrics.stepassisttimeacc += seconds;
|
||||
g->gcmetrics.currcycle.assistwork += work;
|
||||
g->gcmetrics.currcycle.assistrequests += g->gcstepsize;
|
||||
}
|
||||
else
|
||||
{
|
||||
g->gcmetrics.stepexplicittimeacc += seconds;
|
||||
g->gcmetrics.currcycle.explicitwork += work;
|
||||
g->gcmetrics.currcycle.explicitrequests += g->gcstepsize;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -878,11 +877,11 @@ static size_t getheaptrigger(global_State* g, size_t heapgoal)
|
|||
return heaptrigger < int64_t(g->totalbytes) ? g->totalbytes : (heaptrigger > int64_t(heapgoal) ? heapgoal : size_t(heaptrigger));
|
||||
}
|
||||
|
||||
void luaC_step(lua_State* L, bool assist)
|
||||
size_t luaC_step(lua_State* L, bool assist)
|
||||
{
|
||||
global_State* g = L->global;
|
||||
|
||||
int lim = (g->gcstepsize / 100) * g->gcstepmul; /* how much to work */
|
||||
int lim = FFlag::LuauGcWorkTrackFix ? g->gcstepsize * g->gcstepmul / 100 : (g->gcstepsize / 100) * g->gcstepmul; /* how much to work */
|
||||
LUAU_ASSERT(g->totalbytes >= g->GCthreshold);
|
||||
size_t debt = g->totalbytes - g->GCthreshold;
|
||||
|
||||
|
@ -902,12 +901,13 @@ void luaC_step(lua_State* L, bool assist)
|
|||
int lastgcstate = g->gcstate;
|
||||
|
||||
size_t work = gcstep(L, lim);
|
||||
(void)work;
|
||||
|
||||
#ifdef LUAI_GCMETRICS
|
||||
recordGcStateStep(g, lastgcstate, lua_clock() - lasttimestamp, assist, work);
|
||||
#endif
|
||||
|
||||
size_t actualstepsize = work * 100 / g->gcstepmul;
|
||||
|
||||
// at the end of the last cycle
|
||||
if (g->gcstate == GCSpause)
|
||||
{
|
||||
|
@ -927,14 +927,16 @@ void luaC_step(lua_State* L, bool assist)
|
|||
}
|
||||
else
|
||||
{
|
||||
g->GCthreshold = g->totalbytes + g->gcstepsize;
|
||||
g->GCthreshold = g->totalbytes + (FFlag::LuauGcWorkTrackFix ? actualstepsize : g->gcstepsize);
|
||||
|
||||
// compensate if GC is "behind schedule" (has some debt to pay)
|
||||
if (g->GCthreshold > debt)
|
||||
if (FFlag::LuauGcWorkTrackFix ? g->GCthreshold >= debt : g->GCthreshold > debt)
|
||||
g->GCthreshold -= debt;
|
||||
}
|
||||
|
||||
GC_INTERRUPT(lastgcstate);
|
||||
|
||||
return actualstepsize;
|
||||
}
|
||||
|
||||
void luaC_fullgc(lua_State* L)
|
||||
|
|
|
@ -120,7 +120,7 @@
|
|||
|
||||
#define luaC_upvalbarrier(L, uv, tv) \
|
||||
{ \
|
||||
if (iscollectable(tv) && iswhite(gcvalue(tv)) && (!(uv) || ((UpVal*)uv)->v != &((UpVal*)uv)->u.value)) \
|
||||
if (iscollectable(tv) && iswhite(gcvalue(tv)) && (!(uv) || (uv)->v != &(uv)->u.value)) \
|
||||
luaC_barrierupval(L, gcvalue(tv)); \
|
||||
}
|
||||
|
||||
|
@ -133,7 +133,7 @@
|
|||
#define luaC_init(L, o, tt) luaC_initobj(L, cast_to(GCObject*, (o)), tt)
|
||||
|
||||
LUAI_FUNC void luaC_freeall(lua_State* L);
|
||||
LUAI_FUNC void luaC_step(lua_State* L, bool assist);
|
||||
LUAI_FUNC size_t luaC_step(lua_State* L, bool assist);
|
||||
LUAI_FUNC void luaC_fullgc(lua_State* L);
|
||||
LUAI_FUNC void luaC_initobj(lua_State* L, GCObject* o, uint8_t tt);
|
||||
LUAI_FUNC void luaC_initupval(lua_State* L, UpVal* uv);
|
||||
|
|
|
@ -106,7 +106,7 @@ struct GCCycleMetrics
|
|||
double markassisttime = 0.0;
|
||||
double markmaxexplicittime = 0.0;
|
||||
size_t markexplicitsteps = 0;
|
||||
size_t markrequests = 0;
|
||||
size_t markwork = 0;
|
||||
|
||||
double atomicstarttimestamp = 0.0;
|
||||
size_t atomicstarttotalsizebytes = 0;
|
||||
|
@ -122,10 +122,7 @@ struct GCCycleMetrics
|
|||
double sweepassisttime = 0.0;
|
||||
double sweepmaxexplicittime = 0.0;
|
||||
size_t sweepexplicitsteps = 0;
|
||||
size_t sweeprequests = 0;
|
||||
|
||||
size_t assistrequests = 0;
|
||||
size_t explicitrequests = 0;
|
||||
size_t sweepwork = 0;
|
||||
|
||||
size_t assistwork = 0;
|
||||
size_t explicitwork = 0;
|
||||
|
@ -203,7 +200,7 @@ typedef struct global_State
|
|||
uint64_t rngstate; /* PCG random number generator state */
|
||||
uint64_t ptrenckey[4]; /* pointer encoding key for display */
|
||||
|
||||
void (*udatagc[LUA_UTAG_LIMIT])(void*); /* for each userdata tag, a gc callback to be called immediately before freeing memory */
|
||||
void (*udatagc[LUA_UTAG_LIMIT])(lua_State*, void*); /* for each userdata tag, a gc callback to be called immediately before freeing memory */
|
||||
|
||||
lua_Callbacks cb;
|
||||
|
||||
|
|
|
@ -33,9 +33,6 @@
|
|||
|
||||
#include <string.h>
|
||||
|
||||
LUAU_FASTFLAGVARIABLE(LuauTableRehashRework, false)
|
||||
LUAU_FASTFLAGVARIABLE(LuauTableNewBoundary, false)
|
||||
|
||||
// max size of both array and hash part is 2^MAXBITS
|
||||
#define MAXBITS 26
|
||||
#define MAXSIZE (1 << MAXBITS)
|
||||
|
@ -390,6 +387,8 @@ static void resize(lua_State* L, Table* t, int nasize, int nhsize)
|
|||
setarrayvector(L, t, nasize);
|
||||
/* create new hash part with appropriate size */
|
||||
setnodevector(L, t, nhsize);
|
||||
/* used for the migration check at the end */
|
||||
LuaNode* nnew = t->node;
|
||||
if (nasize < oldasize)
|
||||
{ /* array part must shrink? */
|
||||
t->sizearray = nasize;
|
||||
|
@ -398,57 +397,51 @@ static void resize(lua_State* L, Table* t, int nasize, int nhsize)
|
|||
{
|
||||
if (!ttisnil(&t->array[i]))
|
||||
{
|
||||
if (FFlag::LuauTableRehashRework)
|
||||
{
|
||||
TValue ok;
|
||||
setnvalue(&ok, cast_num(i + 1));
|
||||
setobjt2t(L, newkey(L, t, &ok), &t->array[i]);
|
||||
}
|
||||
else
|
||||
{
|
||||
setobjt2t(L, luaH_setnum(L, t, i + 1), &t->array[i]);
|
||||
}
|
||||
TValue ok;
|
||||
setnvalue(&ok, cast_num(i + 1));
|
||||
setobjt2t(L, newkey(L, t, &ok), &t->array[i]);
|
||||
}
|
||||
}
|
||||
/* shrink array */
|
||||
luaM_reallocarray(L, t->array, oldasize, nasize, TValue, t->memcat);
|
||||
}
|
||||
/* used for the migration check at the end */
|
||||
TValue* anew = t->array;
|
||||
/* re-insert elements from hash part */
|
||||
if (FFlag::LuauTableRehashRework)
|
||||
for (int i = twoto(oldhsize) - 1; i >= 0; i--)
|
||||
{
|
||||
for (int i = twoto(oldhsize) - 1; i >= 0; i--)
|
||||
LuaNode* old = nold + i;
|
||||
if (!ttisnil(gval(old)))
|
||||
{
|
||||
LuaNode* old = nold + i;
|
||||
if (!ttisnil(gval(old)))
|
||||
{
|
||||
TValue ok;
|
||||
getnodekey(L, &ok, old);
|
||||
setobjt2t(L, arrayornewkey(L, t, &ok), gval(old));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = twoto(oldhsize) - 1; i >= 0; i--)
|
||||
{
|
||||
LuaNode* old = nold + i;
|
||||
if (!ttisnil(gval(old)))
|
||||
{
|
||||
TValue ok;
|
||||
getnodekey(L, &ok, old);
|
||||
setobjt2t(L, luaH_set(L, t, &ok), gval(old));
|
||||
}
|
||||
TValue ok;
|
||||
getnodekey(L, &ok, old);
|
||||
setobjt2t(L, arrayornewkey(L, t, &ok), gval(old));
|
||||
}
|
||||
}
|
||||
|
||||
/* make sure we haven't recursively rehashed during element migration */
|
||||
LUAU_ASSERT(nnew == t->node);
|
||||
LUAU_ASSERT(anew == t->array);
|
||||
|
||||
if (nold != dummynode)
|
||||
luaM_freearray(L, nold, twoto(oldhsize), LuaNode, t->memcat); /* free old array */
|
||||
}
|
||||
|
||||
/*
** Adjusts a proposed array-part size upwards so that the table boundary lands inside the array part.
** t: table that is about to be resized
** size: candidate size for the array part
** ek: optional extra key that is about to be inserted; may be NULL (it is null-checked below)
** Returns `size` advanced past every consecutive index size+1, size+2, ... that either equals the
** numeric extra key or currently maps to a non-nil value in t.
*/
static int adjustasize(Table* t, int size, const TValue* ek)
|
||||
{
|
||||
/* lookups via luaH_getnum are only performed when the table has a node part or the array part would shrink */
bool tbound = t->node != dummynode || size < t->sizearray;
|
||||
/* index of the extra key as computed by arrayindex(), or -1 when ek is NULL or not a number */
int ekindex = ek && ttisnumber(ek) ? arrayindex(nvalue(ek)) : -1;
|
||||
/* move the array size up until the boundary is guaranteed to be inside the array part */
|
||||
while (size + 1 == ekindex || (tbound && !ttisnil(luaH_getnum(t, size + 1))))
|
||||
size++;
|
||||
return size;
|
||||
}
|
||||
|
||||
void luaH_resizearray(lua_State* L, Table* t, int nasize)
|
||||
{
|
||||
int nsize = (t->node == dummynode) ? 0 : sizenode(t);
|
||||
resize(L, t, nasize, nsize);
|
||||
int asize = adjustasize(t, nasize, NULL);
|
||||
resize(L, t, asize, nsize);
|
||||
}
|
||||
|
||||
void luaH_resizehash(lua_State* L, Table* t, int nhsize)
|
||||
|
@ -470,21 +463,11 @@ static void rehash(lua_State* L, Table* t, const TValue* ek)
|
|||
totaluse++;
|
||||
/* compute new size for array part */
|
||||
int na = computesizes(nums, &nasize);
|
||||
int nh = totaluse - na;
|
||||
/* enforce the boundary invariant; for performance, only do hash lookups if we must */
|
||||
if (FFlag::LuauTableNewBoundary)
|
||||
{
|
||||
bool tbound = t->node != dummynode || nasize < t->sizearray;
|
||||
int ekindex = ttisnumber(ek) ? arrayindex(nvalue(ek)) : -1;
|
||||
/* move the array size up until the boundary is guaranteed to be inside the array part */
|
||||
while (nasize + 1 == ekindex || (tbound && !ttisnil(luaH_getnum(t, nasize + 1))))
|
||||
{
|
||||
nasize++;
|
||||
na++;
|
||||
}
|
||||
}
|
||||
nasize = adjustasize(t, nasize, ek);
|
||||
/* resize the table to new computed sizes */
|
||||
LUAU_ASSERT(na <= totaluse);
|
||||
resize(L, t, nasize, totaluse - na);
|
||||
resize(L, t, nasize, nh);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -544,11 +527,11 @@ static LuaNode* getfreepos(Table* t)
|
|||
static TValue* newkey(lua_State* L, Table* t, const TValue* key)
|
||||
{
|
||||
/* enforce boundary invariant */
|
||||
if (FFlag::LuauTableNewBoundary && ttisnumber(key) && nvalue(key) == t->sizearray + 1)
|
||||
if (ttisnumber(key) && nvalue(key) == t->sizearray + 1)
|
||||
{
|
||||
rehash(L, t, key); /* grow table */
|
||||
|
||||
// after rehash, numeric keys might be located in the new array part, but won't be found in the node part
|
||||
/* after rehash, numeric keys might be located in the new array part, but won't be found in the node part */
|
||||
return arrayornewkey(L, t, key);
|
||||
}
|
||||
|
||||
|
@ -560,15 +543,8 @@ static TValue* newkey(lua_State* L, Table* t, const TValue* key)
|
|||
{ /* cannot find a free place? */
|
||||
rehash(L, t, key); /* grow table */
|
||||
|
||||
if (!FFlag::LuauTableRehashRework)
|
||||
{
|
||||
return luaH_set(L, t, key); /* re-insert key into grown table */
|
||||
}
|
||||
else
|
||||
{
|
||||
// after rehash, numeric keys might be located in the new array part, but won't be found in the node part
|
||||
return arrayornewkey(L, t, key);
|
||||
}
|
||||
/* after rehash, numeric keys might be located in the new array part, but won't be found in the node part */
|
||||
return arrayornewkey(L, t, key);
|
||||
}
|
||||
LUAU_ASSERT(n != dummynode);
|
||||
TValue mk;
|
||||
|
@ -733,37 +709,6 @@ TValue* luaH_setstr(lua_State* L, Table* t, TString* key)
|
|||
}
|
||||
}
|
||||
|
||||
/*
** Searches for a boundary above index j: returns an index i such that luaH_getnum(t, i + 1) is nil
** and i is either the starting j or an index whose value is non-nil.
** t: table to probe through luaH_getnum
** j: starting index; per the comment below it is expected to be zero or a present index
** Asserts that FFlag::LuauTableNewBoundary is disabled.
** NOTE(review): the enclosing hunk header (-733,37 +709,6) suggests this function is deleted by this
** commit rather than kept - confirm against the real ltable.cpp before relying on it.
*/
static LUAU_NOINLINE int unbound_search(Table* t, unsigned int j)
|
||||
{
|
||||
LUAU_ASSERT(!FFlag::LuauTableNewBoundary);
|
||||
unsigned int i = j; /* i is zero or a present index */
|
||||
j++;
|
||||
/* find `i' and `j' such that i is present and j is not */
|
||||
while (!ttisnil(luaH_getnum(t, j)))
|
||||
{
|
||||
i = j;
|
||||
/* double the probe index on every step that still finds a non-nil value */
j *= 2;
|
||||
if (j > cast_to(unsigned int, INT_MAX))
|
||||
{ /* overflow? */
|
||||
/* table was built with bad purposes: resort to linear search */
|
||||
i = 1;
|
||||
while (!ttisnil(luaH_getnum(t, i)))
|
||||
i++;
|
||||
/* i is now the first index whose value is nil, so the index before it is returned */
return i - 1;
|
||||
}
|
||||
}
|
||||
/* now do a binary search between them */
|
||||
/* each step keeps j on an index whose value is nil and moves i only onto indices whose value is non-nil */
while (j - i > 1)
|
||||
{
|
||||
unsigned int m = (i + j) / 2;
|
||||
if (ttisnil(luaH_getnum(t, m)))
|
||||
j = m;
|
||||
else
|
||||
i = m;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
static int updateaboundary(Table* t, int boundary)
|
||||
{
|
||||
if (boundary < t->sizearray && ttisnil(&t->array[boundary - 1]))
|
||||
|
@ -820,17 +765,12 @@ int luaH_getn(Table* t)
|
|||
maybesetaboundary(t, boundary);
|
||||
return boundary;
|
||||
}
|
||||
else if (FFlag::LuauTableNewBoundary)
|
||||
else
|
||||
{
|
||||
/* validate boundary invariant */
|
||||
LUAU_ASSERT(t->node == dummynode || ttisnil(luaH_getnum(t, j + 1)));
|
||||
return j;
|
||||
}
|
||||
/* else must find a boundary in hash part */
|
||||
else if (t->node == dummynode) /* hash part is empty? */
|
||||
return j; /* that is easy... */
|
||||
else
|
||||
return unbound_search(t, j);
|
||||
}
|
||||
|
||||
Table* luaH_clone(lua_State* L, Table* tt)
|
||||
|
|
|
@ -199,9 +199,9 @@ static int tmove(lua_State* L)
|
|||
int tt = !lua_isnoneornil(L, 5) ? 5 : 1; /* destination table */
|
||||
luaL_checktype(L, tt, LUA_TTABLE);
|
||||
|
||||
void (*telemetrycb)(lua_State* L, int f, int e, int t, int nf, int nt) = lua_table_move_telemetry;
|
||||
void (*telemetrycb)(lua_State * L, int f, int e, int t, int nf, int nt) = lua_table_move_telemetry;
|
||||
|
||||
if (DFFlag::LuauTableMoveTelemetry2 && telemetrycb)
|
||||
if (DFFlag::LuauTableMoveTelemetry2 && telemetrycb && e >= f)
|
||||
{
|
||||
int nf = lua_objlen(L, 1);
|
||||
int nt = lua_objlen(L, tt);
|
||||
|
|
|
@ -37,6 +37,8 @@ const char* const luaT_eventname[] = {
|
|||
"__newindex",
|
||||
"__mode",
|
||||
"__namecall",
|
||||
"__call",
|
||||
"__iter",
|
||||
|
||||
"__eq",
|
||||
|
||||
|
@ -54,13 +56,13 @@ const char* const luaT_eventname[] = {
|
|||
"__lt",
|
||||
"__le",
|
||||
"__concat",
|
||||
"__call",
|
||||
"__type",
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
static_assert(sizeof(luaT_typenames) / sizeof(luaT_typenames[0]) == LUA_T_COUNT, "luaT_typenames size mismatch");
|
||||
static_assert(sizeof(luaT_eventname) / sizeof(luaT_eventname[0]) == TM_N, "luaT_eventname size mismatch");
|
||||
static_assert(TM_EQ < 8, "fasttm optimization stores a bitfield with metamethods in a byte");
|
||||
|
||||
void luaT_init(lua_State* L)
|
||||
{
|
||||
|
|
|
@ -16,6 +16,8 @@ typedef enum
|
|||
TM_NEWINDEX,
|
||||
TM_MODE,
|
||||
TM_NAMECALL,
|
||||
TM_CALL,
|
||||
TM_ITER,
|
||||
|
||||
TM_EQ, /* last tag method with `fast' access */
|
||||
|
||||
|
@ -33,7 +35,6 @@ typedef enum
|
|||
TM_LT,
|
||||
TM_LE,
|
||||
TM_CONCAT,
|
||||
TM_CALL,
|
||||
TM_TYPE,
|
||||
|
||||
TM_N /* number of elements in the enum */
|
||||
|
|
|
@ -22,14 +22,21 @@ Udata* luaU_newudata(lua_State* L, size_t s, int tag)
|
|||
|
||||
void luaU_freeudata(lua_State* L, Udata* u, lua_Page* page)
|
||||
{
|
||||
void (*dtor)(void*) = nullptr;
|
||||
if (u->tag < LUA_UTAG_LIMIT)
|
||||
{
|
||||
void (*dtor)(lua_State*, void*) = nullptr;
|
||||
dtor = L->global->udatagc[u->tag];
|
||||
if (dtor)
|
||||
dtor(L, u->data);
|
||||
}
|
||||
else if (u->tag == UTAG_IDTOR)
|
||||
{
|
||||
void (*dtor)(void*) = nullptr;
|
||||
memcpy(&dtor, &u->data + u->len - sizeof(dtor), sizeof(dtor));
|
||||
if (dtor)
|
||||
dtor(u->data);
|
||||
}
|
||||
|
||||
if (dtor)
|
||||
dtor(u->data);
|
||||
|
||||
luaM_freegco(L, u, sizeudata(u->len), u->memcat, page);
|
||||
}
|
||||
|
|
|
@ -16,7 +16,10 @@
|
|||
|
||||
#include <string.h>
|
||||
|
||||
LUAU_FASTFLAG(LuauTableNewBoundary)
|
||||
LUAU_FASTFLAGVARIABLE(LuauIter, false)
|
||||
LUAU_DYNAMIC_FASTFLAGVARIABLE(LuauIterCallTelemetry, false)
|
||||
|
||||
void (*lua_iter_call_telemetry)(lua_State* L);
|
||||
|
||||
// Disable c99-designator to avoid the warning in CGOTO dispatch table
|
||||
#ifdef __clang__
|
||||
|
@ -110,7 +113,7 @@ LUAU_FASTFLAG(LuauTableNewBoundary)
|
|||
VM_DISPATCH_OP(LOP_FORGLOOP_NEXT), VM_DISPATCH_OP(LOP_GETVARARGS), VM_DISPATCH_OP(LOP_DUPCLOSURE), VM_DISPATCH_OP(LOP_PREPVARARGS), \
|
||||
VM_DISPATCH_OP(LOP_LOADKX), VM_DISPATCH_OP(LOP_JUMPX), VM_DISPATCH_OP(LOP_FASTCALL), VM_DISPATCH_OP(LOP_COVERAGE), \
|
||||
VM_DISPATCH_OP(LOP_CAPTURE), VM_DISPATCH_OP(LOP_JUMPIFEQK), VM_DISPATCH_OP(LOP_JUMPIFNOTEQK), VM_DISPATCH_OP(LOP_FASTCALL1), \
|
||||
VM_DISPATCH_OP(LOP_FASTCALL2), VM_DISPATCH_OP(LOP_FASTCALL2K),
|
||||
VM_DISPATCH_OP(LOP_FASTCALL2), VM_DISPATCH_OP(LOP_FASTCALL2K), VM_DISPATCH_OP(LOP_FORGPREP),
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#define VM_USE_CGOTO 1
|
||||
|
@ -150,8 +153,20 @@ LUAU_NOINLINE static void luau_prepareFORN(lua_State* L, StkId plimit, StkId pst
|
|||
|
||||
LUAU_NOINLINE static bool luau_loopFORG(lua_State* L, int a, int c)
|
||||
{
|
||||
// note: it's safe to push arguments past top for complicated reasons (see top of the file)
|
||||
StkId ra = &L->base[a];
|
||||
LUAU_ASSERT(ra + 6 <= L->top);
|
||||
LUAU_ASSERT(ra + 3 <= L->top);
|
||||
|
||||
if (DFFlag::LuauIterCallTelemetry)
|
||||
{
|
||||
/* TODO: we might be able to stop supporting this depending on whether it's used in practice */
|
||||
void (*telemetrycb)(lua_State* L) = lua_iter_call_telemetry;
|
||||
|
||||
if (telemetrycb && ttistable(ra) && fasttm(L, hvalue(ra)->metatable, TM_CALL))
|
||||
telemetrycb(L);
|
||||
if (telemetrycb && ttisuserdata(ra) && fasttm(L, uvalue(ra)->metatable, TM_CALL))
|
||||
telemetrycb(L);
|
||||
}
|
||||
|
||||
setobjs2s(L, ra + 3 + 2, ra + 2);
|
||||
setobjs2s(L, ra + 3 + 1, ra + 1);
|
||||
|
@ -2204,20 +2219,149 @@ static void luau_execute(lua_State* L)
|
|||
}
|
||||
}
|
||||
|
||||
VM_CASE(LOP_FORGPREP)
|
||||
{
|
||||
Instruction insn = *pc++;
|
||||
StkId ra = VM_REG(LUAU_INSN_A(insn));
|
||||
|
||||
if (ttisfunction(ra))
|
||||
{
|
||||
/* will be called during FORGLOOP */
|
||||
}
|
||||
else if (FFlag::LuauIter)
|
||||
{
|
||||
Table* mt = ttistable(ra) ? hvalue(ra)->metatable : ttisuserdata(ra) ? uvalue(ra)->metatable : cast_to(Table*, NULL);
|
||||
|
||||
if (const TValue* fn = fasttm(L, mt, TM_ITER))
|
||||
{
|
||||
setobj2s(L, ra + 1, ra);
|
||||
setobj2s(L, ra, fn);
|
||||
|
||||
L->top = ra + 2; /* func + self arg */
|
||||
LUAU_ASSERT(L->top <= L->stack_last);
|
||||
|
||||
VM_PROTECT(luaD_call(L, ra, 3));
|
||||
L->top = L->ci->top;
|
||||
}
|
||||
else if (fasttm(L, mt, TM_CALL))
|
||||
{
|
||||
/* table or userdata with __call, will be called during FORGLOOP */
|
||||
/* TODO: we might be able to stop supporting this depending on whether it's used in practice */
|
||||
}
|
||||
else if (ttistable(ra))
|
||||
{
|
||||
/* set up registers for builtin iteration */
|
||||
setobj2s(L, ra + 1, ra);
|
||||
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(0)));
|
||||
setnilvalue(ra);
|
||||
}
|
||||
else
|
||||
{
|
||||
VM_PROTECT(luaG_typeerror(L, ra, "iterate over"));
|
||||
}
|
||||
}
|
||||
|
||||
pc += LUAU_INSN_D(insn);
|
||||
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
|
||||
VM_NEXT();
|
||||
}
|
||||
|
||||
VM_CASE(LOP_FORGLOOP)
|
||||
{
|
||||
VM_INTERRUPT();
|
||||
Instruction insn = *pc++;
|
||||
StkId ra = VM_REG(LUAU_INSN_A(insn));
|
||||
uint32_t aux = *pc;
|
||||
|
||||
// note: this is a slow generic path, fast-path is FORGLOOP_INEXT/NEXT
|
||||
bool stop;
|
||||
VM_PROTECT(stop = luau_loopFORG(L, LUAU_INSN_A(insn), aux));
|
||||
if (!FFlag::LuauIter)
|
||||
{
|
||||
bool stop;
|
||||
VM_PROTECT(stop = luau_loopFORG(L, LUAU_INSN_A(insn), aux));
|
||||
|
||||
// note that we need to increment pc by 1 to exit the loop since we need to skip over aux
|
||||
pc += stop ? 1 : LUAU_INSN_D(insn);
|
||||
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
|
||||
VM_NEXT();
|
||||
// note that we need to increment pc by 1 to exit the loop since we need to skip over aux
|
||||
pc += stop ? 1 : LUAU_INSN_D(insn);
|
||||
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
|
||||
VM_NEXT();
|
||||
}
|
||||
|
||||
// fast-path: builtin table iteration
|
||||
if (ttisnil(ra) && ttistable(ra + 1) && ttislightuserdata(ra + 2))
|
||||
{
|
||||
Table* h = hvalue(ra + 1);
|
||||
int index = int(reinterpret_cast<uintptr_t>(pvalue(ra + 2)));
|
||||
|
||||
int sizearray = h->sizearray;
|
||||
int sizenode = 1 << h->lsizenode;
|
||||
|
||||
// clear extra variables since we might have more than two
|
||||
if (LUAU_UNLIKELY(aux > 2))
|
||||
for (int i = 2; i < int(aux); ++i)
|
||||
setnilvalue(ra + 3 + i);
|
||||
|
||||
// first we advance index through the array portion
|
||||
while (unsigned(index) < unsigned(sizearray))
|
||||
{
|
||||
if (!ttisnil(&h->array[index]))
|
||||
{
|
||||
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
|
||||
setnvalue(ra + 3, double(index + 1));
|
||||
setobj2s(L, ra + 4, &h->array[index]);
|
||||
|
||||
pc += LUAU_INSN_D(insn);
|
||||
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
|
||||
VM_NEXT();
|
||||
}
|
||||
|
||||
index++;
|
||||
}
|
||||
|
||||
// then we advance index through the hash portion
|
||||
while (unsigned(index - sizearray) < unsigned(sizenode))
|
||||
{
|
||||
LuaNode* n = &h->node[index - sizearray];
|
||||
|
||||
if (!ttisnil(gval(n)))
|
||||
{
|
||||
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
|
||||
getnodekey(L, ra + 3, n);
|
||||
setobj2s(L, ra + 4, gval(n));
|
||||
|
||||
pc += LUAU_INSN_D(insn);
|
||||
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
|
||||
VM_NEXT();
|
||||
}
|
||||
|
||||
index++;
|
||||
}
|
||||
|
||||
// fallthrough to exit
|
||||
pc++;
|
||||
VM_NEXT();
|
||||
}
|
||||
else
|
||||
{
|
||||
// note: it's safe to push arguments past top for complicated reasons (see top of the file)
|
||||
setobjs2s(L, ra + 3 + 2, ra + 2);
|
||||
setobjs2s(L, ra + 3 + 1, ra + 1);
|
||||
setobjs2s(L, ra + 3, ra);
|
||||
|
||||
L->top = ra + 3 + 3; /* func + 2 args (state and index) */
|
||||
LUAU_ASSERT(L->top <= L->stack_last);
|
||||
|
||||
VM_PROTECT(luaD_call(L, ra + 3, aux));
|
||||
L->top = L->ci->top;
|
||||
|
||||
// recompute ra since stack might have been reallocated
|
||||
ra = VM_REG(LUAU_INSN_A(insn));
|
||||
|
||||
// copy first variable back into the iteration index
|
||||
setobjs2s(L, ra + 2, ra + 3);
|
||||
|
||||
// note that we need to increment pc by 1 to exit the loop since we need to skip over aux
|
||||
pc += ttisnil(ra + 3) ? 1 : LUAU_INSN_D(insn);
|
||||
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
|
||||
VM_NEXT();
|
||||
}
|
||||
}
|
||||
|
||||
VM_CASE(LOP_FORGPREP_INEXT)
|
||||
|
@ -2228,8 +2372,15 @@ static void luau_execute(lua_State* L)
|
|||
// fast-path: ipairs/inext
|
||||
if (cl->env->safeenv && ttistable(ra + 1) && ttisnumber(ra + 2) && nvalue(ra + 2) == 0.0)
|
||||
{
|
||||
if (FFlag::LuauIter)
|
||||
setnilvalue(ra);
|
||||
|
||||
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(0)));
|
||||
}
|
||||
else if (FFlag::LuauIter && !ttisfunction(ra))
|
||||
{
|
||||
VM_PROTECT(luaG_typeerror(L, ra, "iterate over"));
|
||||
}
|
||||
|
||||
pc += LUAU_INSN_D(insn);
|
||||
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
|
||||
|
@ -2268,23 +2419,9 @@ static void luau_execute(lua_State* L)
|
|||
VM_NEXT();
|
||||
}
|
||||
}
|
||||
else if (FFlag::LuauTableNewBoundary || (h->lsizenode == 0 && ttisnil(gval(h->node))))
|
||||
{
|
||||
// fallthrough to exit
|
||||
VM_NEXT();
|
||||
}
|
||||
else
|
||||
{
|
||||
// the table has a hash part; index + 1 may appear in it in which case we need to iterate through the hash portion as well
|
||||
const TValue* val = luaH_getnum(h, index + 1);
|
||||
|
||||
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
|
||||
setnvalue(ra + 3, double(index + 1));
|
||||
setobj2s(L, ra + 4, val);
|
||||
|
||||
// note that nil elements inside the array terminate the traversal
|
||||
pc += ttisnil(ra + 4) ? 0 : LUAU_INSN_D(insn);
|
||||
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
|
||||
// fallthrough to exit
|
||||
VM_NEXT();
|
||||
}
|
||||
}
|
||||
|
@ -2308,8 +2445,15 @@ static void luau_execute(lua_State* L)
|
|||
// fast-path: pairs/next
|
||||
if (cl->env->safeenv && ttistable(ra + 1) && ttisnil(ra + 2))
|
||||
{
|
||||
if (FFlag::LuauIter)
|
||||
setnilvalue(ra);
|
||||
|
||||
setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(0)));
|
||||
}
|
||||
else if (FFlag::LuauIter && !ttisfunction(ra))
|
||||
{
|
||||
VM_PROTECT(luaG_typeerror(L, ra, "iterate over"));
|
||||
}
|
||||
|
||||
pc += LUAU_INSN_D(insn);
|
||||
LUAU_ASSERT(unsigned(pc - cl->l.p->code) < unsigned(cl->l.p->sizecode));
|
||||
|
@ -2704,7 +2848,7 @@ static void luau_execute(lua_State* L)
|
|||
{
|
||||
VM_PROTECT_PC();
|
||||
|
||||
int n = f(L, ra, arg, nresults, nullptr, nparams);
|
||||
int n = f(L, ra, arg, nresults, NULL, nparams);
|
||||
|
||||
if (n >= 0)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue