Sync to upstream/release/572 (#899)

* Fixed exported types not being suggested in autocomplete
* `T...` is now convertible to `...any` (Fixes
https://github.com/Roblox/luau/issues/767)
* Fixed issue with `T?` not being convertible to `T | T` or `T?`
(sometimes when internal pointer identity is different)
* Fixed potential crash in missing table key error suggestion to use a
similar existing key
* `lua_topointer` now returns a pointer for strings

C++ API Changes:
* `prepareModuleScope` callback has moved from TypeChecker to Frontend
* For LSPs, AstQuery functions (and `isWithinComment`) can be used
without full Frontend data

There are also a lot of changes in our two experimental components.

In our work on the new type-solver, the following issues were fixed:
* Fixed table union and intersection indexing
* Correct custom type environments are now used
* Fixed issue with values of `free & number` type not accepted in
numeric operations

And these are the changes in native code generation (JIT):
* arm64 lowering is almost complete with support for 99% of IR commands
and all fastcalls
* Fixed x64 assembly encoding for extended byte registers
* More external x64 calls are aware of register allocator
* `math.min`/`math.max` with more than 2 arguments are now lowered to IR
as well
* Fixed correctness issues with `math` library calls with multiple
results in variadic context and with x64 register conflicts
* x64 register allocator learnt to restore values from VM memory instead
of always using stack spills
* x64 exception unwind information now supports multiple functions and
fixes function start offset in Dwarf2 info
This commit is contained in:
vegorov-rbx 2023-04-14 21:06:22 +03:00 committed by GitHub
parent 7345891f6b
commit d141a5c48d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
88 changed files with 2579 additions and 1433 deletions

View File

@ -64,8 +64,11 @@ private:
};
std::vector<AstNode*> findAncestryAtPositionForAutocomplete(const SourceModule& source, Position pos);
std::vector<AstNode*> findAncestryAtPositionForAutocomplete(AstStatBlock* root, Position pos);
std::vector<AstNode*> findAstAncestryOfPosition(const SourceModule& source, Position pos, bool includeTypes = false);
std::vector<AstNode*> findAstAncestryOfPosition(AstStatBlock* root, Position pos, bool includeTypes = false);
AstNode* findNodeAtPosition(const SourceModule& source, Position pos);
AstNode* findNodeAtPosition(AstStatBlock* root, Position pos);
AstExpr* findExprAtPosition(const SourceModule& source, Position pos);
ScopePtr findScopeAtPosition(const Module& module, Position pos);
std::optional<Binding> findBindingAtPosition(const Module& module, const SourceModule& source, Position pos);

View File

@ -165,7 +165,15 @@ struct Frontend
bool captureComments, bool typeCheckForAutocomplete = false);
private:
ModulePtr check(const SourceModule& sourceModule, Mode mode, std::vector<RequireCycle> requireCycles, bool forAutocomplete = false, bool recordJsonLog = false);
// Optional limits handed to the private Frontend::check overload declared below.
// Every field defaults to std::nullopt (std::optional's default state).
// NOTE(review): the exact enforcement semantics live in TypeChecker, which is not visible here — confirm.
struct TypeCheckLimits
{
// Presumably a clock value (same time base as TimeTrace::getClock()) after which checking should stop — TODO confirm.
std::optional<double> finishTime;
// Presumably a cap on child nodes visited during instantiation (callers derive it from LuauTarjanChildLimit) — TODO confirm.
std::optional<int> instantiationChildLimit;
// Presumably a cap on unifier iterations (callers derive it from LuauTypeInferIterationLimit) — TODO confirm.
std::optional<int> unifierIterationLimit;
};
ModulePtr check(const SourceModule& sourceModule, Mode mode, std::vector<RequireCycle> requireCycles, std::optional<ScopePtr> environmentScope,
bool forAutocomplete, bool recordJsonLog, TypeCheckLimits typeCheckLimits);
std::pair<SourceNode*, SourceModule*> getSourceNode(const ModuleName& name);
SourceModule parse(const ModuleName& name, std::string_view src, const ParseOptions& parseOptions);
@ -185,15 +193,21 @@ public:
const NotNull<BuiltinTypes> builtinTypes;
FileResolver* fileResolver;
FrontendModuleResolver moduleResolver;
FrontendModuleResolver moduleResolverForAutocomplete;
GlobalTypes globals;
GlobalTypes globalsForAutocomplete;
TypeChecker typeChecker;
TypeChecker typeCheckerForAutocomplete;
// TODO: remove with FFlagLuauOnDemandTypecheckers
TypeChecker typeChecker_DEPRECATED;
TypeChecker typeCheckerForAutocomplete_DEPRECATED;
ConfigResolver* configResolver;
FrontendOptions options;
InternalErrorReporter iceHandler;
std::function<void(const ModuleName& name, const ScopePtr& scope, bool forAutocomplete)> prepareModuleScope;
std::unordered_map<ModuleName, SourceNode> sourceNodes;
std::unordered_map<ModuleName, SourceModule> sourceModules;

View File

@ -51,6 +51,7 @@ struct SourceModule
};
bool isWithinComment(const SourceModule& sourceModule, Position pos);
bool isWithinComment(const ParseResult& result, Position pos);
struct RequireCycle
{

View File

@ -738,6 +738,7 @@ const T* get(TypeId tv)
return get_if<T>(&tv->ty);
}
template<typename T>
T* getMutable(TypeId tv)
{
@ -897,6 +898,19 @@ bool hasTag(TypeId ty, const std::string& tagName);
bool hasTag(const Property& prop, const std::string& tagName);
bool hasTag(const Tags& tags, const std::string& tagName); // Do not use in new work.
// Returns true when `ty` (after following bound types) is itself a `T`, or when
// any part produced by flattenIntersection for it is a `T` once followed.
template<typename T>
bool hasTypeInIntersection(TypeId ty)
{
    const TypeId resolved = follow(ty);

    // Fast path: the type itself is what we are looking for.
    if (get<T>(resolved) != nullptr)
        return true;

    // Otherwise inspect every flattened intersection part.
    for (auto part : flattenIntersection(resolved))
    {
        if (get<T>(follow(part)) != nullptr)
            return true;
    }

    return false;
}
bool hasPrimitiveTypeInIntersection(TypeId ty, PrimitiveType::Type primTy);
/*
* Use this to change the kind of a particular type.
*

View File

@ -137,9 +137,9 @@ private:
public:
// Returns true if the type "needle" already occurs within "haystack" and reports an "infinite type error"
bool occursCheck(TypeId needle, TypeId haystack);
bool occursCheck(TypeId needle, TypeId haystack, bool reversed);
bool occursCheck(DenseHashSet<TypeId>& seen, TypeId needle, TypeId haystack);
bool occursCheck(TypePackId needle, TypePackId haystack);
bool occursCheck(TypePackId needle, TypePackId haystack, bool reversed);
bool occursCheck(DenseHashSet<TypePackId>& seen, TypePackId needle, TypePackId haystack);
Unifier makeChildUnifier();

View File

@ -211,33 +211,48 @@ struct FindFullAncestry final : public AstVisitor
// Convenience overload: runs the autocomplete ancestry search from the
// module's root block. All work is done by the AstStatBlock* overload, so no
// finder is created here (a second traversal would only be thrown away).
std::vector<AstNode*> findAncestryAtPositionForAutocomplete(const SourceModule& source, Position pos)
{
    return findAncestryAtPositionForAutocomplete(source.root, pos);
}
// Visits the tree rooted at `root` with an AutocompleteNodeFinder built for
// `pos` and returns the ancestry the finder recorded.
std::vector<AstNode*> findAncestryAtPositionForAutocomplete(AstStatBlock* root, Position pos)
{
    AutocompleteNodeFinder ancestryCollector{pos, root};

    root->visit(&ancestryCollector);

    return ancestryCollector.ancestry;
}
// Convenience overload: forwards to the AstStatBlock* overload using the
// module's root block. Position clamping happens in that overload, so nothing
// needs to be computed here.
std::vector<AstNode*> findAstAncestryOfPosition(const SourceModule& source, Position pos, bool includeTypes)
{
    return findAstAncestryOfPosition(source.root, pos, includeTypes);
}
// Collects the nodes recorded by FindFullAncestry for `pos` in the tree
// rooted at `root`.
//
// A position past the end of the root block is clamped to the block's end
// before searching. `includeTypes` is passed straight through to the visitor.
std::vector<AstNode*> findAstAncestryOfPosition(AstStatBlock* root, Position pos, bool includeTypes)
{
    const Position end = root->location.end;
    if (pos > end)
        pos = end;

    FindFullAncestry finder(pos, end, includeTypes);
    // Visit exactly once, through the `root` parameter (there is no `source` in this overload).
    root->visit(&finder);
    return finder.nodes;
}
// Convenience overload: forwards to the AstStatBlock* overload using the
// module's root block. That overload already handles positions before the
// root's beginning and past its end, so no pre-checks are repeated here.
AstNode* findNodeAtPosition(const SourceModule& source, Position pos)
{
    return findNodeAtPosition(source.root, pos);
}
// Finds the node FindNode selects as `best` for `pos` in the tree rooted at `root`.
//
// - A position before the root block's beginning yields the root itself.
// - A position past the root block's end is clamped to the end before searching.
AstNode* findNodeAtPosition(AstStatBlock* root, Position pos)
{
    const Position end = root->location.end;
    if (pos < root->location.begin)
        return root;

    if (pos > end)
        pos = end;

    FindNode findNode{pos, end};
    // Visit exactly once, through the `root` parameter (there is no `source` in this overload).
    findNode.visit(root);
    return findNode.best;
}

View File

@ -595,6 +595,11 @@ bool ConstraintSolver::tryDispatch(const BinaryConstraint& c, NotNull<const Cons
* make any sense to stop and wait for someone else to do it.
*/
// If any is present, the expression must evaluate to any as well.
bool leftAny = get<AnyType>(leftType) || get<ErrorType>(leftType);
bool rightAny = get<AnyType>(rightType) || get<ErrorType>(rightType);
bool anyPresent = leftAny || rightAny;
if (isBlocked(leftType) && leftType != resultType)
return block(c.leftType, constraint);
@ -604,12 +609,12 @@ bool ConstraintSolver::tryDispatch(const BinaryConstraint& c, NotNull<const Cons
if (!force)
{
// Logical expressions may proceed if the LHS is free.
if (get<FreeType>(leftType) && !isLogical)
if (hasTypeInIntersection<FreeType>(leftType) && !isLogical)
return block(leftType, constraint);
}
// Logical expressions may proceed if the LHS is free.
if (isBlocked(leftType) || (get<FreeType>(leftType) && !isLogical))
if (isBlocked(leftType) || (hasTypeInIntersection<FreeType>(leftType) && !isLogical))
{
asMutable(resultType)->ty.emplace<BoundType>(errorRecoveryType());
unblock(resultType);
@ -696,11 +701,6 @@ bool ConstraintSolver::tryDispatch(const BinaryConstraint& c, NotNull<const Cons
// If there's no metamethod available, fall back to primitive behavior.
}
// If any is present, the expression must evaluate to any as well.
bool leftAny = get<AnyType>(leftType) || get<ErrorType>(leftType);
bool rightAny = get<AnyType>(rightType) || get<ErrorType>(rightType);
bool anyPresent = leftAny || rightAny;
switch (c.op)
{
// For arithmetic operators, if the LHS is a number, the RHS must be a
@ -711,6 +711,8 @@ bool ConstraintSolver::tryDispatch(const BinaryConstraint& c, NotNull<const Cons
case AstExprBinary::Op::Div:
case AstExprBinary::Op::Pow:
case AstExprBinary::Op::Mod:
if (hasTypeInIntersection<FreeType>(leftType) && force)
asMutable(leftType)->ty.emplace<BoundType>(anyPresent ? builtinTypes->anyType : builtinTypes->numberType);
if (isNumber(leftType))
{
unify(leftType, rightType, constraint->scope);
@ -723,6 +725,8 @@ bool ConstraintSolver::tryDispatch(const BinaryConstraint& c, NotNull<const Cons
// For concatenation, if the LHS is a string, the RHS must be a string as
// well. The result will also be a string.
case AstExprBinary::Op::Concat:
if (hasTypeInIntersection<FreeType>(leftType) && force)
asMutable(leftType)->ty.emplace<BoundType>(anyPresent ? builtinTypes->anyType : builtinTypes->stringType);
if (isString(leftType))
{
unify(leftType, rightType, constraint->scope);

View File

@ -31,7 +31,8 @@ LUAU_FASTFLAG(LuauInferInNoCheckMode)
LUAU_FASTFLAGVARIABLE(LuauKnowsTheDataModel3, false)
LUAU_FASTINTVARIABLE(LuauAutocompleteCheckTimeoutMs, 100)
LUAU_FASTFLAGVARIABLE(DebugLuauDeferredConstraintResolution, false)
LUAU_FASTFLAGVARIABLE(DebugLuauLogSolverToJson, false);
LUAU_FASTFLAGVARIABLE(DebugLuauLogSolverToJson, false)
LUAU_FASTFLAGVARIABLE(LuauOnDemandTypecheckers, false)
namespace Luau
{
@ -131,8 +132,8 @@ static void persistCheckedTypes(ModulePtr checkedModule, GlobalTypes& globals, S
LoadDefinitionFileResult Frontend::loadDefinitionFile(GlobalTypes& globals, ScopePtr targetScope, std::string_view source,
const std::string& packageName, bool captureComments, bool typeCheckForAutocomplete)
{
if (!FFlag::DebugLuauDeferredConstraintResolution)
return Luau::loadDefinitionFileNoDCR(typeCheckForAutocomplete ? typeCheckerForAutocomplete : typeChecker,
if (!FFlag::DebugLuauDeferredConstraintResolution && !FFlag::LuauOnDemandTypecheckers)
return Luau::loadDefinitionFileNoDCR(typeCheckForAutocomplete ? typeCheckerForAutocomplete_DEPRECATED : typeChecker_DEPRECATED,
typeCheckForAutocomplete ? globalsForAutocomplete : globals, targetScope, source, packageName, captureComments);
LUAU_TIMETRACE_SCOPE("loadDefinitionFile", "Frontend");
@ -142,7 +143,7 @@ LoadDefinitionFileResult Frontend::loadDefinitionFile(GlobalTypes& globals, Scop
if (parseResult.errors.size() > 0)
return LoadDefinitionFileResult{false, parseResult, sourceModule, nullptr};
ModulePtr checkedModule = check(sourceModule, Mode::Definition, {});
ModulePtr checkedModule = check(sourceModule, Mode::Definition, {}, std::nullopt, /*forAutocomplete*/ false, /*recordJsonLog*/ false, {});
if (checkedModule->errors.size() > 0)
return LoadDefinitionFileResult{false, parseResult, sourceModule, checkedModule};
@ -155,6 +156,7 @@ LoadDefinitionFileResult Frontend::loadDefinitionFile(GlobalTypes& globals, Scop
LoadDefinitionFileResult loadDefinitionFileNoDCR(TypeChecker& typeChecker, GlobalTypes& globals, ScopePtr targetScope, std::string_view source,
const std::string& packageName, bool captureComments)
{
LUAU_ASSERT(!FFlag::LuauOnDemandTypecheckers);
LUAU_TIMETRACE_SCOPE("loadDefinitionFile", "Frontend");
Luau::SourceModule sourceModule;
@ -406,8 +408,8 @@ Frontend::Frontend(FileResolver* fileResolver, ConfigResolver* configResolver, c
, moduleResolverForAutocomplete(this)
, globals(builtinTypes)
, globalsForAutocomplete(builtinTypes)
, typeChecker(globals.globalScope, &moduleResolver, builtinTypes, &iceHandler)
, typeCheckerForAutocomplete(globalsForAutocomplete.globalScope, &moduleResolverForAutocomplete, builtinTypes, &iceHandler)
, typeChecker_DEPRECATED(globals.globalScope, &moduleResolver, builtinTypes, &iceHandler)
, typeCheckerForAutocomplete_DEPRECATED(globalsForAutocomplete.globalScope, &moduleResolverForAutocomplete, builtinTypes, &iceHandler)
, configResolver(configResolver)
, options(options)
{
@ -491,35 +493,68 @@ CheckResult Frontend::check(const ModuleName& name, std::optional<FrontendOption
if (frontendOptions.forAutocomplete)
{
// The autocomplete typecheck is always in strict mode with DM awareness
// to provide better type information for IDE features
typeCheckerForAutocomplete.requireCycles = requireCycles;
ModulePtr moduleForAutocomplete;
double autocompleteTimeLimit = FInt::LuauAutocompleteCheckTimeoutMs / 1000.0;
if (autocompleteTimeLimit != 0.0)
typeCheckerForAutocomplete.finishTime = TimeTrace::getClock() + autocompleteTimeLimit;
else
typeCheckerForAutocomplete.finishTime = std::nullopt;
if (!FFlag::LuauOnDemandTypecheckers)
{
// The autocomplete typecheck is always in strict mode with DM awareness
// to provide better type information for IDE features
typeCheckerForAutocomplete_DEPRECATED.requireCycles = requireCycles;
// TODO: This is a dirty ad hoc solution for autocomplete timeouts
// We are trying to dynamically adjust our existing limits to lower total typechecking time under the limit
// so that we'll have type information for the whole file at lower quality instead of a full abort in the middle
if (FInt::LuauTarjanChildLimit > 0)
typeCheckerForAutocomplete.instantiationChildLimit = std::max(1, int(FInt::LuauTarjanChildLimit * sourceNode.autocompleteLimitsMult));
else
typeCheckerForAutocomplete.instantiationChildLimit = std::nullopt;
if (autocompleteTimeLimit != 0.0)
typeCheckerForAutocomplete_DEPRECATED.finishTime = TimeTrace::getClock() + autocompleteTimeLimit;
else
typeCheckerForAutocomplete_DEPRECATED.finishTime = std::nullopt;
if (FInt::LuauTypeInferIterationLimit > 0)
typeCheckerForAutocomplete.unifierIterationLimit =
std::max(1, int(FInt::LuauTypeInferIterationLimit * sourceNode.autocompleteLimitsMult));
else
typeCheckerForAutocomplete.unifierIterationLimit = std::nullopt;
// TODO: This is a dirty ad hoc solution for autocomplete timeouts
// We are trying to dynamically adjust our existing limits to lower total typechecking time under the limit
// so that we'll have type information for the whole file at lower quality instead of a full abort in the middle
if (FInt::LuauTarjanChildLimit > 0)
typeCheckerForAutocomplete_DEPRECATED.instantiationChildLimit =
std::max(1, int(FInt::LuauTarjanChildLimit * sourceNode.autocompleteLimitsMult));
else
typeCheckerForAutocomplete_DEPRECATED.instantiationChildLimit = std::nullopt;
ModulePtr moduleForAutocomplete =
FFlag::DebugLuauDeferredConstraintResolution
? check(sourceModule, Mode::Strict, requireCycles, /*forAutocomplete*/ true, /*recordJsonLog*/ false)
: typeCheckerForAutocomplete.check(sourceModule, Mode::Strict, environmentScope);
if (FInt::LuauTypeInferIterationLimit > 0)
typeCheckerForAutocomplete_DEPRECATED.unifierIterationLimit =
std::max(1, int(FInt::LuauTypeInferIterationLimit * sourceNode.autocompleteLimitsMult));
else
typeCheckerForAutocomplete_DEPRECATED.unifierIterationLimit = std::nullopt;
moduleForAutocomplete =
FFlag::DebugLuauDeferredConstraintResolution
? check(sourceModule, Mode::Strict, requireCycles, environmentScope, /*forAutocomplete*/ true, /*recordJsonLog*/ false, {})
: typeCheckerForAutocomplete_DEPRECATED.check(sourceModule, Mode::Strict, environmentScope);
}
else
{
// The autocomplete typecheck is always in strict mode with DM awareness
// to provide better type information for IDE features
TypeCheckLimits typeCheckLimits;
if (autocompleteTimeLimit != 0.0)
typeCheckLimits.finishTime = TimeTrace::getClock() + autocompleteTimeLimit;
else
typeCheckLimits.finishTime = std::nullopt;
// TODO: This is a dirty ad hoc solution for autocomplete timeouts
// We are trying to dynamically adjust our existing limits to lower total typechecking time under the limit
// so that we'll have type information for the whole file at lower quality instead of a full abort in the middle
if (FInt::LuauTarjanChildLimit > 0)
typeCheckLimits.instantiationChildLimit = std::max(1, int(FInt::LuauTarjanChildLimit * sourceNode.autocompleteLimitsMult));
else
typeCheckLimits.instantiationChildLimit = std::nullopt;
if (FInt::LuauTypeInferIterationLimit > 0)
typeCheckLimits.unifierIterationLimit = std::max(1, int(FInt::LuauTypeInferIterationLimit * sourceNode.autocompleteLimitsMult));
else
typeCheckLimits.unifierIterationLimit = std::nullopt;
moduleForAutocomplete = check(sourceModule, Mode::Strict, requireCycles, environmentScope, /*forAutocomplete*/ true,
/*recordJsonLog*/ false, typeCheckLimits);
}
moduleResolverForAutocomplete.modules[moduleName] = moduleForAutocomplete;
@ -543,13 +578,22 @@ CheckResult Frontend::check(const ModuleName& name, std::optional<FrontendOption
continue;
}
typeChecker.requireCycles = requireCycles;
const bool recordJsonLog = FFlag::DebugLuauLogSolverToJson && moduleName == name;
ModulePtr module = (FFlag::DebugLuauDeferredConstraintResolution && mode == Mode::Strict)
? check(sourceModule, mode, requireCycles, /*forAutocomplete*/ false, recordJsonLog)
: typeChecker.check(sourceModule, mode, environmentScope);
ModulePtr module;
if (!FFlag::LuauOnDemandTypecheckers)
{
typeChecker_DEPRECATED.requireCycles = requireCycles;
module = (FFlag::DebugLuauDeferredConstraintResolution && mode == Mode::Strict)
? check(sourceModule, mode, requireCycles, environmentScope, /*forAutocomplete*/ false, recordJsonLog, {})
: typeChecker_DEPRECATED.check(sourceModule, mode, environmentScope);
}
else
{
module = check(sourceModule, mode, requireCycles, environmentScope, /*forAutocomplete*/ false, recordJsonLog, {});
}
stats.timeCheck += getTimestamp() - timestamp;
stats.filesStrict += mode == Mode::Strict;
@ -752,7 +796,7 @@ ScopePtr Frontend::getModuleEnvironment(const SourceModule& module, const Config
AstName name = module.names->get(global.c_str());
if (name.value)
result->bindings[name].typeId = typeChecker.anyType;
result->bindings[name].typeId = FFlag::LuauOnDemandTypecheckers ? builtinTypes->anyType : typeChecker_DEPRECATED.anyType;
}
}
@ -829,15 +873,15 @@ const SourceModule* Frontend::getSourceModule(const ModuleName& moduleName) cons
// Convenience overload of the free check() function: decides whether the
// solver log should be recorded from the DebugLuauLogSolverToJson flag and
// forwards every argument to the overload that takes `recordJsonLog` explicitly.
//
// `parentScope` is passed through unchanged to that overload.
ModulePtr check(const SourceModule& sourceModule, const std::vector<RequireCycle>& requireCycles, NotNull<BuiltinTypes> builtinTypes,
    NotNull<InternalErrorReporter> iceHandler, NotNull<ModuleResolver> moduleResolver, NotNull<FileResolver> fileResolver,
    const ScopePtr& parentScope, FrontendOptions options)
{
    const bool recordJsonLog = FFlag::DebugLuauLogSolverToJson;
    return check(sourceModule, requireCycles, builtinTypes, iceHandler, moduleResolver, fileResolver, parentScope, options, recordJsonLog);
}
ModulePtr check(const SourceModule& sourceModule, const std::vector<RequireCycle>& requireCycles, NotNull<BuiltinTypes> builtinTypes,
NotNull<InternalErrorReporter> iceHandler, NotNull<ModuleResolver> moduleResolver, NotNull<FileResolver> fileResolver,
const ScopePtr& globalScope, FrontendOptions options, bool recordJsonLog)
const ScopePtr& parentScope, FrontendOptions options, bool recordJsonLog)
{
ModulePtr result = std::make_shared<Module>();
result->reduction = std::make_unique<TypeReduction>(NotNull{&result->internalTypes}, builtinTypes, iceHandler);
@ -868,7 +912,7 @@ ModulePtr check(const SourceModule& sourceModule, const std::vector<RequireCycle
moduleResolver,
builtinTypes,
iceHandler,
globalScope,
parentScope,
logger.get(),
NotNull{&dfg},
};
@ -911,11 +955,35 @@ ModulePtr check(const SourceModule& sourceModule, const std::vector<RequireCycle
return result;
}
// Type checks a single source module and returns the resulting module.
//
// Parameters:
//   sourceModule     - the parsed module to check.
//   mode             - checking mode; the constraint-solver path is only taken for Mode::Strict.
//   requireCycles    - require cycles handed to the checker.
//   environmentScope - optional custom environment; when absent, the global scope is used.
//   forAutocomplete  - selects the autocomplete module resolver AND the autocomplete globals.
//   recordJsonLog    - forwarded to the constraint-solver path.
//   typeCheckLimits  - optional limits copied onto the on-demand TypeChecker.
//
// When DebugLuauDeferredConstraintResolution is set and the mode is strict, the
// work is delegated to Luau::check. Otherwise a TypeChecker is created on demand
// (this path asserts that LuauOnDemandTypecheckers is enabled).
ModulePtr Frontend::check(const SourceModule& sourceModule, Mode mode, std::vector<RequireCycle> requireCycles,
    std::optional<ScopePtr> environmentScope, bool forAutocomplete, bool recordJsonLog, TypeCheckLimits typeCheckLimits)
{
    // Autocomplete checks must run against the autocomplete-specific globals, matching
    // how the deprecated autocomplete checker is constructed; always using
    // globals.globalScope would silently drop any autocomplete-only definitions.
    const ScopePtr& globalScope = forAutocomplete ? globalsForAutocomplete.globalScope : globals.globalScope;

    if (FFlag::DebugLuauDeferredConstraintResolution && mode == Mode::Strict)
    {
        return Luau::check(sourceModule, requireCycles, builtinTypes, NotNull{&iceHandler},
            NotNull{forAutocomplete ? &moduleResolverForAutocomplete : &moduleResolver}, NotNull{fileResolver},
            environmentScope ? *environmentScope : globalScope, options, recordJsonLog);
    }
    else
    {
        LUAU_ASSERT(FFlag::LuauOnDemandTypecheckers);

        TypeChecker typeChecker(globalScope, forAutocomplete ? &moduleResolverForAutocomplete : &moduleResolver, builtinTypes, &iceHandler);

        if (prepareModuleScope)
        {
            // Adapt the Frontend-level callback (which also receives forAutocomplete)
            // to the two-argument callback the TypeChecker expects. The checker is a
            // local used synchronously, so capturing `this` here is safe.
            typeChecker.prepareModuleScope = [this, forAutocomplete](const ModuleName& name, const ScopePtr& scope) {
                prepareModuleScope(name, scope, forAutocomplete);
            };
        }

        typeChecker.requireCycles = requireCycles;
        typeChecker.finishTime = typeCheckLimits.finishTime;
        typeChecker.instantiationChildLimit = typeCheckLimits.instantiationChildLimit;
        typeChecker.unifierIterationLimit = typeCheckLimits.unifierIterationLimit;

        return typeChecker.check(sourceModule, mode, environmentScope);
    }
}
// Read AST into sourceModules if necessary. Trace require()s. Report parse errors.

View File

@ -20,6 +20,7 @@ LUAU_FASTFLAGVARIABLE(LuauClonePublicInterfaceLess2, false);
LUAU_FASTFLAG(LuauSubstitutionReentrant);
LUAU_FASTFLAG(LuauClassTypeVarsInSubstitution);
LUAU_FASTFLAG(LuauSubstitutionFixMissingFields);
LUAU_FASTFLAGVARIABLE(LuauCopyExportedTypes, false);
namespace Luau
{
@ -37,14 +38,14 @@ static bool contains(Position pos, Comment comment)
return false;
}
bool isWithinComment(const SourceModule& sourceModule, Position pos)
static bool isWithinComment(const std::vector<Comment>& commentLocations, Position pos)
{
auto iter = std::lower_bound(sourceModule.commentLocations.begin(), sourceModule.commentLocations.end(),
Comment{Lexeme::Comment, Location{pos, pos}}, [](const Comment& a, const Comment& b) {
auto iter = std::lower_bound(
commentLocations.begin(), commentLocations.end(), Comment{Lexeme::Comment, Location{pos, pos}}, [](const Comment& a, const Comment& b) {
return a.location.end < b.location.end;
});
if (iter == sourceModule.commentLocations.end())
if (iter == commentLocations.end())
return false;
if (contains(pos, *iter))
@ -53,12 +54,22 @@ bool isWithinComment(const SourceModule& sourceModule, Position pos)
// Due to the nature of std::lower_bound, it is possible that iter points at a comment that ends
// at pos. We'll try the next comment, if it exists.
++iter;
if (iter == sourceModule.commentLocations.end())
if (iter == commentLocations.end())
return false;
return contains(pos, *iter);
}
// Reports whether `pos` lies inside one of the comments recorded on the source
// module, by delegating to the comment-list helper.
bool isWithinComment(const SourceModule& sourceModule, Position pos)
{
    const auto& recordedComments = sourceModule.commentLocations;
    return isWithinComment(recordedComments, pos);
}
// Reports whether `pos` lies inside one of the comments recorded on a parse
// result, by delegating to the comment-list helper.
bool isWithinComment(const ParseResult& result, Position pos)
{
    const auto& recordedComments = result.commentLocations;
    return isWithinComment(recordedComments, pos);
}
struct ClonePublicInterface : Substitution
{
NotNull<BuiltinTypes> builtinTypes;
@ -227,7 +238,7 @@ void Module::clonePublicInterface(NotNull<BuiltinTypes> builtinTypes, InternalEr
// Copy external stuff over to Module itself
this->returnType = moduleScope->returnType;
if (FFlag::DebugLuauDeferredConstraintResolution)
if (FFlag::DebugLuauDeferredConstraintResolution || FFlag::LuauCopyExportedTypes)
this->exportedTypeBindings = moduleScope->exportedTypeBindings;
else
this->exportedTypeBindings = std::move(moduleScope->exportedTypeBindings);

View File

@ -337,7 +337,16 @@ bool isSubset(const UnionType& super, const UnionType& sub)
return true;
}
// Returns true when `ty` (after following bound types) is the primitive
// `primTy`, or when ANY part produced by flattenIntersection for it is that
// primitive once followed. Returns false otherwise.
bool hasPrimitiveTypeInIntersection(TypeId ty, PrimitiveType::Type primTy)
{
    TypeId tf = follow(ty);
    if (isPrim(tf, primTy))
        return true;

    // Keep scanning until a match is found: returning the result of the first
    // part unconditionally would ignore every later part of the intersection.
    for (auto t : flattenIntersection(tf))
    {
        if (isPrim(follow(t), primTy))
            return true;
    }

    return false;
}
// When typechecking an assignment `x = e`, we typecheck `x:T` and `e:U`,
// then instantiate U if `isGeneric(U)` is true, and `maybeGeneric(T)` is false.
bool isGeneric(TypeId ty)

View File

@ -1160,11 +1160,7 @@ struct TypeChecker2
visit(expr, RValue);
TypeId leftType = stripFromNilAndReport(lookupType(expr), location);
const NormalizedType* norm = normalizer.normalize(leftType);
if (!norm)
reportError(NormalizationTooComplex{}, location);
checkIndexTypeFromType(leftType, *norm, propName, location, context);
checkIndexTypeFromType(leftType, propName, location, context);
}
void visit(AstExprIndexName* indexName, ValueContext context)
@ -2033,8 +2029,16 @@ struct TypeChecker2
reportError(std::move(e));
}
void checkIndexTypeFromType(TypeId tableTy, const NormalizedType& norm, const std::string& prop, const Location& location, ValueContext context)
// If the provided type does not have the named property, report an error.
void checkIndexTypeFromType(TypeId tableTy, const std::string& prop, const Location& location, ValueContext context)
{
const NormalizedType* norm = normalizer.normalize(tableTy);
if (!norm)
{
reportError(NormalizationTooComplex{}, location);
return;
}
bool foundOneProp = false;
std::vector<TypeId> typesMissingTheProp;
@ -2042,49 +2046,50 @@ struct TypeChecker2
if (!normalizer.isInhabited(ty))
return;
bool found = hasIndexTypeFromType(ty, prop, location);
std::unordered_set<TypeId> seen;
bool found = hasIndexTypeFromType(ty, prop, location, seen);
foundOneProp |= found;
if (!found)
typesMissingTheProp.push_back(ty);
};
fetch(norm.tops);
fetch(norm.booleans);
fetch(norm->tops);
fetch(norm->booleans);
if (FFlag::LuauNegatedClassTypes)
{
for (const auto& [ty, _negations] : norm.classes.classes)
for (const auto& [ty, _negations] : norm->classes.classes)
{
fetch(ty);
}
}
else
{
for (TypeId ty : norm.DEPRECATED_classes)
for (TypeId ty : norm->DEPRECATED_classes)
fetch(ty);
}
fetch(norm.errors);
fetch(norm.nils);
fetch(norm.numbers);
if (!norm.strings.isNever())
fetch(norm->errors);
fetch(norm->nils);
fetch(norm->numbers);
if (!norm->strings.isNever())
fetch(builtinTypes->stringType);
fetch(norm.threads);
for (TypeId ty : norm.tables)
fetch(norm->threads);
for (TypeId ty : norm->tables)
fetch(ty);
if (norm.functions.isTop)
if (norm->functions.isTop)
fetch(builtinTypes->functionType);
else if (!norm.functions.isNever())
else if (!norm->functions.isNever())
{
if (norm.functions.parts.size() == 1)
fetch(norm.functions.parts.front());
if (norm->functions.parts.size() == 1)
fetch(norm->functions.parts.front());
else
{
std::vector<TypeId> parts;
parts.insert(parts.end(), norm.functions.parts.begin(), norm.functions.parts.end());
parts.insert(parts.end(), norm->functions.parts.begin(), norm->functions.parts.end());
fetch(testArena.addType(IntersectionType{std::move(parts)}));
}
}
for (const auto& [tyvar, intersect] : norm.tyvars)
for (const auto& [tyvar, intersect] : norm->tyvars)
{
if (get<NeverType>(intersect->tops))
{
@ -2110,8 +2115,15 @@ struct TypeChecker2
}
}
bool hasIndexTypeFromType(TypeId ty, const std::string& prop, const Location& location)
bool hasIndexTypeFromType(TypeId ty, const std::string& prop, const Location& location, std::unordered_set<TypeId>& seen)
{
// If we have already encountered this type, we must assume that some
// other codepath will do the right thing and signal false if the
// property is not present.
const bool isUnseen = seen.insert(ty).second;
if (!isUnseen)
return true;
if (get<ErrorType>(ty) || get<AnyType>(ty) || get<NeverType>(ty))
return true;
@ -2136,10 +2148,12 @@ struct TypeChecker2
else if (const ClassType* cls = get<ClassType>(ty))
return bool(lookupClassProp(cls, prop));
else if (const UnionType* utv = get<UnionType>(ty))
ice.ice("getIndexTypeFromTypeHelper cannot take a UnionType");
return std::all_of(begin(utv), end(utv), [&](TypeId part) {
return hasIndexTypeFromType(part, prop, location, seen);
});
else if (const IntersectionType* itv = get<IntersectionType>(ty))
return std::any_of(begin(itv), end(itv), [&](TypeId part) {
return hasIndexTypeFromType(part, prop, location);
return hasIndexTypeFromType(part, prop, location, seen);
});
else
return false;

View File

@ -35,14 +35,13 @@ LUAU_FASTFLAG(LuauKnowsTheDataModel3)
LUAU_FASTFLAGVARIABLE(DebugLuauFreezeDuringUnification, false)
LUAU_FASTFLAGVARIABLE(LuauReturnAnyInsteadOfICE, false) // Eventually removed as false.
LUAU_FASTFLAGVARIABLE(DebugLuauSharedSelf, false)
LUAU_FASTFLAGVARIABLE(LuauTryhardAnd, false)
LUAU_FASTFLAG(LuauInstantiateInSubtyping)
LUAU_FASTFLAG(LuauNegatedClassTypes)
LUAU_FASTFLAGVARIABLE(LuauAllowIndexClassParameters, false)
LUAU_FASTFLAG(LuauUninhabitedSubAnything2)
LUAU_FASTFLAG(LuauOccursIsntAlwaysFailure)
LUAU_FASTFLAGVARIABLE(LuauTypecheckTypeguards, false)
LUAU_FASTFLAGVARIABLE(LuauTinyControlFlowAnalysis, false)
LUAU_FASTFLAGVARIABLE(LuauReducingAndOr, false)
namespace Luau
{
@ -1623,9 +1622,28 @@ ControlFlow TypeChecker::check(const ScopePtr& scope, const AstStatTypeAlias& ty
TypeId& bindingType = bindingsMap[name].type;
if (unify(ty, bindingType, aliasScope, typealias.location))
bindingType = ty;
if (!FFlag::LuauOccursIsntAlwaysFailure)
{
if (unify(ty, bindingType, aliasScope, typealias.location))
bindingType = ty;
return ControlFlow::None;
}
unify(ty, bindingType, aliasScope, typealias.location);
// It is possible for this unification to succeed but for
// `bindingType` still to be free. For example, in
// `type T = T|T`, we generate a fresh free type `X`, and then
// unify `X` with `X|X`, which succeeds without binding `X` to
// anything, since `X <: X|X`.
if (bindingType->ty.get_if<FreeType>())
{
ty = errorRecoveryType(aliasScope);
unify(ty, bindingType, aliasScope, typealias.location);
reportError(TypeError{typealias.location, OccursCheckFailed{}});
}
bindingType = ty;
return ControlFlow::None;
}
@ -2848,7 +2866,7 @@ TypeId TypeChecker::checkRelationalOperation(
{
return lhsType;
}
else if (FFlag::LuauTryhardAnd)
else
{
// If lhs is free, we can't tell which 'falsy' components it has, if any
if (get<FreeType>(lhsType))
@ -2860,14 +2878,11 @@ TypeId TypeChecker::checkRelationalOperation(
{
LUAU_ASSERT(oty);
if (FFlag::LuauReducingAndOr)
{
// Perform a limited form of type reduction for booleans
if (isPrim(*oty, PrimitiveType::Boolean) && get<BooleanSingleton>(get<SingletonType>(follow(rhsType))))
return booleanType;
if (isPrim(rhsType, PrimitiveType::Boolean) && get<BooleanSingleton>(get<SingletonType>(follow(*oty))))
return booleanType;
}
// Perform a limited form of type reduction for booleans
if (isPrim(*oty, PrimitiveType::Boolean) && get<BooleanSingleton>(get<SingletonType>(follow(rhsType))))
return booleanType;
if (isPrim(rhsType, PrimitiveType::Boolean) && get<BooleanSingleton>(get<SingletonType>(follow(*oty))))
return booleanType;
return unionOfTypes(*oty, rhsType, scope, expr.location, false);
}
@ -2876,16 +2891,12 @@ TypeId TypeChecker::checkRelationalOperation(
return rhsType;
}
}
else
{
return unionOfTypes(rhsType, booleanType, scope, expr.location, false);
}
case AstExprBinary::Or:
if (lhsIsAny)
{
return lhsType;
}
else if (FFlag::LuauTryhardAnd)
else
{
auto [oty, notNever] = pickTypesFromSense(lhsType, true, neverType); // Filter out truthy types
@ -2893,14 +2904,11 @@ TypeId TypeChecker::checkRelationalOperation(
{
LUAU_ASSERT(oty);
if (FFlag::LuauReducingAndOr)
{
// Perform a limited form of type reduction for booleans
if (isPrim(*oty, PrimitiveType::Boolean) && get<BooleanSingleton>(get<SingletonType>(follow(rhsType))))
return booleanType;
if (isPrim(rhsType, PrimitiveType::Boolean) && get<BooleanSingleton>(get<SingletonType>(follow(*oty))))
return booleanType;
}
// Perform a limited form of type reduction for booleans
if (isPrim(*oty, PrimitiveType::Boolean) && get<BooleanSingleton>(get<SingletonType>(follow(rhsType))))
return booleanType;
if (isPrim(rhsType, PrimitiveType::Boolean) && get<BooleanSingleton>(get<SingletonType>(follow(*oty))))
return booleanType;
return unionOfTypes(*oty, rhsType, scope, expr.location);
}
@ -2909,10 +2917,6 @@ TypeId TypeChecker::checkRelationalOperation(
return rhsType;
}
}
else
{
return unionOfTypes(lhsType, rhsType, scope, expr.location);
}
default:
LUAU_ASSERT(0);
ice(format("checkRelationalOperation called with incorrect binary expression '%s'", toString(expr.op).c_str()), expr.location);

View File

@ -19,8 +19,10 @@ LUAU_FASTINT(LuauTypeInferTypePackLoopLimit)
LUAU_FASTFLAG(LuauErrorRecoveryType)
LUAU_FASTFLAGVARIABLE(LuauInstantiateInSubtyping, false)
LUAU_FASTFLAGVARIABLE(LuauUninhabitedSubAnything2, false)
LUAU_FASTFLAGVARIABLE(LuauVariadicAnyCanBeGeneric, false)
LUAU_FASTFLAGVARIABLE(LuauMaintainScopesInUnifier, false)
LUAU_FASTFLAGVARIABLE(LuauTransitiveSubtyping, false)
LUAU_FASTFLAGVARIABLE(LuauOccursIsntAlwaysFailure, false)
LUAU_FASTFLAG(LuauClassTypeVarsInSubstitution)
LUAU_FASTFLAG(DebugLuauDeferredConstraintResolution)
LUAU_FASTFLAG(LuauNormalizeBlockedTypes)
@ -431,14 +433,14 @@ void Unifier::tryUnify_(TypeId subTy, TypeId superTy, bool isFunctionCall, bool
if (superFree && subFree && subsumes(useScopes, superFree, subFree))
{
if (!occursCheck(subTy, superTy))
if (!occursCheck(subTy, superTy, /* reversed = */ false))
log.replace(subTy, BoundType(superTy));
return;
}
else if (superFree && subFree)
{
if (!occursCheck(superTy, subTy))
if (!occursCheck(superTy, subTy, /* reversed = */ true))
{
if (subsumes(useScopes, superFree, subFree))
{
@ -461,7 +463,7 @@ void Unifier::tryUnify_(TypeId subTy, TypeId superTy, bool isFunctionCall, bool
return;
}
if (!occursCheck(superTy, subTy))
if (!occursCheck(superTy, subTy, /* reversed = */ true))
{
promoteTypeLevels(log, types, superFree->level, superFree->scope, useScopes, subTy);
@ -487,7 +489,7 @@ void Unifier::tryUnify_(TypeId subTy, TypeId superTy, bool isFunctionCall, bool
return;
}
if (!occursCheck(subTy, superTy))
if (!occursCheck(subTy, superTy, /* reversed = */ false))
{
promoteTypeLevels(log, types, subFree->level, subFree->scope, useScopes, superTy);
log.replace(subTy, BoundType(superTy));
@ -1593,7 +1595,7 @@ void Unifier::tryUnify_(TypePackId subTp, TypePackId superTp, bool isFunctionCal
if (log.getMutable<FreeTypePack>(superTp))
{
if (!occursCheck(superTp, subTp))
if (!occursCheck(superTp, subTp, /* reversed = */ true))
{
Widen widen{types, builtinTypes};
log.replace(superTp, Unifiable::Bound<TypePackId>(widen(subTp)));
@ -1601,7 +1603,7 @@ void Unifier::tryUnify_(TypePackId subTp, TypePackId superTp, bool isFunctionCal
}
else if (log.getMutable<FreeTypePack>(subTp))
{
if (!occursCheck(subTp, superTp))
if (!occursCheck(subTp, superTp, /* reversed = */ false))
{
log.replace(subTp, Unifiable::Bound<TypePackId>(superTp));
}
@ -2585,13 +2587,14 @@ static void queueTypePack(std::vector<TypeId>& queue, DenseHashSet<TypePackId>&
void Unifier::tryUnifyVariadics(TypePackId subTp, TypePackId superTp, bool reversed, int subOffset)
{
const VariadicTypePack* superVariadic = log.getMutable<VariadicTypePack>(superTp);
const TypeId variadicTy = follow(superVariadic->ty);
if (!superVariadic)
ice("passed non-variadic pack to tryUnifyVariadics");
if (const VariadicTypePack* subVariadic = log.get<VariadicTypePack>(subTp))
{
tryUnify_(reversed ? superVariadic->ty : subVariadic->ty, reversed ? subVariadic->ty : superVariadic->ty);
tryUnify_(reversed ? variadicTy : subVariadic->ty, reversed ? subVariadic->ty : variadicTy);
}
else if (log.get<TypePack>(subTp))
{
@ -2602,7 +2605,7 @@ void Unifier::tryUnifyVariadics(TypePackId subTp, TypePackId superTp, bool rever
while (subIter != subEnd)
{
tryUnify_(reversed ? superVariadic->ty : *subIter, reversed ? *subIter : superVariadic->ty);
tryUnify_(reversed ? variadicTy : *subIter, reversed ? *subIter : variadicTy);
++subIter;
}
@ -2615,7 +2618,7 @@ void Unifier::tryUnifyVariadics(TypePackId subTp, TypePackId superTp, bool rever
}
else if (const VariadicTypePack* vtp = get<VariadicTypePack>(tail))
{
tryUnify_(vtp->ty, superVariadic->ty);
tryUnify_(vtp->ty, variadicTy);
}
else if (get<GenericTypePack>(tail))
{
@ -2631,6 +2634,10 @@ void Unifier::tryUnifyVariadics(TypePackId subTp, TypePackId superTp, bool rever
}
}
}
else if (FFlag::LuauVariadicAnyCanBeGeneric && get<AnyType>(variadicTy) && log.get<GenericTypePack>(subTp))
{
// Nothing to do. This is ok.
}
else
{
reportError(location, GenericError{"Failed to unify variadic packs"});
@ -2751,11 +2758,42 @@ TxnLog Unifier::combineLogsIntoUnion(std::vector<TxnLog> logs)
return result;
}
bool Unifier::occursCheck(TypeId needle, TypeId haystack)
bool Unifier::occursCheck(TypeId needle, TypeId haystack, bool reversed)
{
sharedState.tempSeenTy.clear();
return occursCheck(sharedState.tempSeenTy, needle, haystack);
bool occurs = occursCheck(sharedState.tempSeenTy, needle, haystack);
if (occurs && FFlag::LuauOccursIsntAlwaysFailure)
{
Unifier innerState = makeChildUnifier();
if (const UnionType* ut = get<UnionType>(haystack))
{
if (reversed)
innerState.tryUnifyUnionWithType(haystack, ut, needle);
else
innerState.tryUnifyTypeWithUnion(needle, haystack, ut, /* cacheEnabled = */ false, /* isFunction = */ false);
}
else if (const IntersectionType* it = get<IntersectionType>(haystack))
{
if (reversed)
innerState.tryUnifyIntersectionWithType(haystack, it, needle, /* cacheEnabled = */ false, /* isFunction = */ false);
else
innerState.tryUnifyTypeWithIntersection(needle, haystack, it);
}
else
{
innerState.failure = true;
}
if (innerState.failure)
{
reportError(location, OccursCheckFailed{});
log.replace(needle, *builtinTypes->errorRecoveryType());
}
}
return occurs;
}
bool Unifier::occursCheck(DenseHashSet<TypeId>& seen, TypeId needle, TypeId haystack)
@ -2785,8 +2823,11 @@ bool Unifier::occursCheck(DenseHashSet<TypeId>& seen, TypeId needle, TypeId hays
if (needle == haystack)
{
reportError(location, OccursCheckFailed{});
log.replace(needle, *builtinTypes->errorRecoveryType());
if (!FFlag::LuauOccursIsntAlwaysFailure)
{
reportError(location, OccursCheckFailed{});
log.replace(needle, *builtinTypes->errorRecoveryType());
}
return true;
}
@ -2807,11 +2848,19 @@ bool Unifier::occursCheck(DenseHashSet<TypeId>& seen, TypeId needle, TypeId hays
return occurrence;
}
bool Unifier::occursCheck(TypePackId needle, TypePackId haystack)
bool Unifier::occursCheck(TypePackId needle, TypePackId haystack, bool reversed)
{
sharedState.tempSeenTp.clear();
return occursCheck(sharedState.tempSeenTp, needle, haystack);
bool occurs = occursCheck(sharedState.tempSeenTp, needle, haystack);
if (occurs && FFlag::LuauOccursIsntAlwaysFailure)
{
reportError(location, OccursCheckFailed{});
log.replace(needle, *builtinTypes->errorRecoveryTypePack());
}
return occurs;
}
bool Unifier::occursCheck(DenseHashSet<TypePackId>& seen, TypePackId needle, TypePackId haystack)
@ -2836,8 +2885,11 @@ bool Unifier::occursCheck(DenseHashSet<TypePackId>& seen, TypePackId needle, Typ
{
if (needle == haystack)
{
reportError(location, OccursCheckFailed{});
log.replace(needle, *builtinTypes->errorRecoveryTypePack());
if (!FFlag::LuauOccursIsntAlwaysFailure)
{
reportError(location, OccursCheckFailed{});
log.replace(needle, *builtinTypes->errorRecoveryTypePack());
}
return true;
}

View File

@ -167,7 +167,9 @@ size_t editDistance(std::string_view a, std::string_view b)
for (size_t y = 1; y <= b.size(); ++y)
{
size_t x1 = seenCharToRow[b[y - 1]];
// The value of b[N] can be negative with unicode characters
unsigned char bSeenCharIndex = static_cast<unsigned char>(b[y - 1]);
size_t x1 = seenCharToRow[bSeenCharIndex];
size_t y1 = lastMatchedY;
size_t cost = 1;
@ -187,7 +189,9 @@ size_t editDistance(std::string_view a, std::string_view b)
distances[getPos(x + 1, y + 1)] = std::min(std::min(insertion, deletion), std::min(substitution, transposition));
}
seenCharToRow[a[x - 1]] = x;
// The value of a[N] can be negative with unicode characters
unsigned char aSeenCharIndex = static_cast<unsigned char>(a[x - 1]);
seenCharToRow[aSeenCharIndex] = x;
}
return distances[getPos(a.size() + 1, b.size() + 1)];

View File

@ -29,7 +29,7 @@ struct AddressA64
// For example, ldr x0, [reg+imm] is limited to 8 KB offsets assuming imm is divisible by 8, but loading into w0 reduces the range to 4 KB
static constexpr size_t kMaxOffset = 1023;
AddressA64(RegisterA64 base, int off = 0)
constexpr AddressA64(RegisterA64 base, int off = 0)
: kind(AddressKindA64::imm)
, base(base)
, offset(xzr)
@ -38,7 +38,7 @@ struct AddressA64
LUAU_ASSERT(base.kind == KindA64::x || base == sp);
}
AddressA64(RegisterA64 base, RegisterA64 offset)
constexpr AddressA64(RegisterA64 base, RegisterA64 offset)
: kind(AddressKindA64::reg)
, base(base)
, offset(offset)

View File

@ -49,17 +49,25 @@ public:
void cmp(RegisterA64 src1, RegisterA64 src2);
void cmp(RegisterA64 src1, uint16_t src2);
void csel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond);
void cset(RegisterA64 dst, ConditionA64 cond);
// Bitwise
// TODO: support immediate arguments (they have odd encoding and forbid many values)
// TODO: support bic (andnot)
// TODO: support shifts
// TODO: support bitfield ops
void and_(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void orr(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void eor(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void bic(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void tst(RegisterA64 src1, RegisterA64 src2);
void mvn(RegisterA64 dst, RegisterA64 src);
// Bitwise with immediate
// Note: immediate must have a single contiguous sequence of 1 bits set of length 1..31
void and_(RegisterA64 dst, RegisterA64 src1, uint32_t src2);
void orr(RegisterA64 dst, RegisterA64 src1, uint32_t src2);
void eor(RegisterA64 dst, RegisterA64 src1, uint32_t src2);
void tst(RegisterA64 src1, uint32_t src2);
// Shifts
void lsl(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
void lsr(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
@ -168,7 +176,7 @@ public:
private:
// Instruction archetypes
void place0(const char* name, uint32_t word);
void placeSR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, int shift = 0);
void placeSR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, int shift = 0, int N = 0);
void placeSR2(const char* name, RegisterA64 dst, RegisterA64 src, uint8_t op, uint8_t op2 = 0);
void placeR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, uint8_t op2);
void placeR1(const char* name, RegisterA64 dst, RegisterA64 src, uint32_t op);
@ -181,8 +189,9 @@ private:
void placeADR(const char* name, RegisterA64 src, uint8_t op);
void placeADR(const char* name, RegisterA64 src, uint8_t op, Label& label);
void placeP(const char* name, RegisterA64 dst1, RegisterA64 dst2, AddressA64 src, uint8_t op, uint8_t opc, int sizelog);
void placeCS(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond, uint8_t op, uint8_t opc);
void placeCS(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond, uint8_t op, uint8_t opc, int invert = 0);
void placeFCMP(const char* name, RegisterA64 src1, RegisterA64 src2, uint8_t op, uint8_t opc);
void placeBM(const char* name, RegisterA64 dst, RegisterA64 src1, uint32_t src2, uint8_t op);
void place(uint32_t word);

View File

@ -41,12 +41,14 @@ public:
void call(const OperandX64& func);
RegisterX64 suggestNextArgumentRegister(SizeX64 size) const;
IrRegAllocX64& regs;
AssemblyBuilderX64& build;
uint32_t instIdx = ~0u;
private:
void assignTargetRegisters();
OperandX64 getNextArgumentTarget(SizeX64 size) const;
void countRegisterUses();
CallArgument* findNonInterferingArgument();
bool interferesWithOperand(const OperandX64& op, RegisterX64 reg) const;
@ -67,6 +69,9 @@ private:
std::array<CallArgument, kMaxCallArguments> args;
int argCount = 0;
int gprPos = 0;
int xmmPos = 0;
OperandX64 funcOp;
// Internal counters for remaining register use counts

View File

@ -155,7 +155,7 @@ enum class IrCmd : uint8_t
// Compute Luau 'not' operation on destructured TValue
// A: tag
// B: double
// B: int (value)
NOT_ANY, // TODO: boolean specialization will be useful
// Unconditional jump
@ -233,7 +233,7 @@ enum class IrCmd : uint8_t
// Try to get pointer to tag method TValue inside the table's metatable or jump if there is no such value or metatable
// A: table
// B: int
// B: int (TMS enum)
// C: block
TRY_CALL_FASTGETTM,
@ -256,8 +256,8 @@ enum class IrCmd : uint8_t
// B: Rn (result start)
// C: Rn (argument start)
// D: Rn or Kn or a boolean that's false (optional second argument)
// E: int (argument count or -1 to use all arguments up to stack top)
// F: int (result count or -1 to preserve all results and adjust stack top)
// E: int (argument count)
// F: int (result count)
FASTCALL,
// Call the fastcall builtin function
@ -517,8 +517,10 @@ enum class IrCmd : uint8_t
FALLBACK_FORGPREP,
// Instruction that passes value through, it is produced by constant folding and users substitute it with the value
// When operand location is set, updates the tracked location of the value in memory
SUBSTITUTE,
// A: operand of any type
// B: Rn/Kn/none (location of operand in memory; optional)
};
enum class IrConstKind : uint8_t
@ -694,6 +696,9 @@ struct IrFunction
std::vector<BytecodeMapping> bcMapping;
// For each instruction, an operand that can be used to recompute the value
std::vector<IrOp> valueRestoreOps;
Proto* proto = nullptr;
CfgInfo cfg;
@ -829,19 +834,40 @@ struct IrFunction
return value.valueDouble;
}
uint32_t getBlockIndex(const IrBlock& block)
uint32_t getBlockIndex(const IrBlock& block) const
{
// Can only be called with blocks from our vector
LUAU_ASSERT(&block >= blocks.data() && &block <= blocks.data() + blocks.size());
return uint32_t(&block - blocks.data());
}
uint32_t getInstIndex(const IrInst& inst)
uint32_t getInstIndex(const IrInst& inst) const
{
// Can only be called with instructions from our vector
LUAU_ASSERT(&inst >= instructions.data() && &inst <= instructions.data() + instructions.size());
return uint32_t(&inst - instructions.data());
}
// Remembers 'location' as the restore operand for the instruction at index 'instIdx'
void recordRestoreOp(uint32_t instIdx, IrOp location)
{
    // Grow the table on demand so that 'instIdx' addresses a valid slot
    if (valueRestoreOps.size() <= instIdx)
        valueRestoreOps.resize(instIdx + 1);

    valueRestoreOps[instIdx] = location;
}
// Looks up the restore operand recorded for instruction 'instIdx'
// An index beyond the recorded range yields a default-constructed IrOp
IrOp findRestoreOp(uint32_t instIdx) const
{
    const bool hasEntry = instIdx < valueRestoreOps.size();

    if (hasEntry)
        return valueRestoreOps[instIdx];

    return {};
}
// Convenience overload: resolves the instruction to its index and forwards to the index-based lookup
IrOp findRestoreOp(const IrInst& inst) const
{
    const uint32_t instIdx = getInstIndex(inst);
    return findRestoreOp(instIdx);
}
};
inline IrCondition conditionOp(IrOp op)

View File

@ -20,7 +20,9 @@ constexpr uint8_t kNoStackSlot = 0xff;
struct IrSpillX64
{
uint32_t instIdx = 0;
bool useDoubleSlot = 0;
IrValueKind valueKind = IrValueKind::Unknown;
unsigned spillId = 0;
// Spill location can be a stack location or be empty
// When it's empty, it means that instruction value can be rematerialized
@ -33,12 +35,8 @@ struct IrRegAllocX64
{
IrRegAllocX64(AssemblyBuilderX64& build, IrFunction& function);
RegisterX64 allocGprReg(SizeX64 preferredSize, uint32_t instIdx);
RegisterX64 allocXmmReg(uint32_t instIdx);
RegisterX64 allocGprRegOrReuse(SizeX64 preferredSize, uint32_t instIdx, std::initializer_list<IrOp> oprefs);
RegisterX64 allocXmmRegOrReuse(uint32_t instIdx, std::initializer_list<IrOp> oprefs);
RegisterX64 allocReg(SizeX64 size, uint32_t instIdx);
RegisterX64 allocRegOrReuse(SizeX64 size, uint32_t instIdx, std::initializer_list<IrOp> oprefs);
RegisterX64 takeReg(RegisterX64 reg, uint32_t instIdx);
void freeReg(RegisterX64 reg);
@ -49,6 +47,12 @@ struct IrRegAllocX64
bool shouldFreeGpr(RegisterX64 reg) const;
unsigned findSpillStackSlot(IrValueKind valueKind);
IrOp getRestoreOp(const IrInst& inst) const;
bool hasRestoreOp(const IrInst& inst) const;
OperandX64 getRestoreAddress(const IrInst& inst, IrOp restoreOp);
// Register used by instruction is about to be freed, have to find a way to restore value later
void preserve(IrInst& inst);
@ -74,6 +78,7 @@ struct IrRegAllocX64
std::bitset<256> usedSpillSlots;
unsigned maxUsedSlot = 0;
unsigned nextSpillId = 1;
std::vector<IrSpillX64> spills;
};
@ -107,10 +112,8 @@ struct ScopedSpills
ScopedSpills(const ScopedSpills&) = delete;
ScopedSpills& operator=(const ScopedSpills&) = delete;
bool wasSpilledBefore(const IrSpillX64& spill) const;
IrRegAllocX64& owner;
std::vector<IrSpillX64> snapshot;
unsigned startSpillId = 0;
};
} // namespace X64

View File

@ -200,7 +200,7 @@ void replace(IrFunction& function, IrOp& original, IrOp replacement);
void replace(IrFunction& function, IrBlock& block, uint32_t instIdx, IrInst replacement);
// Replace instruction with a different value (using IrCmd::SUBSTITUTE)
void substitute(IrFunction& function, IrInst& inst, IrOp replacement);
void substitute(IrFunction& function, IrInst& inst, IrOp replacement, IrOp location = {});
// Replace instruction arguments that point to substitutions with target values
void applySubstitutions(IrFunction& function, IrOp& op);

View File

@ -46,6 +46,18 @@ constexpr RegisterA64 castReg(KindA64 kind, RegisterA64 reg)
return RegisterA64{kind, reg.index};
}
// Reinterprets a wN register as the xN register with the same index
// The result matches castReg(KindA64::x), but the intent differs: this helper is for treating wN as xN *after* wN has been written
// Every A64 instruction that writes wN implicitly clears the top half, so reading xN afterwards gives zero extension semantics
// This is *not* safe on an ABI boundary: an int parameter passed in wN may have anything in its top half in certain cases
// As long as our codegen doesn't perform 32-bit truncation via castReg x=>w, relying on this is safe
constexpr RegisterA64 zextReg(RegisterA64 reg)
{
    LUAU_ASSERT(reg.kind == KindA64::w);

    const RegisterA64 widened{KindA64::x, reg.index};
    return widened;
}
constexpr RegisterA64 noreg{KindA64::none, 0};
constexpr RegisterA64 w0{KindA64::w, 0};

View File

@ -46,6 +46,18 @@ constexpr RegisterX64 al{SizeX64::byte, 0};
constexpr RegisterX64 cl{SizeX64::byte, 1};
constexpr RegisterX64 dl{SizeX64::byte, 2};
constexpr RegisterX64 bl{SizeX64::byte, 3};
constexpr RegisterX64 spl{SizeX64::byte, 4};
constexpr RegisterX64 bpl{SizeX64::byte, 5};
constexpr RegisterX64 sil{SizeX64::byte, 6};
constexpr RegisterX64 dil{SizeX64::byte, 7};
constexpr RegisterX64 r8b{SizeX64::byte, 8};
constexpr RegisterX64 r9b{SizeX64::byte, 9};
constexpr RegisterX64 r10b{SizeX64::byte, 10};
constexpr RegisterX64 r11b{SizeX64::byte, 11};
constexpr RegisterX64 r12b{SizeX64::byte, 12};
constexpr RegisterX64 r13b{SizeX64::byte, 13};
constexpr RegisterX64 r14b{SizeX64::byte, 14};
constexpr RegisterX64 r15b{SizeX64::byte, 15};
constexpr RegisterX64 eax{SizeX64::dword, 0};
constexpr RegisterX64 ecx{SizeX64::dword, 1};

View File

@ -11,6 +11,9 @@ namespace Luau
namespace CodeGen
{
// This value is used in 'finishFunction' to mark the function that spans to the end of the whole code block
// Declared constexpr so that the header does not hand every translation unit its own writable copy of the sentinel
// Note: the identifier's spelling is kept as-is because code outside of this header refers to it by this name
constexpr uint32_t kFullBlockFuncton = ~0u;
class UnwindBuilder
{
public:
@ -19,19 +22,22 @@ public:
virtual void setBeginOffset(size_t beginOffset) = 0;
virtual size_t getBeginOffset() const = 0;
virtual void start() = 0;
virtual void startInfo() = 0;
virtual void startFunction() = 0;
virtual void spill(int espOffset, X64::RegisterX64 reg) = 0;
virtual void save(X64::RegisterX64 reg) = 0;
virtual void allocStack(int size) = 0;
virtual void setupFrameReg(X64::RegisterX64 reg, int espOffset) = 0;
virtual void finishFunction(uint32_t beginOffset, uint32_t endOffset) = 0;
virtual void finish() = 0;
virtual void finishInfo() = 0;
virtual size_t getSize() const = 0;
virtual size_t getFunctionCount() const = 0;
// This will place the unwinding data at the target address and might update values of some fields
virtual void finalize(char* target, void* funcAddress, size_t funcSize) const = 0;
virtual void finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const = 0;
};
} // namespace CodeGen

View File

@ -4,34 +4,48 @@
#include "Luau/RegisterX64.h"
#include "UnwindBuilder.h"
#include <vector>
namespace Luau
{
namespace CodeGen
{
// Per-function record kept by UnwindBuilderDwarf2 (stored in its 'unwindFunctions' vector)
// NOTE(review): the field descriptions below are inferred from the field names; confirm against UnwindBuilderDwarf2.cpp
struct UnwindFunctionDwarf2
{
uint32_t beginOffset; // presumably the offset where the function's code begins
uint32_t endOffset; // presumably the offset where the function's code ends
uint32_t fdeEntryStartPos; // presumably the position of the function's FDE entry inside the raw unwind data
};
class UnwindBuilderDwarf2 : public UnwindBuilder
{
public:
void setBeginOffset(size_t beginOffset) override;
size_t getBeginOffset() const override;
void start() override;
void startInfo() override;
void startFunction() override;
void spill(int espOffset, X64::RegisterX64 reg) override;
void save(X64::RegisterX64 reg) override;
void allocStack(int size) override;
void setupFrameReg(X64::RegisterX64 reg, int espOffset) override;
void finishFunction(uint32_t beginOffset, uint32_t endOffset) override;
void finish() override;
void finishInfo() override;
size_t getSize() const override;
size_t getFunctionCount() const override;
void finalize(char* target, void* funcAddress, size_t funcSize) const override;
void finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const override;
private:
size_t beginOffset = 0;
static const unsigned kRawDataLimit = 128;
std::vector<UnwindFunctionDwarf2> unwindFunctions;
static const unsigned kRawDataLimit = 1024;
uint8_t rawData[kRawDataLimit];
uint8_t* pos = rawData;

View File

@ -11,6 +11,25 @@ namespace Luau
namespace CodeGen
{
// This struct matches the layout of x64 RUNTIME_FUNCTION from winnt.h
// Do not reorder or resize the fields: the layout has to stay in sync with the OS structure
struct UnwindFunctionWin
{
uint32_t beginOffset; // counterpart of RUNTIME_FUNCTION::BeginAddress
uint32_t endOffset; // counterpart of RUNTIME_FUNCTION::EndAddress
uint32_t unwindInfoOffset; // counterpart of RUNTIME_FUNCTION::UnwindInfoAddress
};
// This struct matches the layout of x64 UNWIND_INFO from ehdata.h
// Do not reorder or resize the fields: the bit-field widths have to stay in sync with the OS structure
struct UnwindInfoWin
{
// 3-bit version and 5-bit flags are declared back to back as uint8_t bit-fields
uint8_t version : 3;
uint8_t flags : 5;
uint8_t prologsize; // NOTE(review): presumably the prolog size in bytes; confirm against UNWIND_INFO documentation
uint8_t unwindcodecount; // NOTE(review): presumably the number of unwind code slots that follow; confirm
// 4-bit frame register and 4-bit frame register offset are declared back to back as uint8_t bit-fields
uint8_t framereg : 4;
uint8_t frameregoff : 4;
};
// This struct matches the layout of UNWIND_CODE from ehdata.h
struct UnwindCodeWin
{
@ -25,31 +44,38 @@ public:
void setBeginOffset(size_t beginOffset) override;
size_t getBeginOffset() const override;
void start() override;
void startInfo() override;
void startFunction() override;
void spill(int espOffset, X64::RegisterX64 reg) override;
void save(X64::RegisterX64 reg) override;
void allocStack(int size) override;
void setupFrameReg(X64::RegisterX64 reg, int espOffset) override;
void finishFunction(uint32_t beginOffset, uint32_t endOffset) override;
void finish() override;
void finishInfo() override;
size_t getSize() const override;
size_t getFunctionCount() const override;
void finalize(char* target, void* funcAddress, size_t funcSize) const override;
void finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const override;
private:
size_t beginOffset = 0;
static const unsigned kRawDataLimit = 1024;
uint8_t rawData[kRawDataLimit];
uint8_t* rawDataPos = rawData;
std::vector<UnwindFunctionWin> unwindFunctions;
// Windows unwind codes are written in reverse, so we have to collect them all first
std::vector<UnwindCodeWin> unwindCodes;
uint8_t prologSize = 0;
X64::RegisterX64 frameReg = X64::rax; // rax means that frame register is not used
X64::RegisterX64 frameReg = X64::noreg;
uint8_t frameRegOffset = 0;
uint32_t stackOffset = 0;
size_t infoSize = 0;
};
} // namespace CodeGen

View File

@ -1,6 +1,7 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/AssemblyBuilderA64.h"
#include "BitUtils.h"
#include "ByteUtils.h"
#include <stdarg.h>
@ -126,6 +127,15 @@ void AssemblyBuilderA64::csel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src
placeCS("csel", dst, src1, src2, cond, 0b11010'10'0, 0b00);
}
// Emits 'cset dst, cond' through the conditional-select archetype
// Both sources are the zero register of dst's width, and invert=1 makes placeCS flip the low bit of the condition code
void AssemblyBuilderA64::cset(RegisterA64 dst, ConditionA64 cond)
{
    LUAU_ASSERT(dst.kind == KindA64::x || dst.kind == KindA64::w);

    const bool is64Bit = dst.kind == KindA64::x;
    const RegisterA64 zero = is64Bit ? xzr : wzr;

    placeCS("cset", dst, zero, zero, cond, 0b11010'10'0, 0b01, /* invert= */ 1);
}
void AssemblyBuilderA64::and_(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2)
{
placeSR3("and", dst, src1, src2, 0b00'01010);
@ -141,11 +151,45 @@ void AssemblyBuilderA64::eor(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2
placeSR3("eor", dst, src1, src2, 0b10'01010);
}
// Emits 'bic dst, src1, src2': uses the same shifted-register opcode value as 'and', with no shift and the N field set to 1
void AssemblyBuilderA64::bic(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2)
{
    constexpr int kShift = 0;
    constexpr int kN = 1;

    placeSR3("bic", dst, src1, src2, 0b00'01010, kShift, kN);
}
// Emits 'tst src1, src2' (register form); the destination is the zero register matching the width of src1
void AssemblyBuilderA64::tst(RegisterA64 src1, RegisterA64 src2)
{
    const bool is64Bit = src1.kind == KindA64::x;
    const RegisterA64 discard = is64Bit ? xzr : wzr;

    placeSR3("tst", discard, src1, src2, 0b11'01010);
}
// Emits 'mvn dst, src' through the two-operand shifted-register archetype
void AssemblyBuilderA64::mvn(RegisterA64 dst, RegisterA64 src)
{
    constexpr uint8_t kOp = 0b01'01010;
    constexpr uint8_t kOp2 = 0b1;

    placeSR2("mvn", dst, src, kOp, kOp2);
}
// Emits 'and dst, src1, #src2'; the immediate is validated and encoded by placeBM
void AssemblyBuilderA64::and_(RegisterA64 dst, RegisterA64 src1, uint32_t src2)
{
    constexpr uint8_t kOp = 0b00'100100;

    placeBM("and", dst, src1, src2, kOp);
}
// Emits 'orr dst, src1, #src2'; the immediate is validated and encoded by placeBM
void AssemblyBuilderA64::orr(RegisterA64 dst, RegisterA64 src1, uint32_t src2)
{
    constexpr uint8_t kOp = 0b01'100100;

    placeBM("orr", dst, src1, src2, kOp);
}
// Emits 'eor dst, src1, #src2'; the immediate is validated and encoded by placeBM
void AssemblyBuilderA64::eor(RegisterA64 dst, RegisterA64 src1, uint32_t src2)
{
    constexpr uint8_t kOp = 0b10'100100;

    placeBM("eor", dst, src1, src2, kOp);
}
// Emits 'tst src1, #src2' (immediate form); the destination is the zero register matching the width of src1
void AssemblyBuilderA64::tst(RegisterA64 src1, uint32_t src2)
{
    const bool is64Bit = src1.kind == KindA64::x;
    const RegisterA64 discard = is64Bit ? xzr : wzr;

    placeBM("tst", discard, src1, src2, 0b11'100100);
}
void AssemblyBuilderA64::lsl(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2)
{
placeR3("lsl", dst, src1, src2, 0b11010110, 0b0010'00);
@ -583,7 +627,7 @@ void AssemblyBuilderA64::place0(const char* name, uint32_t op)
commit();
}
void AssemblyBuilderA64::placeSR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, int shift)
void AssemblyBuilderA64::placeSR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, int shift, int N)
{
if (logText)
log(name, dst, src1, src2, shift);
@ -594,7 +638,7 @@ void AssemblyBuilderA64::placeSR3(const char* name, RegisterA64 dst, RegisterA64
uint32_t sf = (dst.kind == KindA64::x) ? 0x80000000 : 0;
place(dst.index | (src1.index << 5) | (shift << 10) | (src2.index << 16) | (op << 24) | sf);
place(dst.index | (src1.index << 5) | (shift << 10) | (src2.index << 16) | (N << 21) | (op << 24) | sf);
commit();
}
@ -764,7 +808,8 @@ void AssemblyBuilderA64::placeP(const char* name, RegisterA64 src1, RegisterA64
commit();
}
void AssemblyBuilderA64::placeCS(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond, uint8_t op, uint8_t opc)
void AssemblyBuilderA64::placeCS(
const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond, uint8_t op, uint8_t opc, int invert)
{
if (logText)
log(name, dst, src1, src2, cond);
@ -773,7 +818,7 @@ void AssemblyBuilderA64::placeCS(const char* name, RegisterA64 dst, RegisterA64
uint32_t sf = (dst.kind == KindA64::x) ? 0x80000000 : 0;
place(dst.index | (src1.index << 5) | (opc << 10) | (codeForCondition[int(cond)] << 12) | (src2.index << 16) | (op << 21) | sf);
place(dst.index | (src1.index << 5) | (opc << 10) | ((codeForCondition[int(cond)] ^ invert) << 12) | (src2.index << 16) | (op << 21) | sf);
commit();
}
@ -793,6 +838,29 @@ void AssemblyBuilderA64::placeFCMP(const char* name, RegisterA64 src1, RegisterA
commit();
}
// Instruction archetype for bitwise operations with a bitmask immediate
// 'src2' has to be a single contiguous run of 1 bits that is neither empty nor all 32 bits (enforced by the asserts below)
void AssemblyBuilderA64::placeBM(const char* name, RegisterA64 dst, RegisterA64 src1, uint32_t src2, uint8_t op)
{
    if (logText)
        log(name, dst, src1, src2);

    LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x);
    LUAU_ASSERT(dst.kind == src1.kind);

    const int leadingZeros = countlz(src2);
    const int trailingZeros = countrz(src2);
    const int zeroCount = leadingZeros + trailingZeros;

    LUAU_ASSERT(zeroCount > 0 && zeroCount < 32); // must have at least one 0 and at least one 1
    LUAU_ASSERT((src2 >> trailingZeros) == (1u << (32 - zeroCount)) - 1u); // sequence of 1s must be contiguous

    const uint32_t sizeFlag = (dst.kind == KindA64::x) ? 0x80000000 : 0;
    const int imms = 31 - zeroCount; // count of 1s minus 1
    const int immr = (32 - trailingZeros) & 31; // right rotate amount

    place(dst.index | (src1.index << 5) | (imms << 10) | (immr << 16) | (op << 23) | sizeFlag);
    commit();
}
void AssemblyBuilderA64::place(uint32_t word)
{
LUAU_ASSERT(codePos < codeEnd);
@ -965,10 +1033,13 @@ void AssemblyBuilderA64::log(const char* opcode, RegisterA64 dst, RegisterA64 sr
{
logAppend(" %-12s", opcode);
log(dst);
text.append(",");
log(src1);
text.append(",");
log(src2);
if ((src1 != wzr && src1 != xzr) || (src2 != wzr && src2 != xzr))
{
text.append(",");
log(src1);
text.append(",");
log(src2);
}
text.append(",");
text.append(textForCondition[int(cond)] + 2); // skip b.
text.append("\n");

View File

@ -31,7 +31,8 @@ static_assert(sizeof(setccTextForCondition) / sizeof(setccTextForCondition[0]) =
#define OP_PLUS_REG(op, reg) ((op) + (reg & 0x7))
#define OP_PLUS_CC(op, cc) ((op) + uint8_t(cc))
#define REX_W(value) (value ? 0x8 : 0x0)
#define REX_W_BIT(value) (value ? 0x8 : 0x0)
#define REX_W(reg) REX_W_BIT((reg).size == SizeX64::qword || ((reg).size == SizeX64::byte && (reg).index >= 4))
#define REX_R(reg) (((reg).index & 0x8) >> 1)
#define REX_X(reg) (((reg).index & 0x8) >> 2)
#define REX_B(reg) (((reg).index & 0x8) >> 3)
@ -1116,7 +1117,7 @@ void AssemblyBuilderX64::placeAvx(
void AssemblyBuilderX64::placeRex(RegisterX64 op)
{
uint8_t code = REX_W(op.size == SizeX64::qword) | REX_B(op);
uint8_t code = REX_W(op) | REX_B(op);
if (code != 0)
place(code | 0x40);
@ -1127,9 +1128,9 @@ void AssemblyBuilderX64::placeRex(OperandX64 op)
uint8_t code = 0;
if (op.cat == CategoryX64::reg)
code = REX_W(op.base.size == SizeX64::qword) | REX_B(op.base);
code = REX_W(op.base) | REX_B(op.base);
else if (op.cat == CategoryX64::mem)
code = REX_W(op.memSize == SizeX64::qword) | REX_X(op.index) | REX_B(op.base);
code = REX_W_BIT(op.memSize == SizeX64::qword) | REX_X(op.index) | REX_B(op.base);
else
LUAU_ASSERT(!"No encoding for left operand of this category");
@ -1154,7 +1155,7 @@ void AssemblyBuilderX64::placeRexNoW(OperandX64 op)
void AssemblyBuilderX64::placeRex(RegisterX64 lhs, OperandX64 rhs)
{
uint8_t code = REX_W(lhs.size == SizeX64::qword);
uint8_t code = REX_W(lhs);
if (rhs.cat == CategoryX64::imm)
code |= REX_B(lhs);

36
CodeGen/src/BitUtils.h Normal file
View File

@ -0,0 +1,36 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <stdint.h>
#ifdef _MSC_VER
#include <intrin.h>
#endif
namespace Luau
{
namespace CodeGen
{
// Returns the number of leading (most significant) zero bits in 'n'; the result is 32 when 'n' is zero
inline int countlz(uint32_t n)
{
#ifdef _MSC_VER
    unsigned long highestSetBit;

    if (!_BitScanReverse(&highestSetBit, n))
        return 32;

    return 31 - int(highestSetBit);
#else
    // The builtin is only called for a non-zero argument; zero is answered directly
    if (n == 0)
        return 32;

    return __builtin_clz(n);
#endif
}
// Returns the number of trailing (least significant) zero bits in 'n'; the result is 32 when 'n' is zero
inline int countrz(uint32_t n)
{
#ifdef _MSC_VER
    unsigned long lowestSetBit;

    if (!_BitScanForward(&lowestSetBit, n))
        return 32;

    return int(lowestSetBit);
#else
    // The builtin is only called for a non-zero argument; zero is answered directly
    if (n == 0)
        return 32;

    return __builtin_ctz(n);
#endif
}
} // namespace CodeGen
} // namespace Luau

View File

@ -54,31 +54,6 @@ namespace CodeGen
void* createBlockUnwindInfo(void* context, uint8_t* block, size_t blockSize, size_t& beginOffset)
{
#if defined(_WIN32) && defined(_M_X64)
UnwindBuilder* unwind = (UnwindBuilder*)context;
// All unwinding related data is placed together at the start of the block
size_t unwindSize = sizeof(RUNTIME_FUNCTION) + unwind->getSize();
unwindSize = (unwindSize + (kCodeAlignment - 1)) & ~(kCodeAlignment - 1); // Match code allocator alignment
LUAU_ASSERT(blockSize >= unwindSize);
RUNTIME_FUNCTION* runtimeFunc = (RUNTIME_FUNCTION*)block;
runtimeFunc->BeginAddress = DWORD(unwindSize); // Code will start after the unwind info
runtimeFunc->EndAddress = DWORD(blockSize); // Whole block is a part of a 'single function'
runtimeFunc->UnwindInfoAddress = DWORD(sizeof(RUNTIME_FUNCTION)); // Unwind info is placed at the start of the block
char* unwindData = (char*)block + runtimeFunc->UnwindInfoAddress;
unwind->finalize(unwindData, block + unwindSize, blockSize - unwindSize);
if (!RtlAddFunctionTable(runtimeFunc, 1, uintptr_t(block)))
{
LUAU_ASSERT(!"failed to allocate function table");
return nullptr;
}
beginOffset = unwindSize + unwind->getBeginOffset();
return block;
#elif !defined(_WIN32)
UnwindBuilder* unwind = (UnwindBuilder*)context;
// All unwinding related data is placed together at the start of the block
@ -87,37 +62,34 @@ void* createBlockUnwindInfo(void* context, uint8_t* block, size_t blockSize, siz
LUAU_ASSERT(blockSize >= unwindSize);
char* unwindData = (char*)block;
unwind->finalize(unwindData, block, blockSize);
unwind->finalize(unwindData, unwindSize, block, blockSize);
#if defined(__APPLE__)
#if defined(_WIN32) && defined(_M_X64)
if (!RtlAddFunctionTable((RUNTIME_FUNCTION*)block, uint32_t(unwind->getFunctionCount()), uintptr_t(block)))
{
LUAU_ASSERT(!"failed to allocate function table");
return nullptr;
}
#elif defined(__APPLE__)
visitFdeEntries(unwindData, __register_frame);
#else
#elif !defined(_WIN32)
__register_frame(unwindData);
#endif
beginOffset = unwindSize + unwind->getBeginOffset();
return block;
#endif
return nullptr;
}
void destroyBlockUnwindInfo(void* context, void* unwindData)
{
#if defined(_WIN32) && defined(_M_X64)
RUNTIME_FUNCTION* runtimeFunc = (RUNTIME_FUNCTION*)unwindData;
if (!RtlDeleteFunctionTable(runtimeFunc))
if (!RtlDeleteFunctionTable((RUNTIME_FUNCTION*)unwindData))
LUAU_ASSERT(!"failed to deallocate function table");
#elif !defined(_WIN32)
#if defined(__APPLE__)
#elif defined(__APPLE__)
visitFdeEntries((char*)unwindData, __deregister_frame);
#else
#elif !defined(_WIN32)
__deregister_frame(unwindData);
#endif
#endif
}
} // namespace CodeGen

View File

@ -176,6 +176,10 @@ static bool lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction&
IrInst& inst = function.instructions[index];
// Substitutions might have meta information about operand restore location from memory
if (inst.cmd == IrCmd::SUBSTITUTE && inst.b.kind != IrOpKind::None)
function.recordRestoreOp(inst.a.index, inst.b);
// Skip pseudo instructions, but make sure they are not used at this stage
// This also prevents them from getting into text output when that's enabled
if (isPseudo(inst.cmd))
@ -195,7 +199,18 @@ static bool lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction&
lowering.lowerInst(inst, index, next);
if (lowering.hasError())
{
// Place labels for all blocks that we're skipping
// This is needed to avoid AssemblyBuilder assertions about jumps in earlier blocks with unplaced labels
for (size_t j = i + 1; j < sortedBlocks.size(); ++j)
{
IrBlock& abandoned = function.blocks[sortedBlocks[j]];
build.setLabel(abandoned.label);
}
return false;
}
}
if (options.includeIr)
@ -223,12 +238,8 @@ static bool lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction&
[[maybe_unused]] static bool lowerIr(
X64::AssemblyBuilderX64& build, IrBuilder& ir, NativeState& data, ModuleHelpers& helpers, Proto* proto, AssemblyOptions options)
{
constexpr uint32_t kFunctionAlignment = 32;
optimizeMemoryOperandsX64(ir.function);
build.align(kFunctionAlignment, X64::AlignmentDataX64::Ud2);
X64::IrLoweringX64 lowering(build, helpers, data, ir.function);
return lowerImpl(build, lowering, ir.function, proto->bytecodeid, options);
@ -237,9 +248,6 @@ static bool lowerImpl(AssemblyBuilder& build, IrLowering& lowering, IrFunction&
[[maybe_unused]] static bool lowerIr(
A64::AssemblyBuilderA64& build, IrBuilder& ir, NativeState& data, ModuleHelpers& helpers, Proto* proto, AssemblyOptions options)
{
if (!A64::IrLoweringA64::canLower(ir.function))
return false;
A64::IrLoweringA64 lowering(build, helpers, data, proto, ir.function);
return lowerImpl(build, lowering, ir.function, proto->bytecodeid, options);
@ -432,13 +440,13 @@ void create(lua_State* L)
initHelperFunctions(data);
#if defined(__x86_64__) || defined(_M_X64)
if (!X64::initEntryFunction(data))
if (!X64::initHeaderFunctions(data))
{
destroyNativeState(L);
return;
}
#elif defined(__aarch64__)
if (!A64::initEntryFunction(data))
if (!A64::initHeaderFunctions(data))
{
destroyNativeState(L);
return;

View File

@ -17,14 +17,107 @@ namespace CodeGen
namespace A64
{
bool initEntryFunction(NativeState& data)
struct EntryLocations
{
AssemblyBuilderA64 build(/* logText= */ false);
UnwindBuilder& unwind = *data.unwindBuilder.get();
Label start;
Label prologueEnd;
Label epilogueStart;
};
// Emits a jump to the address stored in NativeContext::gateExit, with 'continueInVm' placed in x0
static void emitExit(AssemblyBuilderA64& build, bool continueInVm)
{
build.mov(x0, continueInVm);
// x1 is used as a scratch register for the branch target
build.ldr(x1, mem(rNativeContext, offsetof(NativeContext, gateExit)));
build.br(x1);
}
// Emits the code sequence that invokes the interrupt callback passed in x2
// Before the call, L->ci->savedpc is set to rCode + x0
// After the call, if L->status is zero, execution branches back to the return address passed in x1
// Otherwise savedpc is moved back by one instruction and the exit sequence is emitted with continueInVm = false
static void emitInterrupt(AssemblyBuilderA64& build)
{
// x0 = pc offset
// x1 = return address in native code
// x2 = interrupt
// Stash return address in rBase; we need to reload rBase anyway
build.mov(rBase, x1);
// Update savedpc; required in case interrupt errors
build.add(x0, rCode, x0);
build.ldr(x1, mem(rState, offsetof(lua_State, ci)));
build.str(x0, mem(x1, offsetof(CallInfo, savedpc)));
// Call interrupt
// Arguments are placed in x0 (rState) and w1 (-1)
build.mov(x0, rState);
build.mov(w1, -1);
build.blr(x2);
// Check if we need to exit
// A zero L->status branches to 'skip' and bypasses the exit sequence
Label skip;
build.ldrb(w0, mem(rState, offsetof(lua_State, status)));
build.cbz(w0, skip);
// L->ci->savedpc--
// note: recomputing this avoids having to stash x0
build.ldr(x1, mem(rState, offsetof(lua_State, ci)));
build.ldr(x0, mem(x1, offsetof(CallInfo, savedpc)));
build.sub(x0, x0, sizeof(Instruction));
build.str(x0, mem(x1, offsetof(CallInfo, savedpc)));
emitExit(build, /* continueInVm */ false);
build.setLabel(skip);
// Return back to caller; rBase has stashed return address
build.mov(x0, rBase);
emitUpdateBase(build); // interrupt may have reallocated stack
build.br(x0);
}
// Emits the re-entry sequence that resumes native execution for the closure passed in x0
// A null closure branches to helpers.exitNoContinueVm; a proto without exec data branches to helpers.exitContinueVm
// Otherwise rClosure/rConstants/rCode are refreshed and execution jumps to the instTargets entry selected by L->ci->savedpc
static void emitReentry(AssemblyBuilderA64& build, ModuleHelpers& helpers)
{
// x0 = closure object to reentry (equal to clvalue(L->ci->func))
// If the fallback requested an exit, we need to do this right away
build.cbz(x0, helpers.exitNoContinueVm);
emitUpdateBase(build);
// Need to update state of the current function before we jump away
build.ldr(x1, mem(x0, offsetof(Closure, l.p))); // cl->l.p aka proto
build.mov(rClosure, x0);
build.ldr(rConstants, mem(x1, offsetof(Proto, k))); // proto->k
build.ldr(rCode, mem(x1, offsetof(Proto, code))); // proto->code
// Get instruction index from instruction pointer
// To get instruction index from instruction pointer, we need to divide byte offset by 4
// But we will actually need to scale instruction index by 8 back to byte offset later so it cancels out
build.ldr(x2, mem(rState, offsetof(lua_State, ci))); // L->ci
build.ldr(x2, mem(x2, offsetof(CallInfo, savedpc))); // L->ci->savedpc
build.sub(x2, x2, rCode);
// x2 + x2 doubles the byte offset (net effect of the /4 and *8 described above)
build.add(x2, x2, x2); // TODO: this would not be necessary if we supported shifted register offsets in loads
// We need to check if the new function can be executed natively
// TODO: This can be done earlier in the function flow, to reduce the JIT->VM transition penalty
build.ldr(x1, mem(x1, offsetofProtoExecData));
build.cbz(x1, helpers.exitContinueVm);
// Get new instruction location and jump to it
build.ldr(x1, mem(x1, offsetof(NativeProto, instTargets)));
build.ldr(x1, mem(x1, x2));
build.br(x1);
}
static EntryLocations buildEntryFunction(AssemblyBuilderA64& build, UnwindBuilder& unwind)
{
EntryLocations locations;
// Arguments: x0 = lua_State*, x1 = Proto*, x2 = native code pointer to jump to, x3 = NativeContext*
unwind.start();
locations.start = build.setLabel();
unwind.startFunction();
unwind.allocStack(8); // TODO: this is just a hack to make UnwindBuilder assertions cooperate
// prologue
@ -38,9 +131,7 @@ bool initEntryFunction(NativeState& data)
build.mov(x29, sp); // this is only necessary if we maintain frame pointers, which we do in the JIT for now
unwind.finish();
size_t prologueSize = build.setLabel().location;
locations.prologueEnd = build.setLabel();
// Setup native execution environment
build.mov(rState, x0);
@ -58,7 +149,7 @@ bool initEntryFunction(NativeState& data)
build.br(x2);
// Even though we jumped away, we will return here in the end
Label returnOff = build.setLabel();
locations.epilogueStart = build.setLabel();
// Cleanup and exit
build.ldp(x23, x24, mem(sp, 48));
@ -69,12 +160,30 @@ bool initEntryFunction(NativeState& data)
build.ret();
// Our entry function is special, it spans the whole remaining code area
unwind.finishFunction(build.getLabelOffset(locations.start), kFullBlockFuncton);
return locations;
}
bool initHeaderFunctions(NativeState& data)
{
AssemblyBuilderA64 build(/* logText= */ false);
UnwindBuilder& unwind = *data.unwindBuilder.get();
unwind.startInfo();
EntryLocations entryLocations = buildEntryFunction(build, unwind);
build.finalize();
unwind.finishInfo();
LUAU_ASSERT(build.data.empty());
uint8_t* codeStart = nullptr;
if (!data.codeAllocator.allocate(build.data.data(), int(build.data.size()), reinterpret_cast<const uint8_t*>(build.code.data()),
int(build.code.size() * sizeof(build.code[0])), data.gateData, data.gateDataSize, data.context.gateEntry))
int(build.code.size() * sizeof(build.code[0])), data.gateData, data.gateDataSize, codeStart))
{
LUAU_ASSERT(!"failed to create entry function");
return false;
@ -82,9 +191,10 @@ bool initEntryFunction(NativeState& data)
// Set the offset at the beginning so that functions in new blocks will not overlay the locations
// specified by the unwind information of the entry function
unwind.setBeginOffset(prologueSize);
unwind.setBeginOffset(build.getLabelOffset(entryLocations.prologueEnd));
data.context.gateExit = data.context.gateEntry + build.getLabelOffset(returnOff);
data.context.gateEntry = codeStart + build.getLabelOffset(entryLocations.start);
data.context.gateExit = codeStart + build.getLabelOffset(entryLocations.epilogueStart);
return true;
}

View File

@ -14,7 +14,7 @@ namespace A64
class AssemblyBuilderA64;
bool initEntryFunction(NativeState& data);
bool initHeaderFunctions(NativeState& data);
void assembleHelpers(AssemblyBuilderA64& build, ModuleHelpers& helpers);
} // namespace A64

View File

@ -13,12 +13,58 @@ namespace Luau
namespace CodeGen
{
// Advances a generic-for table traversal starting at 'index'
// Scans the array portion first and then the hash portion for the next non-nil value
// On success, writes the next iteration index to ra+2, the key to ra+3 and the value to ra+4 and returns true
// Returns false when no non-nil entries remain
bool forgLoopTableIter(lua_State* L, Table* h, int index, TValue* ra)
{
    const int arraySize = h->sizearray;

    // array portion: the key is the 1-based position of the element
    for (; unsigned(index) < unsigned(arraySize); ++index)
    {
        TValue* element = &h->array[index];

        if (ttisnil(element))
            continue;

        setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
        setnvalue(ra + 3, double(index + 1));
        setobj2s(L, ra + 4, element);
        return true;
    }

    const int nodeCount = 1 << h->lsizenode;

    // hash portion: node indices continue right after the array indices
    for (; unsigned(index - arraySize) < unsigned(nodeCount); ++index)
    {
        LuaNode* entry = &h->node[index - arraySize];

        if (ttisnil(gval(entry)))
            continue;

        setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)));
        getnodekey(L, ra + 3, entry);
        setobj(L, ra + 4, gval(entry));
        return true;
    }

    return false;
}
bool forgLoopNodeIter(lua_State* L, Table* h, int index, TValue* ra)
{
int sizearray = h->sizearray;
int sizenode = 1 << h->lsizenode;
// then we advance index through the hash portion
while (unsigned(index - h->sizearray) < unsigned(1 << h->lsizenode))
while (unsigned(index - sizearray) < unsigned(sizenode))
{
LuaNode* n = &h->node[index - h->sizearray];
LuaNode* n = &h->node[index - sizearray];
if (!ttisnil(gval(n)))
{

View File

@ -8,6 +8,7 @@ namespace Luau
namespace CodeGen
{
bool forgLoopTableIter(lua_State* L, Table* h, int index, TValue* ra);
bool forgLoopNodeIter(lua_State* L, Table* h, int index, TValue* ra);
bool forgLoopNonTableFallback(lua_State* L, int insnA, int aux);

View File

@ -41,12 +41,21 @@ namespace CodeGen
namespace X64
{
bool initEntryFunction(NativeState& data)
struct EntryLocations
{
AssemblyBuilderX64 build(/* logText= */ false);
UnwindBuilder& unwind = *data.unwindBuilder.get();
Label start;
Label prologueEnd;
Label epilogueStart;
};
unwind.start();
static EntryLocations buildEntryFunction(AssemblyBuilderX64& build, UnwindBuilder& unwind)
{
EntryLocations locations;
build.align(kFunctionAlignment, X64::AlignmentDataX64::Ud2);
locations.start = build.setLabel();
unwind.startFunction();
// Save common non-volatile registers
build.push(rbp);
@ -84,9 +93,7 @@ bool initEntryFunction(NativeState& data)
build.sub(rsp, kStackSize + kLocalsSize);
unwind.allocStack(kStackSize + kLocalsSize);
unwind.finish();
size_t prologueSize = build.setLabel().location;
locations.prologueEnd = build.setLabel();
// Setup native execution environment
build.mov(rState, rArg1);
@ -104,7 +111,7 @@ bool initEntryFunction(NativeState& data)
build.jmp(rArg3);
// Even though we jumped away, we will return here in the end
Label returnOff = build.setLabel();
locations.epilogueStart = build.setLabel();
// Cleanup and exit
build.add(rsp, kStackSize + kLocalsSize);
@ -123,12 +130,30 @@ bool initEntryFunction(NativeState& data)
build.pop(rbp);
build.ret();
// Our entry function is special, it spans the whole remaining code area
unwind.finishFunction(build.getLabelOffset(locations.start), kFullBlockFuncton);
return locations;
}
bool initHeaderFunctions(NativeState& data)
{
AssemblyBuilderX64 build(/* logText= */ false);
UnwindBuilder& unwind = *data.unwindBuilder.get();
unwind.startInfo();
EntryLocations entryLocations = buildEntryFunction(build, unwind);
build.finalize();
unwind.finishInfo();
LUAU_ASSERT(build.data.empty());
if (!data.codeAllocator.allocate(build.data.data(), int(build.data.size()), build.code.data(), int(build.code.size()), data.gateData,
data.gateDataSize, data.context.gateEntry))
uint8_t* codeStart = nullptr;
if (!data.codeAllocator.allocate(
build.data.data(), int(build.data.size()), build.code.data(), int(build.code.size()), data.gateData, data.gateDataSize, codeStart))
{
LUAU_ASSERT(!"failed to create entry function");
return false;
@ -136,9 +161,10 @@ bool initEntryFunction(NativeState& data)
// Set the offset at the beginning so that functions in new blocks will not overlay the locations
// specified by the unwind information of the entry function
unwind.setBeginOffset(prologueSize);
unwind.setBeginOffset(build.getLabelOffset(entryLocations.prologueEnd));
data.context.gateExit = data.context.gateEntry + returnOff.location;
data.context.gateEntry = codeStart + build.getLabelOffset(entryLocations.start);
data.context.gateExit = codeStart + build.getLabelOffset(entryLocations.epilogueStart);
return true;
}

View File

@ -14,7 +14,7 @@ namespace X64
class AssemblyBuilderX64;
bool initEntryFunction(NativeState& data);
bool initHeaderFunctions(NativeState& data);
void assembleHelpers(AssemblyBuilderX64& build, ModuleHelpers& helpers);
} // namespace X64

View File

@ -107,47 +107,11 @@ void emitBuiltinMathLog(IrRegAllocX64& regs, AssemblyBuilderX64& build, int npar
regs.assertAllFree();
build.vmovsd(xmm0, luauRegValue(arg));
if (nparams == 1)
{
build.call(qword[rNativeContext + offsetof(NativeContext, libm_log)]);
}
else
{
Label log10check, logdivlog, exit;
// Using 'rbx' for non-volatile temporary storage of log(arg1) result
RegisterX64 tmp = rbx;
OperandX64 arg2value = qword[args + offsetof(TValue, value)];
build.vmovsd(xmm1, arg2value);
jumpOnNumberCmp(build, noreg, build.f64(2.0), xmm1, IrCondition::NotEqual, log10check);
// TODO: IR builtin lowering assumes that the only valid 2-argument call is log2; ideally, we use a less hacky way to indicate that
if (nparams == 2)
build.call(qword[rNativeContext + offsetof(NativeContext, libm_log2)]);
build.jmp(exit);
build.setLabel(log10check);
jumpOnNumberCmp(build, noreg, build.f64(10.0), xmm1, IrCondition::NotEqual, logdivlog);
build.call(qword[rNativeContext + offsetof(NativeContext, libm_log10)]);
build.jmp(exit);
build.setLabel(logdivlog);
// log(arg1)
else
build.call(qword[rNativeContext + offsetof(NativeContext, libm_log)]);
build.vmovq(tmp, xmm0);
// log(arg2)
build.vmovsd(xmm0, arg2value);
build.call(qword[rNativeContext + offsetof(NativeContext, libm_log)]);
// log(arg1) / log(arg2)
build.vmovq(xmm1, tmp);
build.vdivsd(xmm0, xmm1, xmm0);
build.setLabel(exit);
}
build.vmovsd(luauRegValue(ra), xmm0);
}
@ -256,62 +220,68 @@ void emitBuiltin(IrRegAllocX64& regs, AssemblyBuilderX64& build, int bfid, int r
switch (bfid)
{
case LBF_ASSERT:
case LBF_MATH_DEG:
case LBF_MATH_RAD:
case LBF_MATH_MIN:
case LBF_MATH_MAX:
case LBF_MATH_CLAMP:
case LBF_MATH_FLOOR:
case LBF_MATH_CEIL:
case LBF_MATH_SQRT:
case LBF_MATH_POW:
case LBF_MATH_ABS:
case LBF_MATH_ROUND:
// These instructions are fully translated to IR
break;
case LBF_MATH_EXP:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinMathExp(regs, build, nparams, ra, arg, argsOp, nresults);
case LBF_MATH_FMOD:
LUAU_ASSERT(nparams == 2 && nresults == 1);
return emitBuiltinMathFmod(regs, build, nparams, ra, arg, argsOp, nresults);
case LBF_MATH_ASIN:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinMathAsin(regs, build, nparams, ra, arg, argsOp, nresults);
case LBF_MATH_SIN:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinMathSin(regs, build, nparams, ra, arg, argsOp, nresults);
case LBF_MATH_SINH:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinMathSinh(regs, build, nparams, ra, arg, argsOp, nresults);
case LBF_MATH_ACOS:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinMathAcos(regs, build, nparams, ra, arg, argsOp, nresults);
case LBF_MATH_COS:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinMathCos(regs, build, nparams, ra, arg, argsOp, nresults);
case LBF_MATH_COSH:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinMathCosh(regs, build, nparams, ra, arg, argsOp, nresults);
case LBF_MATH_ATAN:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinMathAtan(regs, build, nparams, ra, arg, argsOp, nresults);
case LBF_MATH_TAN:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinMathTan(regs, build, nparams, ra, arg, argsOp, nresults);
case LBF_MATH_TANH:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinMathTanh(regs, build, nparams, ra, arg, argsOp, nresults);
case LBF_MATH_ATAN2:
LUAU_ASSERT(nparams == 2 && nresults == 1);
return emitBuiltinMathAtan2(regs, build, nparams, ra, arg, argsOp, nresults);
case LBF_MATH_LOG10:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinMathLog10(regs, build, nparams, ra, arg, argsOp, nresults);
case LBF_MATH_LOG:
LUAU_ASSERT((nparams == 1 || nparams == 2) && nresults == 1);
return emitBuiltinMathLog(regs, build, nparams, ra, arg, argsOp, nresults);
case LBF_MATH_LDEXP:
LUAU_ASSERT(nparams == 2 && nresults == 1);
return emitBuiltinMathLdexp(regs, build, nparams, ra, arg, argsOp, nresults);
case LBF_MATH_FREXP:
LUAU_ASSERT(nparams == 1 && (nresults == 1 || nresults == 2));
return emitBuiltinMathFrexp(regs, build, nparams, ra, arg, argsOp, nresults);
case LBF_MATH_MODF:
LUAU_ASSERT(nparams == 1 && (nresults == 1 || nresults == 2));
return emitBuiltinMathModf(regs, build, nparams, ra, arg, argsOp, nresults);
case LBF_MATH_SIGN:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinMathSign(regs, build, nparams, ra, arg, argsOp, nresults);
case LBF_TYPE:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinType(regs, build, nparams, ra, arg, argsOp, nresults);
case LBF_TYPEOF:
LUAU_ASSERT(nparams == 1 && nresults == 1);
return emitBuiltinTypeof(regs, build, nparams, ra, arg, argsOp, nresults);
default:
LUAU_ASSERT(!"missing x64 lowering");
LUAU_ASSERT(!"Missing x64 lowering");
break;
}
}

View File

@ -13,8 +13,8 @@ constexpr unsigned kLuaNodeSizeLog2 = 5;
constexpr unsigned kLuaNodeTagMask = 0xf;
constexpr unsigned kNextBitOffset = 4;
constexpr unsigned kOffsetOfLuaNodeTag = 12; // offsetof cannot be used on a bit field
constexpr unsigned kOffsetOfLuaNodeNext = 12; // offsetof cannot be used on a bit field
constexpr unsigned kOffsetOfTKeyTag = 12; // offsetof cannot be used on a bit field
constexpr unsigned kOffsetOfTKeyNext = 12; // offsetof cannot be used on a bit field
constexpr unsigned kOffsetOfInstructionC = 3;
// Leaf functions that are placed in every module to perform common instruction sequences

View File

@ -1,130 +0,0 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "EmitCommonA64.h"
#include "NativeState.h"
#include "CustomExecUtils.h"
namespace Luau
{
namespace CodeGen
{
namespace A64
{
// Emits a reload of rBase from L->base
void emitUpdateBase(AssemblyBuilderA64& build)
{
build.ldr(rBase, mem(rState, offsetof(lua_State, base)));
}
// Emits a jump to the address stored in NativeContext::gateExit, with 'continueInVm' placed in x0
void emitExit(AssemblyBuilderA64& build, bool continueInVm)
{
build.mov(x0, continueInVm);
// x1 is used as a scratch register for the branch target
build.ldr(x1, mem(rNativeContext, offsetof(NativeContext, gateExit)));
build.br(x1);
}
// Emits the code sequence that invokes the interrupt callback passed in x2
// Before the call, L->ci->savedpc is set to rCode + x0
// After the call, if L->status is zero, execution branches back to the return address passed in x1
// Otherwise savedpc is moved back by one instruction and the exit sequence is emitted with continueInVm = false
void emitInterrupt(AssemblyBuilderA64& build)
{
// x0 = pc offset
// x1 = return address in native code
// x2 = interrupt
// Stash return address in rBase; we need to reload rBase anyway
build.mov(rBase, x1);
// Update savedpc; required in case interrupt errors
build.add(x0, rCode, x0);
build.ldr(x1, mem(rState, offsetof(lua_State, ci)));
build.str(x0, mem(x1, offsetof(CallInfo, savedpc)));
// Call interrupt
// Arguments are placed in x0 (rState) and w1 (-1)
build.mov(x0, rState);
build.mov(w1, -1);
build.blr(x2);
// Check if we need to exit
// A zero L->status branches to 'skip' and bypasses the exit sequence
Label skip;
build.ldrb(w0, mem(rState, offsetof(lua_State, status)));
build.cbz(w0, skip);
// L->ci->savedpc--
// note: recomputing this avoids having to stash x0
build.ldr(x1, mem(rState, offsetof(lua_State, ci)));
build.ldr(x0, mem(x1, offsetof(CallInfo, savedpc)));
build.sub(x0, x0, sizeof(Instruction));
build.str(x0, mem(x1, offsetof(CallInfo, savedpc)));
emitExit(build, /* continueInVm */ false);
build.setLabel(skip);
// Return back to caller; rBase has stashed return address
build.mov(x0, rBase);
emitUpdateBase(build); // interrupt may have reallocated stack
build.br(x0);
}
// Emits the re-entry sequence that resumes native execution for the closure passed in x0
// A null closure branches to helpers.exitNoContinueVm; a proto without exec data branches to helpers.exitContinueVm
// Otherwise rClosure/rConstants/rCode are refreshed and execution jumps to the instTargets entry selected by L->ci->savedpc
void emitReentry(AssemblyBuilderA64& build, ModuleHelpers& helpers)
{
// x0 = closure object to reentry (equal to clvalue(L->ci->func))
// If the fallback requested an exit, we need to do this right away
build.cbz(x0, helpers.exitNoContinueVm);
emitUpdateBase(build);
// Need to update state of the current function before we jump away
build.ldr(x1, mem(x0, offsetof(Closure, l.p))); // cl->l.p aka proto
build.mov(rClosure, x0);
build.ldr(rConstants, mem(x1, offsetof(Proto, k))); // proto->k
build.ldr(rCode, mem(x1, offsetof(Proto, code))); // proto->code
// Get instruction index from instruction pointer
// To get instruction index from instruction pointer, we need to divide byte offset by 4
// But we will actually need to scale instruction index by 8 back to byte offset later so it cancels out
build.ldr(x2, mem(rState, offsetof(lua_State, ci))); // L->ci
build.ldr(x2, mem(x2, offsetof(CallInfo, savedpc))); // L->ci->savedpc
build.sub(x2, x2, rCode);
// x2 + x2 doubles the byte offset (net effect of the /4 and *8 described above)
build.add(x2, x2, x2); // TODO: this would not be necessary if we supported shifted register offsets in loads
// We need to check if the new function can be executed natively
// TODO: This can be done earlier in the function flow, to reduce the JIT->VM transition penalty
build.ldr(x1, mem(x1, offsetofProtoExecData));
build.cbz(x1, helpers.exitContinueVm);
// Get new instruction location and jump to it
build.ldr(x1, mem(x1, offsetof(NativeProto, instTargets)));
build.ldr(x1, mem(x1, x2));
build.br(x1);
}
// Emits a call to the fallback handler registered in NativeContext for operation 'op'
// The handler receives (L, pointer to the instruction at index 'pcpos', base, constants); rBase is reloaded afterwards
void emitFallback(AssemblyBuilderA64& build, int op, int pcpos)
{
    // Byte offset of the instruction from the start of the code array held in rCode
    const auto pcByteOffset = pcpos * sizeof(Instruction);

    // fallback(L, instruction, base, k)
    build.mov(x0, rState);

    // TODO: refactor into a common helper
    if (pcByteOffset > AssemblyBuilderA64::kMaxImmediate)
    {
        // Offset exceeds kMaxImmediate, so it is loaded into x1 first and added as a register
        build.mov(x1, pcByteOffset);
        build.add(x1, rCode, x1);
    }
    else
    {
        build.add(x1, rCode, uint16_t(pcByteOffset));
    }

    build.mov(x2, rBase);
    build.mov(x3, rConstants);

    // Load the handler pointer for 'op' from the fallback table in NativeContext and call it
    build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, fallback) + op * sizeof(NativeFallback) + offsetof(NativeFallback, fallback)));
    build.blr(x4);

    emitUpdateBase(build);
}
} // namespace A64
} // namespace CodeGen
} // namespace Luau

View File

@ -7,6 +7,7 @@
#include "lobject.h"
#include "ltm.h"
#include "lstate.h"
// AArch64 ABI reminder:
// Arguments: x0-x7, v0-v7
@ -38,15 +39,19 @@ constexpr RegisterA64 rBase = x24; // StkId base
// Native code is as stackless as the interpreter, so we can place some data on the stack once and have it accessible at any point
// See CodeGenA64.cpp for layout
constexpr unsigned kStackSize = 64; // 8 stashed registers
constexpr unsigned kStashSlots = 8; // stashed non-volatile registers
constexpr unsigned kSpillSlots = 0; // slots for spilling temporary registers (unused)
constexpr unsigned kTempSlots = 2; // 16 bytes of temporary space, such luxury!
void emitUpdateBase(AssemblyBuilderA64& build);
constexpr unsigned kStackSize = (kStashSlots + kSpillSlots + kTempSlots) * 8;
// TODO: Move these to CodeGenA64 so that they can't be accidentally called during lowering
void emitExit(AssemblyBuilderA64& build, bool continueInVm);
void emitInterrupt(AssemblyBuilderA64& build);
void emitReentry(AssemblyBuilderA64& build, ModuleHelpers& helpers);
void emitFallback(AssemblyBuilderA64& build, int op, int pcpos);
constexpr AddressA64 sSpillArea = mem(sp, kStashSlots * 8);
constexpr AddressA64 sTemporary = mem(sp, (kStashSlots + kSpillSlots) * 8);
// Emits a reload of rBase from L->base
inline void emitUpdateBase(AssemblyBuilderA64& build)
{
build.ldr(rBase, mem(rState, offsetof(lua_State, base)));
}
} // namespace A64
} // namespace CodeGen

View File

@ -279,32 +279,37 @@ void emitUpdateBase(AssemblyBuilderX64& build)
build.mov(rBase, qword[rState + offsetof(lua_State, base)]);
}
// Note: only uses rax/rdx, the caller may use other registers
static void emitSetSavedPc(AssemblyBuilderX64& build, int pcpos)
static void emitSetSavedPc(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos)
{
build.mov(rdx, sCode);
build.add(rdx, pcpos * sizeof(Instruction));
build.mov(rax, qword[rState + offsetof(lua_State, ci)]);
build.mov(qword[rax + offsetof(CallInfo, savedpc)], rdx);
ScopedRegX64 tmp1{regs, SizeX64::qword};
ScopedRegX64 tmp2{regs, SizeX64::qword};
build.mov(tmp1.reg, sCode);
build.add(tmp1.reg, pcpos * sizeof(Instruction));
build.mov(tmp2.reg, qword[rState + offsetof(lua_State, ci)]);
build.mov(qword[tmp2.reg + offsetof(CallInfo, savedpc)], tmp1.reg);
}
void emitInterrupt(AssemblyBuilderX64& build, int pcpos)
void emitInterrupt(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos)
{
Label skip;
ScopedRegX64 tmp{regs, SizeX64::qword};
// Skip if there is no interrupt set
build.mov(r8, qword[rState + offsetof(lua_State, global)]);
build.mov(r8, qword[r8 + offsetof(global_State, cb.interrupt)]);
build.test(r8, r8);
build.mov(tmp.reg, qword[rState + offsetof(lua_State, global)]);
build.mov(tmp.reg, qword[tmp.reg + offsetof(global_State, cb.interrupt)]);
build.test(tmp.reg, tmp.reg);
build.jcc(ConditionX64::Zero, skip);
emitSetSavedPc(build, pcpos + 1); // uses rax/rdx
emitSetSavedPc(regs, build, pcpos + 1);
// Call interrupt
// TODO: This code should move to the end of the function, or even be outlined so that it can be shared by multiple interruptible instructions
build.mov(rArg1, rState);
build.mov(dwordReg(rArg2), -1); // function accepts 'int' here and using qword reg would've forced 8 byte constant here
build.call(r8);
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::dword, -1);
callWrap.call(tmp.release());
emitUpdateBase(build); // interrupt may have reallocated stack
@ -320,41 +325,23 @@ void emitInterrupt(AssemblyBuilderX64& build, int pcpos)
build.setLabel(skip);
}
void emitFallback(AssemblyBuilderX64& build, NativeState& data, int op, int pcpos)
void emitFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, NativeState& data, int op, int pcpos)
{
NativeFallback& opinfo = data.context.fallback[op];
LUAU_ASSERT(opinfo.fallback);
if (build.logText)
build.logAppend("; fallback\n");
LUAU_ASSERT(data.context.fallback[op]);
// fallback(L, instruction, base, k)
build.mov(rArg1, rState);
build.mov(rArg2, sCode);
build.add(rArg2, pcpos * sizeof(Instruction));
build.mov(rArg3, rBase);
build.mov(rArg4, rConstants);
build.call(qword[rNativeContext + offsetof(NativeContext, fallback) + op * sizeof(NativeFallback) + offsetof(NativeFallback, fallback)]);
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
RegisterX64 reg = callWrap.suggestNextArgumentRegister(SizeX64::qword);
build.mov(reg, sCode);
callWrap.addArgument(SizeX64::qword, addr[reg + pcpos * sizeof(Instruction)]);
callWrap.addArgument(SizeX64::qword, rBase);
callWrap.addArgument(SizeX64::qword, rConstants);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, fallback) + op * sizeof(FallbackFn)]);
emitUpdateBase(build);
// Some instructions may jump to a different instruction or a completely different function
if (opinfo.flags & kFallbackUpdatePc)
{
build.mov(rcx, sClosure);
build.mov(rcx, qword[rcx + offsetof(Closure, l.p)]);
// Get instruction index from returned instruction pointer
// To get instruction index from instruction pointer, we need to divide byte offset by 4
// But we will actually need to scale instruction index by 8 back to byte offset later so it cancels out
build.sub(rax, sCode);
build.mov(rdx, qword[rcx + offsetofProtoExecData]);
// Get new instruction location and jump to it
build.mov(rcx, qword[rdx + offsetof(NativeProto, instTargets)]);
build.jmp(qword[rax * 2 + rcx]);
}
}
void emitContinueCallInVm(AssemblyBuilderX64& build)

View File

@ -34,6 +34,8 @@ namespace X64
struct IrRegAllocX64;
constexpr uint32_t kFunctionAlignment = 32;
// Data that is very common to access is placed in non-volatile registers
constexpr RegisterX64 rState = r15; // lua_State* L
constexpr RegisterX64 rBase = r14; // StkId base
@ -134,7 +136,7 @@ inline OperandX64 luauNodeKeyValue(RegisterX64 node)
// Note: tag has dirty upper bits
inline OperandX64 luauNodeKeyTag(RegisterX64 node)
{
return dword[node + offsetof(LuaNode, key) + kOffsetOfLuaNodeTag];
return dword[node + offsetof(LuaNode, key) + kOffsetOfTKeyTag];
}
inline OperandX64 luauNodeValue(RegisterX64 node)
@ -162,12 +164,6 @@ inline void jumpIfTagIsNot(AssemblyBuilderX64& build, int ri, lua_Type tag, Labe
build.jcc(ConditionX64::NotEqual, label);
}
// Emits a comparison of the type tag of the TValue addressed by 'reg' against 'tag' and a jump to 'label' when they differ
inline void jumpIfTagIsNot(AssemblyBuilderX64& build, RegisterX64 reg, lua_Type tag, Label& label)
{
build.cmp(dword[reg + offsetof(TValue, tt)], tag);
build.jcc(ConditionX64::NotEqual, label);
}
// Note: fallthrough label should be placed after this condition
inline void jumpIfFalsy(AssemblyBuilderX64& build, int ri, Label& target, Label& fallthrough)
{
@ -188,26 +184,6 @@ inline void jumpIfTruthy(AssemblyBuilderX64& build, int ri, Label& target, Label
build.jcc(ConditionX64::NotEqual, target); // true if boolean value is 'true'
}
// Emits a jump to 'target' when the metatable pointer of the Table addressed by 'table' is non-zero
inline void jumpIfMetatablePresent(AssemblyBuilderX64& build, RegisterX64 table, Label& target)
{
build.cmp(qword[table + offsetof(Table, metatable)], 0);
build.jcc(ConditionX64::NotEqual, target);
}
// Emits a jump to 'label' when the 'safeenv' flag of the current closure's environment table has its lowest bit clear
// Note: 'tmp' is overwritten (closure pointer, then environment table pointer)
inline void jumpIfUnsafeEnv(AssemblyBuilderX64& build, RegisterX64 tmp, Label& label)
{
build.mov(tmp, sClosure);
build.mov(tmp, qword[tmp + offsetof(Closure, env)]);
build.test(byte[tmp + offsetof(Table, safeenv)], 1);
build.jcc(ConditionX64::Zero, label); // Not a safe environment
}
// Emits a jump to 'label' when the 'readonly' byte of the Table addressed by 'table' is non-zero
inline void jumpIfTableIsReadOnly(AssemblyBuilderX64& build, RegisterX64 table, Label& label)
{
build.cmp(byte[table + offsetof(Table, readonly)], 0);
build.jcc(ConditionX64::NotEqual, label);
}
inline void jumpIfNodeKeyTagIsNot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, lua_Type tag, Label& label)
{
tmp.size = SizeX64::dword;
@ -224,13 +200,6 @@ inline void jumpIfNodeValueTagIs(AssemblyBuilderX64& build, RegisterX64 node, lu
build.jcc(ConditionX64::Equal, label);
}
// Jumps to 'label' when the bits at and above kNextBitOffset in the node key's packed 'next' word are non-zero
// Note: ecx is used as scratch and is overwritten
inline void jumpIfNodeHasNext(AssemblyBuilderX64& build, RegisterX64 node, Label& label)
{
    const OperandX64 packedNext = dword[node + offsetof(LuaNode, key) + kOffsetOfLuaNodeNext];

    build.mov(ecx, packedNext);
    build.shr(ecx, kNextBitOffset); // shift out the low bits; the jump below tests whether anything remains
    build.jcc(ConditionX64::NotZero, label);
}
inline void jumpIfNodeKeyNotInExpectedSlot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, OperandX64 expectedKey, Label& label)
{
jumpIfNodeKeyTagIsNot(build, tmp, node, LUA_TSTRING, label);
@ -260,8 +229,8 @@ void callStepGc(IrRegAllocX64& regs, AssemblyBuilderX64& build);
void emitExit(AssemblyBuilderX64& build, bool continueInVm);
void emitUpdateBase(AssemblyBuilderX64& build);
void emitInterrupt(AssemblyBuilderX64& build, int pcpos);
void emitFallback(AssemblyBuilderX64& build, NativeState& data, int op, int pcpos);
void emitInterrupt(IrRegAllocX64& regs, AssemblyBuilderX64& build, int pcpos);
void emitFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, NativeState& data, int op, int pcpos);
void emitContinueCallInVm(AssemblyBuilderX64& build);

View File

@ -1,74 +0,0 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "EmitInstructionA64.h"
#include "Luau/AssemblyBuilderA64.h"
#include "EmitCommonA64.h"
#include "NativeState.h"
#include "CustomExecUtils.h"
namespace Luau
{
namespace CodeGen
{
namespace A64
{
// Emits a call to NativeContext::returnFallback(L, ra, n) and then branches to the shared reentry helper
void emitInstReturn(AssemblyBuilderA64& build, ModuleHelpers& helpers, int ra, int n)
{
    const uint16_t raOffset = uint16_t(ra * sizeof(TValue));

    // returnFallback(L, ra, n)
    build.mov(x0, rState);
    build.add(x1, rBase, raOffset);
    build.mov(w2, n);
    build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, returnFallback)));
    build.blr(x3);

    // reentry with x0=closure (NULL will trigger exit)
    build.b(helpers.reentry);
}
// Emits a call to NativeContext::callFallback(L, ra, argtop, nresults) and then branches to the shared reentry helper
void emitInstCall(AssemblyBuilderA64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults)
{
    // x2 = argtop: a fixed argument count ends right after the last parameter, LUA_MULTRET uses L->top instead
    if (nparams != LUA_MULTRET)
        build.add(x2, rBase, uint16_t((ra + 1 + nparams) * sizeof(TValue)));
    else
        build.ldr(x2, mem(rState, offsetof(lua_State, top)));

    // callFallback(L, ra, argtop, nresults)
    build.mov(x0, rState);
    build.add(x1, rBase, uint16_t(ra * sizeof(TValue)));
    build.mov(w3, nresults);
    build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, callFallback)));
    build.blr(x4);

    // reentry with x0=closure (NULL will trigger exit)
    build.b(helpers.reentry);
}
// Emits a call to luaV_getimport for import id 'aux', then moves the value it left on top of the stack into register 'ra' and pops it
void emitInstGetImport(AssemblyBuilderA64& build, int ra, uint32_t aux)
{
    const auto stackTop = mem(rState, offsetof(lua_State, top));

    // luaV_getimport(L, cl->env, k, aux, /* propagatenil= */ false)
    build.mov(x0, rState);
    build.ldr(x1, mem(rClosure, offsetof(Closure, env)));
    build.mov(x2, rConstants);
    build.mov(w3, aux);
    build.mov(w4, 0);
    build.ldr(x5, mem(rNativeContext, offsetof(NativeContext, luaV_getimport)));
    build.blr(x5);

    emitUpdateBase(build);

    // x0 = L->top - 1; copy that TValue into ra (setobj2s)
    build.ldr(x0, stackTop);
    build.sub(x0, x0, sizeof(TValue));
    build.ldr(q0, x0);
    build.str(q0, mem(rBase, ra * sizeof(TValue)));

    // L->top-- (x0 already holds the decremented pointer)
    build.str(x0, stackTop);
}
} // namespace A64
} // namespace CodeGen
} // namespace Luau

View File

@ -1,24 +0,0 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include <stdint.h>
namespace Luau
{
namespace CodeGen
{
struct ModuleHelpers;
namespace A64
{
class AssemblyBuilderA64;
void emitInstReturn(AssemblyBuilderA64& build, ModuleHelpers& helpers, int ra, int n);
void emitInstCall(AssemblyBuilderA64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults);
void emitInstGetImport(AssemblyBuilderA64& build, int ra, uint32_t aux);
} // namespace A64
} // namespace CodeGen
} // namespace Luau

View File

@ -415,7 +415,7 @@ void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int
callBarrierTableFast(regs, build, table, {});
}
void emitinstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat, Label& loopExit)
void emitInstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat)
{
// ipairs-style traversal is handled in IR
LUAU_ASSERT(aux >= 0);
@ -484,78 +484,6 @@ void emitinstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRep
build.jcc(ConditionX64::NotZero, loopRepeat);
}
// Calls NativeContext::forgLoopNonTableFallback(L, ra, aux), refreshes the base register and
// jumps to 'loopRepeat' when the byte returned in al is non-zero
void emitinstForGLoopFallback(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat)
{
    const OperandX64 fallbackFn = qword[rNativeContext + offsetof(NativeContext, forgLoopNonTableFallback)];

    build.mov(rArg1, rState);
    build.mov(dwordReg(rArg2), ra);
    build.mov(dwordReg(rArg3), aux);
    build.call(fallbackFn);

    emitUpdateBase(build);

    build.test(al, al);
    build.jcc(ConditionX64::NotZero, loopRepeat);
}
// Calls NativeContext::forgPrepXnextFallback(L, &ra, pcpos + 1) and then jumps unconditionally to 'target'
void emitInstForGPrepXnextFallback(AssemblyBuilderX64& build, int pcpos, int ra, Label& target)
{
    const int followingPc = pcpos + 1;
    const OperandX64 fallbackFn = qword[rNativeContext + offsetof(NativeContext, forgPrepXnextFallback)];

    build.mov(rArg1, rState);
    build.lea(rArg2, luauRegAddress(ra));
    build.mov(dwordReg(rArg3), followingPc);
    build.call(fallbackFn);

    build.jmp(target);
}
// Emits a call to luaV_getimport for import id 'aux', then moves the value it left on top of the stack into register 'ra' and pops it
// Note: rax and xmm0 are used as scratch
void emitInstGetImportFallback(AssemblyBuilderX64& build, int ra, uint32_t aux)
{
    const OperandX64 stackTop = qword[rState + offsetof(lua_State, top)];

    build.mov(rax, sClosure);

    // luaV_getimport(L, cl->env, k, aux, /* propagatenil= */ false)
    build.mov(rArg1, rState);
    build.mov(rArg2, qword[rax + offsetof(Closure, env)]);
    build.mov(rArg3, rConstants);
    build.mov(dwordReg(rArg4), aux);

    // fifth argument (false): zeroed register outside of Windows, stack slot on Windows
    if (build.abi != ABIX64::Windows)
        build.xor_(rArg5, rArg5);
    else
        build.mov(sArg5, 0);

    build.call(qword[rNativeContext + offsetof(NativeContext, luaV_getimport)]);

    emitUpdateBase(build);

    // rax = L->top - 1; copy that TValue into ra (setobj2s)
    build.mov(rax, stackTop);
    build.sub(rax, sizeof(TValue));
    build.vmovups(xmm0, xmmword[rax]);
    build.vmovups(luauReg(ra), xmm0);

    // L->top-- (rax already holds the decremented pointer)
    build.mov(stackTop, rax);
}
// Emits code that increments the hit counter stored in the upper 24 bits of the instruction at 'pcpos',
// leaving the counter unchanged once it reaches (1 << 23) - 1
// Note: rcx, edx and eax are used as scratch
void emitInstCoverage(AssemblyBuilderX64& build, int pcpos)
{
    const int kHitLimit = (1 << 23) - 1;

    // rcx = address of the instruction word
    build.mov(rcx, sCode);
    build.add(rcx, pcpos * sizeof(Instruction));

    // edx = hits = LUAU_INSN_E(*pc)
    build.mov(edx, dword[rcx]);
    build.sar(edx, 8);

    // eax = (hits != limit) ? 1 : 0; hits += eax
    build.xor_(eax, eax);
    build.cmp(edx, kHitLimit);
    build.setcc(ConditionX64::NotEqual, al);
    build.add(edx, eax);

    // VM_PATCH_E(pc, hits): keep the low byte of the instruction, replace the rest with the new counter
    build.sal(edx, 8);
    build.movzx(eax, byte[rcx]);
    build.or_(eax, edx);
    build.mov(dword[rcx], eax);
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -20,11 +20,7 @@ struct IrRegAllocX64;
void emitInstCall(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults);
void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults);
void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, int count, uint32_t index);
void emitinstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat, Label& loopExit);
void emitinstForGLoopFallback(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat);
void emitInstForGPrepXnextFallback(AssemblyBuilderX64& build, int pcpos, int ra, Label& target);
void emitInstGetImportFallback(AssemblyBuilderX64& build, int ra, uint32_t aux);
void emitInstCoverage(AssemblyBuilderX64& build, int pcpos);
void emitInstForGLoop(AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat);
} // namespace X64
} // namespace CodeGen

View File

@ -416,6 +416,44 @@ const Instruction* execute_LOP_NAMECALL(lua_State* L, const Instruction* pc, Stk
return pc;
}
// Executes LOP_SETLIST: copies 'c' consecutive stack values starting at register B into the array part of the
// table held in register A, beginning at the 1-based array position given by the aux word.
// Returns the pc following the instruction and its aux word, or NULL when register A does not hold a table.
const Instruction* execute_LOP_SETLIST(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
    [[maybe_unused]] Closure* cl = clvalue(L->ci->func);
    Instruction insn = *pc++;
    StkId ra = VM_REG(LUAU_INSN_A(insn));
    StkId rb = &base[LUAU_INSN_B(insn)]; // note: this can point to L->top if c == LUA_MULTRET making VM_REG unsafe to use
    int c = LUAU_INSN_C(insn) - 1;
    uint32_t index = *pc++;

    if (c == LUA_MULTRET)
    {
        // variable number of values: everything from rb up to the current top; the top is then reset to the frame top
        c = int(L->top - rb);
        L->top = L->ci->top;
    }

    // TODO: we really don't need this anymore
    // The tag is validated before the table pointer is extracted, so that an accessor which asserts on the tag
    // (NOTE(review): confirm hvalue does so in checked builds) cannot fire ahead of this graceful exit
    if (!ttistable(ra))
        return NULL; // temporary workaround to weaken a rather powerful exploitation primitive in case of a MITM attack on bytecode

    Table* h = hvalue(ra);

    // grow the array part when the last written position does not fit
    int last = index + c - 1;
    if (last > h->sizearray)
    {
        VM_PROTECT_PC(); // luaH_resizearray may fail due to OOM

        luaH_resizearray(L, h, last);
    }

    // re-read the array pointer after the potential resize
    TValue* array = h->array;

    for (int i = 0; i < c; ++i)
        setobj2t(L, &array[index + i - 1], rb + i);

    luaC_barrierfast(L, h);
    return pc;
}
const Instruction* execute_LOP_FORGPREP(lua_State* L, const Instruction* pc, StkId base, TValue* k)
{
[[maybe_unused]] Closure* cl = clvalue(L->ci->func);

View File

@ -16,6 +16,7 @@ const Instruction* execute_LOP_GETTABLEKS(lua_State* L, const Instruction* pc, S
const Instruction* execute_LOP_SETTABLEKS(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* execute_LOP_NEWCLOSURE(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* execute_LOP_NAMECALL(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* execute_LOP_SETLIST(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* execute_LOP_FORGPREP(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* execute_LOP_GETVARARGS(lua_State* L, const Instruction* pc, StkId base, TValue* k);
const Instruction* execute_LOP_DUPCLOSURE(lua_State* L, const Instruction* pc, StkId base, TValue* k);

View File

@ -354,6 +354,8 @@ static RegisterSet computeBlockLiveInRegSet(IrFunction& function, const IrBlock&
case IrCmd::RETURN:
useRange(vmRegOp(inst.a), function.intOp(inst.b));
break;
// TODO: FASTCALL is more restrictive than INVOKE_FASTCALL; we should either determine the exact semantics, or rework it
case IrCmd::FASTCALL:
case IrCmd::INVOKE_FASTCALL:
if (int count = function.intOp(inst.e); count != -1)

View File

@ -468,7 +468,8 @@ void IrBuilder::clone(const IrBlock& source, bool removeCurrentTerminator)
IrInst clone = function.instructions[index];
// Skip pseudo instructions to make clone more compact, but validate that they have no users
if (isPseudo(clone.cmd))
// But if substitution tracks a location, that tracking has to be preserved
if (isPseudo(clone.cmd) && !(clone.cmd == IrCmd::SUBSTITUTE && clone.b.kind != IrOpKind::None))
{
LUAU_ASSERT(clone.useCount == 0);
continue;

View File

@ -13,6 +13,10 @@ namespace CodeGen
namespace X64
{
static const std::array<OperandX64, 6> kWindowsGprOrder = {rcx, rdx, r8, r9, addr[rsp + 32], addr[rsp + 40]};
static const std::array<OperandX64, 6> kSystemvGprOrder = {rdi, rsi, rdx, rcx, r8, r9};
static const std::array<OperandX64, 4> kXmmOrder = {xmm0, xmm1, xmm2, xmm3}; // Common order for first 4 fp arguments on Windows/SystemV
static bool sameUnderlyingRegister(RegisterX64 a, RegisterX64 b)
{
SizeX64 underlyingSizeA = a.size == SizeX64::xmmword ? SizeX64::xmmword : SizeX64::qword;
@ -37,21 +41,35 @@ void IrCallWrapperX64::addArgument(SizeX64 targetSize, OperandX64 source, IrOp s
LUAU_ASSERT(instIdx != kInvalidInstIdx || sourceOp.kind == IrOpKind::None);
LUAU_ASSERT(argCount < kMaxCallArguments);
args[argCount++] = {targetSize, source, sourceOp};
CallArgument& arg = args[argCount++];
arg = {targetSize, source, sourceOp};
arg.target = getNextArgumentTarget(targetSize);
if (build.abi == ABIX64::Windows)
{
// On Windows, gpr/xmm register positions move in sync
gprPos++;
xmmPos++;
}
else
{
if (targetSize == SizeX64::xmmword)
xmmPos++;
else
gprPos++;
}
}
void IrCallWrapperX64::addArgument(SizeX64 targetSize, ScopedRegX64& scopedReg)
{
LUAU_ASSERT(argCount < kMaxCallArguments);
args[argCount++] = {targetSize, scopedReg.release(), {}};
addArgument(targetSize, scopedReg.release(), {});
}
void IrCallWrapperX64::call(const OperandX64& func)
{
funcOp = func;
assignTargetRegisters();
countRegisterUses();
for (int i = 0; i < argCount; ++i)
@ -190,44 +208,33 @@ void IrCallWrapperX64::call(const OperandX64& func)
build.call(funcOp);
}
void IrCallWrapperX64::assignTargetRegisters()
RegisterX64 IrCallWrapperX64::suggestNextArgumentRegister(SizeX64 size) const
{
static const std::array<OperandX64, 6> kWindowsGprOrder = {rcx, rdx, r8, r9, addr[rsp + 32], addr[rsp + 40]};
static const std::array<OperandX64, 6> kSystemvGprOrder = {rdi, rsi, rdx, rcx, r8, r9};
OperandX64 target = getNextArgumentTarget(size);
return target.cat == CategoryX64::reg ? regs.takeReg(target.base, kInvalidInstIdx) : regs.allocReg(size, kInvalidInstIdx);
}
OperandX64 IrCallWrapperX64::getNextArgumentTarget(SizeX64 size) const
{
if (size == SizeX64::xmmword)
{
LUAU_ASSERT(size_t(xmmPos) < kXmmOrder.size());
return kXmmOrder[xmmPos];
}
const std::array<OperandX64, 6>& gprOrder = build.abi == ABIX64::Windows ? kWindowsGprOrder : kSystemvGprOrder;
static const std::array<OperandX64, 4> kXmmOrder = {xmm0, xmm1, xmm2, xmm3}; // Common order for first 4 fp arguments on Windows/SystemV
int gprPos = 0;
int xmmPos = 0;
LUAU_ASSERT(size_t(gprPos) < gprOrder.size());
OperandX64 target = gprOrder[gprPos];
for (int i = 0; i < argCount; i++)
{
CallArgument& arg = args[i];
// Keep requested argument size
if (target.cat == CategoryX64::reg)
target.base.size = size;
else if (target.cat == CategoryX64::mem)
target.memSize = size;
if (arg.targetSize == SizeX64::xmmword)
{
LUAU_ASSERT(size_t(xmmPos) < kXmmOrder.size());
arg.target = kXmmOrder[xmmPos++];
if (build.abi == ABIX64::Windows)
gprPos++; // On Windows, gpr/xmm register positions move in sync
}
else
{
LUAU_ASSERT(size_t(gprPos) < gprOrder.size());
arg.target = gprOrder[gprPos++];
if (build.abi == ABIX64::Windows)
xmmPos++; // On Windows, gpr/xmm register positions move in sync
// Keep requested argument size
if (arg.target.cat == CategoryX64::reg)
arg.target.base.size = arg.targetSize;
else if (arg.target.cat == CategoryX64::mem)
arg.target.memSize = arg.targetSize;
}
}
return target;
}
void IrCallWrapperX64::countRegisterUses()
@ -376,7 +383,7 @@ RegisterX64 IrCallWrapperX64::findConflictingTarget() const
void IrCallWrapperX64::renameConflictingRegister(RegisterX64 conflict)
{
// Get a fresh register
RegisterX64 freshReg = conflict.size == SizeX64::xmmword ? regs.allocXmmReg(kInvalidInstIdx) : regs.allocGprReg(conflict.size, kInvalidInstIdx);
RegisterX64 freshReg = regs.allocReg(conflict.size, kInvalidInstIdx);
if (conflict.size == SizeX64::xmmword)
build.vmovsd(freshReg, conflict, conflict);

View File

@ -8,7 +8,6 @@
#include "Luau/IrUtils.h"
#include "EmitCommonA64.h"
#include "EmitInstructionA64.h"
#include "NativeState.h"
#include "lstate.h"
@ -27,13 +26,14 @@ namespace A64
#ifdef TRACE
struct LoweringStatsA64
{
size_t can;
size_t missing;
size_t total;
~LoweringStatsA64()
{
if (total)
printf("A64 lowering succeeded for %.1f%% functions (%d/%d)\n", double(can) / double(total) * 100, int(can), int(total));
printf("A64 lowering succeeded for %.1f%% functions (%d/%d)\n", double(total - missing) / double(total) * 100, int(total - missing),
int(total));
}
} gStatsA64;
#endif
@ -78,32 +78,230 @@ inline ConditionA64 getConditionFP(IrCondition cond)
}
}
// TODO: instead of temp1/temp2 we can take a register that we will use for ra->value; that way callers to this function will be able to use it when
// calling luaC_barrier*
static void checkObjectBarrierConditions(AssemblyBuilderA64& build, RegisterA64 object, RegisterA64 temp1, RegisterA64 temp2, int ra, Label& skip)
static void checkObjectBarrierConditions(AssemblyBuilderA64& build, RegisterA64 object, RegisterA64 temp, int ra, Label& skip)
{
RegisterA64 temp1w = castReg(KindA64::w, temp1);
RegisterA64 temp2w = castReg(KindA64::w, temp2);
RegisterA64 tempw = castReg(KindA64::w, temp);
// iscollectable(ra)
build.ldr(temp1w, mem(rBase, ra * sizeof(TValue) + offsetof(TValue, tt)));
build.cmp(temp1w, LUA_TSTRING);
build.ldr(tempw, mem(rBase, ra * sizeof(TValue) + offsetof(TValue, tt)));
build.cmp(tempw, LUA_TSTRING);
build.b(ConditionA64::Less, skip);
// isblack(obj2gco(o))
// TODO: conditional bit test with BLACKBIT
build.ldrb(temp1w, mem(object, offsetof(GCheader, marked)));
build.mov(temp2w, bitmask(BLACKBIT));
build.and_(temp1w, temp1w, temp2w);
build.cbz(temp1w, skip);
build.ldrb(tempw, mem(object, offsetof(GCheader, marked)));
build.tst(tempw, bitmask(BLACKBIT));
build.b(ConditionA64::Equal, skip); // Equal = Zero after tst
// iswhite(gcvalue(ra))
// TODO: tst with bitmask(WHITE0BIT, WHITE1BIT)
build.ldr(temp1, mem(rBase, ra * sizeof(TValue) + offsetof(TValue, value)));
build.ldrb(temp1w, mem(temp1, offsetof(GCheader, marked)));
build.mov(temp2w, bit2mask(WHITE0BIT, WHITE1BIT));
build.and_(temp1w, temp1w, temp2w);
build.cbz(temp1w, skip);
build.ldr(temp, mem(rBase, ra * sizeof(TValue) + offsetof(TValue, value)));
build.ldrb(tempw, mem(temp, offsetof(GCheader, marked)));
build.tst(tempw, bit2mask(WHITE0BIT, WHITE1BIT));
build.b(ConditionA64::Equal, skip); // Equal = Zero after tst
}
// Emits dst = src + offset
// Offsets up to AssemblyBuilderA64::kMaxImmediate use a single add with immediate; larger offsets are first
// materialized in 'dst', which is why 'dst' must not alias 'src'
static void emitAddOffset(AssemblyBuilderA64& build, RegisterA64 dst, RegisterA64 src, size_t offset)
{
    LUAU_ASSERT(dst != src);
    LUAU_ASSERT(offset <= INT_MAX);

    if (offset > AssemblyBuilderA64::kMaxImmediate)
    {
        build.mov(dst, int(offset));
        build.add(dst, dst, src);
        return;
    }

    build.add(dst, src, uint16_t(offset));
}
// Emits a call to the 'op'-th entry of the NativeContext fallback table for the instruction at 'pcpos',
// passing (L, &code[pcpos], base, k), and refreshes the base register afterwards
static void emitFallback(AssemblyBuilderA64& build, int op, int pcpos)
{
    const size_t fallbackSlot = offsetof(NativeContext, fallback) + op * sizeof(FallbackFn);

    // fallback(L, instruction, base, k)
    build.mov(x0, rState);
    emitAddOffset(build, x1, rCode, pcpos * sizeof(Instruction));
    build.mov(x2, rBase);
    build.mov(x3, rConstants);
    build.ldr(x4, mem(rNativeContext, fallbackSlot));
    build.blr(x4);

    emitUpdateBase(build);
}
// Emits a call to a one-argument double function stored at byte offset 'func' inside NativeContext:
// the number in VM register 'arg' is passed in d0 and the d0 result is stored as the number of VM register 'res'
// Note: x0 is used to hold the function pointer
static void emitInvokeLibm1(AssemblyBuilderA64& build, size_t func, int res, int arg)
{
    const auto input = mem(rBase, arg * sizeof(TValue) + offsetof(TValue, value.n));
    const auto output = mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n));

    build.ldr(d0, input);
    build.ldr(x0, mem(rNativeContext, uint32_t(func)));
    build.blr(x0);
    build.str(d0, output);
}
// Emits a call to a two-argument function stored at byte offset 'func' inside NativeContext
// The first argument is the number in VM register 'arg' (passed in d0); the second comes from 'args', which must be
// a VM register or a VM constant. It is passed in d1, or converted to an integer in w0 when 'argsInt' is set.
// The d0 result is stored as the number of VM register 'res'.
static void emitInvokeLibm2(AssemblyBuilderA64& build, size_t func, int res, int arg, IrOp args, bool argsInt = false)
{
    switch (args.kind)
    {
    case IrOpKind::VmReg:
        build.ldr(d1, mem(rBase, args.index * sizeof(TValue) + offsetof(TValue, value.n)));
        break;
    case IrOpKind::VmConst:
    {
        size_t constantOffset = args.index * sizeof(TValue) + offsetof(TValue, value.n);

        // Note: cumulative offset is guaranteed to be divisible by 8 (since we're loading a double); we can use that to expand the useful range that
        // doesn't require temporaries
        if (constantOffset / 8 > AddressA64::kMaxOffset)
        {
            // out of direct addressing range: compute the address in x0 first
            emitAddOffset(build, x0, rConstants, constantOffset);
            build.ldr(d1, x0);
        }
        else
        {
            build.ldr(d1, mem(rConstants, int(constantOffset)));
        }
        break;
    }
    default:
        LUAU_ASSERT(!"Unsupported instruction form");
        break;
    }

    // integer second argument: truncating conversion of d1 into w0
    if (argsInt)
        build.fcvtzs(w0, d1);

    build.ldr(d0, mem(rBase, arg * sizeof(TValue) + offsetof(TValue, value.n)));
    build.ldr(x1, mem(rNativeContext, uint32_t(func)));
    build.blr(x1);
    build.str(d0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n)));
}
// Emits a call to a function stored at byte offset 'func' inside NativeContext that takes a double and a pointer:
// the number in VM register 'arg' is passed in d0 and the address of the sTemporary stack slot in x0
// The results (d0 and the contents of sTemporary) are left for the caller to store
static void emitInvokeLibm1P(AssemblyBuilderA64& build, size_t func, int arg)
{
    const auto input = mem(rBase, arg * sizeof(TValue) + offsetof(TValue, value.n));

    build.ldr(d0, input);
    build.add(x0, sp, sTemporary.data); // sp-relative offset
    build.ldr(x1, mem(rNativeContext, uint32_t(func)));
    build.blr(x1);
}
// Emits A64 code for the builtin identified by 'bfid'.
// 'arg' is the VM register holding the first argument and 'res' is the VM register that receives the first result
// (a second result, when requested, goes to 'res + 1'). 'args' is the operand for the second argument and is only
// forwarded to emitInvokeLibm2. 'nparams'/'nresults' are only validated through assertions and used to pick variants.
// Returns true when code was emitted; asserts and returns false for a builtin without a lowering here.
// Note: 'regs' is not referenced by this function; the emitted code uses fixed registers (x0-x2, w0, d0, d1).
// Note: only the value part of the result register(s) is written here; the tag is not touched by this function.
static bool emitBuiltin(AssemblyBuilderA64& build, IrRegAllocA64& regs, int bfid, int res, int arg, IrOp args, int nparams, int nresults)
{
switch (bfid)
{
case LBF_MATH_EXP:
LUAU_ASSERT(nparams == 1 && nresults == 1);
emitInvokeLibm1(build, offsetof(NativeContext, libm_exp), res, arg);
return true;
case LBF_MATH_FMOD:
LUAU_ASSERT(nparams == 2 && nresults == 1);
emitInvokeLibm2(build, offsetof(NativeContext, libm_fmod), res, arg, args);
return true;
case LBF_MATH_ASIN:
LUAU_ASSERT(nparams == 1 && nresults == 1);
emitInvokeLibm1(build, offsetof(NativeContext, libm_asin), res, arg);
return true;
case LBF_MATH_SIN:
LUAU_ASSERT(nparams == 1 && nresults == 1);
emitInvokeLibm1(build, offsetof(NativeContext, libm_sin), res, arg);
return true;
case LBF_MATH_SINH:
LUAU_ASSERT(nparams == 1 && nresults == 1);
emitInvokeLibm1(build, offsetof(NativeContext, libm_sinh), res, arg);
return true;
case LBF_MATH_ACOS:
LUAU_ASSERT(nparams == 1 && nresults == 1);
emitInvokeLibm1(build, offsetof(NativeContext, libm_acos), res, arg);
return true;
case LBF_MATH_COS:
LUAU_ASSERT(nparams == 1 && nresults == 1);
emitInvokeLibm1(build, offsetof(NativeContext, libm_cos), res, arg);
return true;
case LBF_MATH_COSH:
LUAU_ASSERT(nparams == 1 && nresults == 1);
emitInvokeLibm1(build, offsetof(NativeContext, libm_cosh), res, arg);
return true;
case LBF_MATH_ATAN:
LUAU_ASSERT(nparams == 1 && nresults == 1);
emitInvokeLibm1(build, offsetof(NativeContext, libm_atan), res, arg);
return true;
case LBF_MATH_TAN:
LUAU_ASSERT(nparams == 1 && nresults == 1);
emitInvokeLibm1(build, offsetof(NativeContext, libm_tan), res, arg);
return true;
case LBF_MATH_TANH:
LUAU_ASSERT(nparams == 1 && nresults == 1);
emitInvokeLibm1(build, offsetof(NativeContext, libm_tanh), res, arg);
return true;
case LBF_MATH_ATAN2:
LUAU_ASSERT(nparams == 2 && nresults == 1);
emitInvokeLibm2(build, offsetof(NativeContext, libm_atan2), res, arg, args);
return true;
case LBF_MATH_LOG10:
LUAU_ASSERT(nparams == 1 && nresults == 1);
emitInvokeLibm1(build, offsetof(NativeContext, libm_log10), res, arg);
return true;
case LBF_MATH_LOG:
LUAU_ASSERT((nparams == 1 || nparams == 2) && nresults == 1);
// TODO: IR builtin lowering assumes that the only valid 2-argument call is log2; ideally, we use a less hacky way to indicate that
// The second argument itself is never read here: nparams == 2 simply selects libm_log2
if (nparams == 2)
emitInvokeLibm1(build, offsetof(NativeContext, libm_log2), res, arg);
else
emitInvokeLibm1(build, offsetof(NativeContext, libm_log), res, arg);
return true;
case LBF_MATH_LDEXP:
LUAU_ASSERT(nparams == 2 && nresults == 1);
emitInvokeLibm2(build, offsetof(NativeContext, libm_ldexp), res, arg, args, /* argsInt= */ true);
return true;
case LBF_MATH_FREXP:
LUAU_ASSERT(nparams == 1 && (nresults == 1 || nresults == 2));
emitInvokeLibm1P(build, offsetof(NativeContext, libm_frexp), arg);
// first result: the double returned in d0
build.str(d0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n)));
if (nresults == 2)
{
// second result: the integer written through the pointer argument (sTemporary), converted to a double
build.ldr(w0, sTemporary);
build.scvtf(d1, w0);
build.str(d1, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, value.n)));
}
return true;
case LBF_MATH_MODF:
LUAU_ASSERT(nparams == 1 && (nresults == 1 || nresults == 2));
emitInvokeLibm1P(build, offsetof(NativeContext, libm_modf), arg);
// first result: the double written through the pointer argument (sTemporary)
build.ldr(d1, sTemporary);
build.str(d1, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n)));
// second result: the double returned in d0
if (nresults == 2)
build.str(d0, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, value.n)));
return true;
case LBF_MATH_SIGN:
LUAU_ASSERT(nparams == 1 && nresults == 1);
// TODO: this can be improved with fmov(constant), for now we just load from memory
// Compare the argument against zero once; the result starts as 0.0 and is replaced by 1.0 / -1.0 via the two
// conditional selects below, which both consume the flags of this comparison
// NOTE(review): adr with a double operand presumably yields the address of that constant in emitted data - confirm
build.ldr(d0, mem(rBase, arg * sizeof(TValue) + offsetof(TValue, value.n)));
build.fcmpz(d0);
build.adr(x0, 0.0);
build.ldr(d0, x0);
build.adr(x0, 1.0);
build.ldr(d1, x0);
build.fcsel(d0, d1, d0, getConditionFP(IrCondition::Greater));
build.adr(x0, -1.0);
build.ldr(d1, x0);
build.fcsel(d0, d1, d0, getConditionFP(IrCondition::Less));
build.str(d0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n)));
return true;
case LBF_TYPE:
// Load the argument tag and use it to index global_State::ttname (8-byte entries, hence the shift by 3)
build.ldr(w0, mem(rBase, arg * sizeof(TValue) + offsetof(TValue, tt)));
build.ldr(x1, mem(rState, offsetof(lua_State, global)));
// TODO: this can use load with shifted/extended offset
LUAU_ASSERT(sizeof(TString*) == 8);
build.add(x1, x1, zextReg(w0), 3);
build.ldr(x0, mem(x1, offsetof(global_State, ttname)));
build.str(x0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.gc)));
return true;
case LBF_TYPEOF:
// luaT_objtypenamestr(L, &arg); the pointer returned in x0 is stored as the gc value of the result
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(arg * sizeof(TValue)));
build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaT_objtypenamestr)));
build.blr(x2);
build.str(x0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.gc)));
return true;
default:
LUAU_ASSERT(!"Missing A64 lowering");
return false;
}
}
IrLoweringA64::IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, NativeState& data, Proto* proto, IrFunction& function)
@ -116,119 +314,10 @@ IrLoweringA64::IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers,
{
// In order to allocate registers during lowering, we need to know where instruction results are last used
updateLastUseLocations(function);
}
// TODO: Eventually this can go away
// Reports whether every instruction of 'function' uses an IR command from the list of supported commands below.
// Returns false at the first command that is not in the list, true when all instructions pass.
// When TRACE is defined, this also updates the gStatsA64 counters ('total' on every call, 'can' on success) and
// prints the name of the first unsupported command.
bool IrLoweringA64::canLower(const IrFunction& function)
{
#ifdef TRACE
gStatsA64.total++;
#endif
for (const IrInst& inst : function.instructions)
{
switch (inst.cmd)
{
// Supported commands: keep scanning
case IrCmd::NOP:
case IrCmd::LOAD_TAG:
case IrCmd::LOAD_POINTER:
case IrCmd::LOAD_DOUBLE:
case IrCmd::LOAD_INT:
case IrCmd::LOAD_TVALUE:
case IrCmd::LOAD_NODE_VALUE_TV:
case IrCmd::LOAD_ENV:
case IrCmd::GET_ARR_ADDR:
case IrCmd::GET_SLOT_NODE_ADDR:
case IrCmd::GET_HASH_NODE_ADDR:
case IrCmd::STORE_TAG:
case IrCmd::STORE_POINTER:
case IrCmd::STORE_DOUBLE:
case IrCmd::STORE_INT:
case IrCmd::STORE_TVALUE:
case IrCmd::STORE_NODE_VALUE_TV:
case IrCmd::ADD_INT:
case IrCmd::SUB_INT:
case IrCmd::ADD_NUM:
case IrCmd::SUB_NUM:
case IrCmd::MUL_NUM:
case IrCmd::DIV_NUM:
case IrCmd::MOD_NUM:
case IrCmd::POW_NUM:
case IrCmd::MIN_NUM:
case IrCmd::MAX_NUM:
case IrCmd::UNM_NUM:
case IrCmd::FLOOR_NUM:
case IrCmd::CEIL_NUM:
case IrCmd::ROUND_NUM:
case IrCmd::SQRT_NUM:
case IrCmd::ABS_NUM:
case IrCmd::JUMP:
case IrCmd::JUMP_IF_TRUTHY:
case IrCmd::JUMP_IF_FALSY:
case IrCmd::JUMP_EQ_TAG:
case IrCmd::JUMP_EQ_INT:
case IrCmd::JUMP_EQ_POINTER:
case IrCmd::JUMP_CMP_NUM:
case IrCmd::JUMP_CMP_ANY:
case IrCmd::TABLE_LEN:
case IrCmd::NEW_TABLE:
case IrCmd::DUP_TABLE:
case IrCmd::TRY_NUM_TO_INDEX:
case IrCmd::INT_TO_NUM:
case IrCmd::ADJUST_STACK_TO_REG:
case IrCmd::ADJUST_STACK_TO_TOP:
case IrCmd::INVOKE_FASTCALL:
case IrCmd::CHECK_FASTCALL_RES:
case IrCmd::DO_ARITH:
case IrCmd::DO_LEN:
case IrCmd::GET_TABLE:
case IrCmd::SET_TABLE:
case IrCmd::GET_IMPORT:
case IrCmd::CONCAT:
case IrCmd::GET_UPVALUE:
case IrCmd::SET_UPVALUE:
case IrCmd::PREPARE_FORN:
case IrCmd::CHECK_TAG:
case IrCmd::CHECK_READONLY:
case IrCmd::CHECK_NO_METATABLE:
case IrCmd::CHECK_SAFE_ENV:
case IrCmd::CHECK_ARRAY_SIZE:
case IrCmd::CHECK_SLOT_MATCH:
case IrCmd::INTERRUPT:
case IrCmd::CHECK_GC:
case IrCmd::BARRIER_OBJ:
case IrCmd::BARRIER_TABLE_BACK:
case IrCmd::BARRIER_TABLE_FORWARD:
case IrCmd::SET_SAVEDPC:
case IrCmd::CLOSE_UPVALS:
case IrCmd::CAPTURE:
case IrCmd::CALL:
case IrCmd::RETURN:
case IrCmd::FALLBACK_GETGLOBAL:
case IrCmd::FALLBACK_SETGLOBAL:
case IrCmd::FALLBACK_GETTABLEKS:
case IrCmd::FALLBACK_SETTABLEKS:
case IrCmd::FALLBACK_NAMECALL:
case IrCmd::FALLBACK_PREPVARARGS:
case IrCmd::FALLBACK_GETVARARGS:
case IrCmd::FALLBACK_NEWCLOSURE:
case IrCmd::FALLBACK_DUPCLOSURE:
case IrCmd::SUBSTITUTE:
continue;
// Anything else is reported as not lowerable
default:
#ifdef TRACE
printf("A64 lowering missing %s\n", getCmdName(inst.cmd));
#endif
return false;
}
}
#ifdef TRACE
gStatsA64.can++;
#endif
return true;
}
void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
@ -245,14 +334,14 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
case IrCmd::LOAD_POINTER:
{
inst.regA64 = regs.allocReg(KindA64::x);
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value.gc));
build.ldr(inst.regA64, addr);
break;
}
case IrCmd::LOAD_DOUBLE:
{
inst.regA64 = regs.allocReg(KindA64::d);
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value.n));
build.ldr(inst.regA64, addr);
break;
}
@ -287,13 +376,21 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
if (inst.b.kind == IrOpKind::Inst)
{
// TODO: This is a temporary hack that reads wN register as if it was xN. This should use unsigned extension shift once we support it.
build.add(inst.regA64, inst.regA64, castReg(KindA64::x, regOp(inst.b)), kTValueSizeLog2);
build.add(inst.regA64, inst.regA64, zextReg(regOp(inst.b)), kTValueSizeLog2);
}
else if (inst.b.kind == IrOpKind::Constant)
{
LUAU_ASSERT(size_t(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate >> kTValueSizeLog2); // TODO: handle out of range values
build.add(inst.regA64, inst.regA64, uint16_t(intOp(inst.b) << kTValueSizeLog2));
// TODO: refactor into a common helper? can't use emitAddOffset because we need a temp register
if (intOp(inst.b) * sizeof(TValue) <= AssemblyBuilderA64::kMaxImmediate)
{
build.add(inst.regA64, inst.regA64, uint16_t(intOp(inst.b) * sizeof(TValue)));
}
else
{
RegisterA64 temp = regs.allocTemp(KindA64::x);
build.mov(temp, intOp(inst.b) * sizeof(TValue));
build.add(inst.regA64, inst.regA64, temp);
}
}
else
LUAU_ASSERT(!"Unsupported instruction form");
@ -314,8 +411,7 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
// note: this may clobber inst.a, so it's important that we don't use it after this
build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(Table, node)));
// TODO: This is a temporary hack that reads wN register as if it was xN. This should use unsigned extension shift once we support it.
build.add(inst.regA64, inst.regA64, castReg(KindA64::x, temp2), kLuaNodeSizeLog2);
build.add(inst.regA64, inst.regA64, zextReg(temp2), kLuaNodeSizeLog2);
break;
}
case IrCmd::GET_HASH_NODE_ADDR:
@ -324,18 +420,16 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
RegisterA64 temp1 = regs.allocTemp(KindA64::w);
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
// TODO: this can use bic (andnot) to do hash & ~(-1 << lsizenode) instead but we don't support it yet
build.mov(temp1, 1);
// hash & ((1 << lsizenode) - 1) == hash & ~(-1 << lsizenode)
build.mov(temp1, -1);
build.ldrb(temp2, mem(regOp(inst.a), offsetof(Table, lsizenode)));
build.lsl(temp1, temp1, temp2);
build.sub(temp1, temp1, 1);
build.mov(temp2, uintOp(inst.b));
build.and_(temp2, temp2, temp1);
build.bic(temp2, temp2, temp1);
// note: this may clobber inst.a, so it's important that we don't use it after this
build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(Table, node)));
// TODO: This is a temporary hack that reads wN register as if it was xN. This should use unsigned extension shift once we support it.
build.add(inst.regA64, inst.regA64, castReg(KindA64::x, temp2), kLuaNodeSizeLog2);
build.add(inst.regA64, inst.regA64, zextReg(temp2), kLuaNodeSizeLog2);
break;
}
case IrCmd::STORE_TAG:
@ -501,6 +595,37 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
build.fabs(inst.regA64, temp);
break;
}
case IrCmd::NOT_ANY:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Constant)
{
// other cases should've been constant folded
LUAU_ASSERT(tagOp(inst.a) == LUA_TBOOLEAN);
build.eor(inst.regA64, regOp(inst.b), 1);
}
else
{
Label notbool, exit;
// use the fact that NIL is the only value less than BOOLEAN to do two tag comparisons at once
LUAU_ASSERT(LUA_TNIL == 0 && LUA_TBOOLEAN == 1);
build.cmp(regOp(inst.a), LUA_TBOOLEAN);
build.b(ConditionA64::NotEqual, notbool);
// boolean => invert value
build.eor(inst.regA64, regOp(inst.b), 1);
build.b(exit);
// not boolean => result is true iff tag was nil
build.setLabel(notbool);
build.cset(inst.regA64, ConditionA64::Less);
build.setLabel(exit);
}
break;
}
case IrCmd::JUMP:
jumpOrFallthrough(blockOp(inst.a), next);
break;
@ -537,10 +662,12 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
break;
}
case IrCmd::JUMP_EQ_TAG:
if (inst.b.kind == IrOpKind::Constant)
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
build.cmp(regOp(inst.a), tagOp(inst.b));
else if (inst.b.kind == IrOpKind::Inst)
else if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Inst)
build.cmp(regOp(inst.a), regOp(inst.b));
else if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Inst)
build.cmp(regOp(inst.b), tagOp(inst.a));
else
LUAU_ASSERT(!"Unsupported instruction form");
@ -570,10 +697,20 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
{
IrCondition cond = conditionOp(inst.c);
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
if (inst.b.kind == IrOpKind::Constant && doubleOp(inst.b) == 0.0)
{
RegisterA64 temp = tempDouble(inst.a);
build.fcmpz(temp);
}
else
{
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
build.fcmp(temp1, temp2);
}
build.fcmp(temp1, temp2);
build.b(getConditionFP(cond), labelOp(inst.d));
jumpOrFallthrough(blockOp(inst.e), next);
break;
@ -607,6 +744,30 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
jumpOrFallthrough(blockOp(inst.e), next);
break;
}
case IrCmd::JUMP_SLOT_MATCH:
{
// TODO: share code with CHECK_SLOT_MATCH
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp1w = castReg(KindA64::w, temp1);
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
build.ldr(temp1w, mem(regOp(inst.a), offsetof(LuaNode, key) + kOffsetOfTKeyTag));
build.and_(temp1w, temp1w, kLuaNodeTagMask);
build.cmp(temp1w, LUA_TSTRING);
build.b(ConditionA64::NotEqual, labelOp(inst.d));
AddressA64 addr = tempAddr(inst.b, offsetof(TValue, value));
build.ldr(temp1, mem(regOp(inst.a), offsetof(LuaNode, key.value)));
build.ldr(temp2, addr);
build.cmp(temp1, temp2);
build.b(ConditionA64::NotEqual, labelOp(inst.d));
build.ldr(temp1w, mem(regOp(inst.a), offsetof(LuaNode, val.tt)));
LUAU_ASSERT(LUA_TNIL == 0);
build.cbz(temp1w, labelOp(inst.d));
jumpOrFallthrough(blockOp(inst.c), next);
break;
}
case IrCmd::TABLE_LEN:
{
regs.assertAllFreeExcept(regOp(inst.a));
@ -664,6 +825,32 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
}
break;
}
case IrCmd::TRY_CALL_FASTGETTM:
{
regs.assertAllFreeExcept(regOp(inst.a));
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
build.ldr(temp1, mem(regOp(inst.a), offsetof(Table, metatable)));
build.cbz(temp1, labelOp(inst.c)); // no metatable
build.ldrb(temp2, mem(temp1, offsetof(Table, tmcache)));
build.tst(temp2, 1 << intOp(inst.b)); // can't use tbz/tbnz because their jump offsets are too short
build.b(ConditionA64::NotEqual, labelOp(inst.c)); // Equal = Zero after tst; tmcache caches *absence* of metamethods
build.mov(x0, temp1);
build.mov(w1, intOp(inst.b));
build.ldr(x2, mem(rState, offsetof(lua_State, global)));
build.ldr(x2, mem(x2, offsetof(global_State, tmname) + intOp(inst.b) * sizeof(TString*)));
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaT_gettm)));
build.blr(x3);
// TODO: we could takeReg x0 but it's unclear if we will be able to keep x0 allocatable due to aliasing concerns
inst.regA64 = regs.allocReg(KindA64::x);
build.mov(inst.regA64, x0);
break;
}
case IrCmd::INT_TO_NUM:
{
inst.regA64 = regs.allocReg(KindA64::d);
@ -683,8 +870,7 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
else if (inst.b.kind == IrOpKind::Inst)
{
build.add(temp, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
// TODO: This is a temporary hack that reads wN register as if it was xN. This should use unsigned extension shift once we support it.
build.add(temp, temp, castReg(KindA64::x, regOp(inst.b)), kTValueSizeLog2);
build.add(temp, temp, zextReg(regOp(inst.b)), kTValueSizeLog2);
build.str(temp, mem(rState, offsetof(lua_State, top)));
}
else
@ -699,6 +885,12 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
build.str(temp, mem(rState, offsetof(lua_State, top)));
break;
}
case IrCmd::FASTCALL:
regs.assertAllFree();
// TODO: emitBuiltin should be exhaustive
if (!emitBuiltin(build, regs, uintOp(inst.a), vmRegOp(inst.b), vmRegOp(inst.c), inst.d, intOp(inst.e), intOp(inst.f)))
error = true;
break;
case IrCmd::INVOKE_FASTCALL:
{
regs.assertAllFree();
@ -710,18 +902,7 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
if (inst.d.kind == IrOpKind::VmReg)
build.add(x4, rBase, uint16_t(vmRegOp(inst.d) * sizeof(TValue)));
else if (inst.d.kind == IrOpKind::VmConst)
{
// TODO: refactor into a common helper
if (vmConstOp(inst.d) * sizeof(TValue) <= AssemblyBuilderA64::kMaxImmediate)
{
build.add(x4, rConstants, uint16_t(vmConstOp(inst.d) * sizeof(TValue)));
}
else
{
build.mov(x4, vmConstOp(inst.d) * sizeof(TValue));
build.add(x4, rConstants, x4);
}
}
emitAddOffset(build, x4, rConstants, vmConstOp(inst.d) * sizeof(TValue));
else
LUAU_ASSERT(boolOp(inst.d) == false);
@ -742,7 +923,7 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
build.ldr(x6, mem(rNativeContext, offsetof(NativeContext, luauF_table) + uintOp(inst.a) * sizeof(luau_FastFunction)));
build.blr(x6);
// TODO: we could takeReg w0 but it's unclear if we will be able to keep x0 allocatable due to aliasing concerns
// since w0 came from a call, we need to move it so that we don't violate zextReg safety contract
inst.regA64 = regs.allocReg(KindA64::w);
build.mov(inst.regA64, w0);
break;
@ -758,18 +939,7 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
if (inst.c.kind == IrOpKind::VmConst)
{
// TODO: refactor into a common helper
if (vmConstOp(inst.c) * sizeof(TValue) <= AssemblyBuilderA64::kMaxImmediate)
{
build.add(x3, rConstants, uint16_t(vmConstOp(inst.c) * sizeof(TValue)));
}
else
{
build.mov(x3, vmConstOp(inst.c) * sizeof(TValue));
build.add(x3, rConstants, x3);
}
}
emitAddOffset(build, x3, rConstants, vmConstOp(inst.c) * sizeof(TValue));
else
build.add(x3, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue)));
@ -835,7 +1005,25 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
break;
case IrCmd::GET_IMPORT:
regs.assertAllFree();
emitInstGetImport(build, vmRegOp(inst.a), uintOp(inst.b));
// luaV_getimport(L, cl->env, k, aux, /* propagatenil= */ false)
build.mov(x0, rState);
build.ldr(x1, mem(rClosure, offsetof(Closure, env)));
build.mov(x2, rConstants);
build.mov(w3, uintOp(inst.b));
build.mov(w4, 0);
build.ldr(x5, mem(rNativeContext, offsetof(NativeContext, luaV_getimport)));
build.blr(x5);
emitUpdateBase(build);
// setobj2s(L, ra, L->top - 1)
build.ldr(x0, mem(rState, offsetof(lua_State, top)));
build.sub(x0, x0, sizeof(TValue));
build.ldr(q0, x0);
build.str(q0, mem(rBase, vmRegOp(inst.a) * sizeof(TValue)));
// L->top--
build.str(x0, mem(rState, offsetof(lua_State, top)));
break;
case IrCmd::CONCAT:
regs.assertAllFree();
@ -877,7 +1065,6 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
RegisterA64 temp3 = regs.allocTemp(KindA64::q);
RegisterA64 temp4 = regs.allocTemp(KindA64::x);
// UpVal*
build.ldr(temp1, mem(rClosure, offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.a) + offsetof(TValue, value.gc)));
@ -887,7 +1074,7 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
build.str(temp3, temp2);
Label skip;
checkObjectBarrierConditions(build, temp1, temp2, temp4, vmRegOp(inst.b), skip);
checkObjectBarrierConditions(build, temp1, temp2, vmRegOp(inst.b), skip);
build.mov(x0, rState);
build.mov(x1, temp1); // TODO: aliasing hazard
@ -945,8 +1132,17 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
build.cmp(temp, regOp(inst.b));
else if (inst.b.kind == IrOpKind::Constant)
{
LUAU_ASSERT(size_t(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate); // TODO: handle out of range values
build.cmp(temp, uint16_t(intOp(inst.b)));
// TODO: refactor into a common helper?
if (size_t(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate)
{
build.cmp(temp, uint16_t(intOp(inst.b)));
}
else
{
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
build.mov(temp2, intOp(inst.b));
build.cmp(temp, temp2);
}
}
else
LUAU_ASSERT(!"Unsupported instruction form");
@ -959,12 +1155,9 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp1w = castReg(KindA64::w, temp1);
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
RegisterA64 temp2w = castReg(KindA64::w, temp2);
build.ldr(temp1w, mem(regOp(inst.a), kOffsetOfLuaNodeTag));
// TODO: this needs bitfield extraction, or and-immediate
build.mov(temp2w, kLuaNodeTagMask);
build.and_(temp1w, temp1w, temp2w);
build.ldr(temp1w, mem(regOp(inst.a), offsetof(LuaNode, key) + kOffsetOfTKeyTag));
build.and_(temp1w, temp1w, kLuaNodeTagMask);
build.cmp(temp1w, LUA_TSTRING);
build.b(ConditionA64::NotEqual, labelOp(inst.c));
@ -979,6 +1172,15 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
build.cbz(temp1w, labelOp(inst.c));
break;
}
case IrCmd::CHECK_NODE_NO_NEXT:
{
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.ldr(temp, mem(regOp(inst.a), offsetof(LuaNode, key) + kOffsetOfTKeyNext));
build.and_(temp, temp, ~((1u << kNextBitOffset) - 1)); // TODO: this would be cleaner with a right shift
build.cbnz(temp, labelOp(inst.b));
break;
}
case IrCmd::INTERRUPT:
{
unsigned int pcpos = uintOp(inst.a);
@ -1023,11 +1225,10 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
{
regs.assertAllFreeExcept(regOp(inst.a));
Label skip;
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
RegisterA64 temp = regs.allocTemp(KindA64::x);
checkObjectBarrierConditions(build, regOp(inst.a), temp1, temp2, vmRegOp(inst.b), skip);
Label skip;
checkObjectBarrierConditions(build, regOp(inst.a), temp, vmRegOp(inst.b), skip);
build.mov(x0, rState);
build.mov(x1, regOp(inst.a)); // TODO: aliasing hazard
@ -1044,15 +1245,13 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
regs.assertAllFreeExcept(regOp(inst.a));
Label skip;
RegisterA64 temp1 = regs.allocTemp(KindA64::w);
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
RegisterA64 temp = regs.allocTemp(KindA64::w);
// isblack(obj2gco(t))
build.ldrb(temp1, mem(regOp(inst.a), offsetof(GCheader, marked)));
build.ldrb(temp, mem(regOp(inst.a), offsetof(GCheader, marked)));
// TODO: conditional bit test with BLACKBIT
build.mov(temp2, bitmask(BLACKBIT));
build.and_(temp1, temp1, temp2);
build.cbz(temp1, skip);
build.tst(temp, bitmask(BLACKBIT));
build.b(ConditionA64::Equal, skip); // Equal = Zero after tst
build.mov(x0, rState);
build.mov(x1, regOp(inst.a)); // TODO: aliasing hazard here and below
@ -1068,11 +1267,10 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
{
regs.assertAllFreeExcept(regOp(inst.a));
Label skip;
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
RegisterA64 temp = regs.allocTemp(KindA64::x);
checkObjectBarrierConditions(build, regOp(inst.a), temp1, temp2, vmRegOp(inst.b), skip);
Label skip;
checkObjectBarrierConditions(build, regOp(inst.a), temp, vmRegOp(inst.b), skip);
build.mov(x0, rState);
build.mov(x1, regOp(inst.a)); // TODO: aliasing hazard
@ -1086,21 +1284,10 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
}
case IrCmd::SET_SAVEDPC:
{
unsigned int pcpos = uintOp(inst.a);
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
// TODO: refactor into a common helper
if (pcpos * sizeof(Instruction) <= AssemblyBuilderA64::kMaxImmediate)
{
build.add(temp1, rCode, uint16_t(pcpos * sizeof(Instruction)));
}
else
{
build.mov(temp1, pcpos * sizeof(Instruction));
build.add(temp1, rCode, temp1);
}
emitAddOffset(build, temp1, rCode, uintOp(inst.a) * sizeof(Instruction));
build.ldr(temp2, mem(rState, offsetof(lua_State, ci)));
build.str(temp1, mem(temp2, offsetof(CallInfo, savedpc)));
break;
@ -1133,14 +1320,100 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
case IrCmd::CAPTURE:
// no-op
break;
case IrCmd::SETLIST:
regs.assertAllFree();
emitFallback(build, LOP_SETLIST, uintOp(inst.a));
break;
case IrCmd::CALL:
regs.assertAllFree();
emitInstCall(build, helpers, vmRegOp(inst.a), intOp(inst.b), intOp(inst.c));
// argtop = (nparams == LUA_MULTRET) ? L->top : ra + 1 + nparams;
if (intOp(inst.b) == LUA_MULTRET)
build.ldr(x2, mem(rState, offsetof(lua_State, top)));
else
build.add(x2, rBase, uint16_t((vmRegOp(inst.a) + 1 + intOp(inst.b)) * sizeof(TValue)));
// callFallback(L, ra, argtop, nresults)
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.mov(w3, intOp(inst.c));
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, callFallback)));
build.blr(x4);
// reentry with x0=closure (NULL will trigger exit)
build.b(helpers.reentry);
break;
case IrCmd::RETURN:
regs.assertAllFree();
emitInstReturn(build, helpers, vmRegOp(inst.a), intOp(inst.b));
// returnFallback(L, ra, n)
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.mov(w2, intOp(inst.b));
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, returnFallback)));
build.blr(x3);
// reentry with x0=closure (NULL will trigger exit)
build.b(helpers.reentry);
break;
case IrCmd::FORGLOOP:
// register layout: ra + 1 = table, ra + 2 = internal index, ra + 3 .. ra + aux = iteration variables
regs.assertAllFree();
// clear extra variables since we might have more than two
if (intOp(inst.b) > 2)
{
build.mov(w0, LUA_TNIL);
for (int i = 2; i < intOp(inst.b); ++i)
build.str(w0, mem(rBase, (vmRegOp(inst.a) + 3 + i) * sizeof(TValue) + offsetof(TValue, tt)));
}
// we use full iter fallback for now; in the future it could be worthwhile to accelerate array iteration here
build.mov(x0, rState);
build.ldr(x1, mem(rBase, (vmRegOp(inst.a) + 1) * sizeof(TValue) + offsetof(TValue, value.gc)));
build.ldr(w2, mem(rBase, (vmRegOp(inst.a) + 2) * sizeof(TValue) + offsetof(TValue, value.p)));
build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, forgLoopTableIter)));
build.blr(x4);
// note: no emitUpdateBase necessary because forgLoopTableIter does not reallocate stack
build.cbnz(w0, labelOp(inst.c));
jumpOrFallthrough(blockOp(inst.d), next);
break;
case IrCmd::FORGLOOP_FALLBACK:
regs.assertAllFree();
build.mov(x0, rState);
build.mov(w1, vmRegOp(inst.a));
build.mov(w2, intOp(inst.b));
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, forgLoopNonTableFallback)));
build.blr(x3);
emitUpdateBase(build);
build.cbnz(w0, labelOp(inst.c));
jumpOrFallthrough(blockOp(inst.d), next);
break;
case IrCmd::FORGPREP_XNEXT_FALLBACK:
regs.assertAllFree();
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
build.mov(w2, uintOp(inst.a) + 1);
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, forgPrepXnextFallback)));
build.blr(x3);
// note: no emitUpdateBase necessary because forgPrepXnextFallback does not reallocate stack
jumpOrFallthrough(blockOp(inst.c), next);
break;
case IrCmd::COVERAGE:
{
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
RegisterA64 temp3 = regs.allocTemp(KindA64::w);
build.mov(temp1, uintOp(inst.a) * sizeof(Instruction));
build.ldr(temp2, mem(rCode, temp1));
// increments E (high 24 bits); if the result overflows a 23-bit counter, high bit becomes 1
// note: cmp can be eliminated with adds but we aren't concerned with code size for coverage
build.add(temp3, temp2, 256);
build.cmp(temp3, 0);
build.csel(temp2, temp2, temp3, ConditionA64::Less);
build.str(temp2, mem(rCode, temp1));
break;
}
// Full instruction fallbacks
case IrCmd::FALLBACK_GETGLOBAL:
@ -1208,9 +1481,25 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
regs.assertAllFree();
emitFallback(build, LOP_DUPCLOSURE, uintOp(inst.a));
break;
case IrCmd::FALLBACK_FORGPREP:
regs.assertAllFree();
emitFallback(build, LOP_FORGPREP, uintOp(inst.a));
jumpOrFallthrough(blockOp(inst.c), next);
break;
default:
LUAU_ASSERT(!"Not supported yet");
// Pseudo instructions
case IrCmd::NOP:
case IrCmd::SUBSTITUTE:
LUAU_ASSERT(!"Pseudo instructions should not be lowered");
break;
// Unsupported instructions
// Note: when adding implementations for these, please move the case: label so that implemented instructions match the order in IrData.h
case IrCmd::STORE_VECTOR:
#ifdef TRACE
gStatsA64.missing++;
#endif
error = true;
break;
}
@ -1220,7 +1509,7 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
bool IrLoweringA64::hasError() const
{
return false;
return error;
}
bool IrLoweringA64::isFallthroughBlock(IrBlock target, IrBlock next)
@ -1287,17 +1576,7 @@ AddressA64 IrLoweringA64::tempAddr(IrOp op, int offset)
RegisterA64 temp = regs.allocTemp(KindA64::x);
// TODO: refactor into a common helper
if (constantOffset <= AssemblyBuilderA64::kMaxImmediate)
{
build.add(temp, rConstants, uint16_t(constantOffset));
}
else
{
build.mov(temp, int(constantOffset));
build.add(temp, rConstants, temp);
}
emitAddOffset(build, temp, rConstants, constantOffset);
return temp;
}
// If we have a register, we assume it's a pointer to TValue

View File

@ -26,8 +26,6 @@ struct IrLoweringA64
{
IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, NativeState& data, Proto* proto, IrFunction& function);
static bool canLower(const IrFunction& function);
void lowerInst(IrInst& inst, uint32_t index, IrBlock& next);
bool hasError() const;
@ -61,6 +59,8 @@ struct IrLoweringA64
IrFunction& function;
IrRegAllocA64 regs;
bool error = false;
};
} // namespace A64

View File

@ -31,6 +31,8 @@ IrLoweringX64::IrLoweringX64(AssemblyBuilderX64& build, ModuleHelpers& helpers,
{
// In order to allocate registers during lowering, we need to know where instruction results are last used
updateLastUseLocations(function);
build.align(kFunctionAlignment, X64::AlignmentDataX64::Ud2);
}
void IrLoweringX64::storeDoubleAsFloat(OperandX64 dst, IrOp src)
@ -59,7 +61,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
switch (inst.cmd)
{
case IrCmd::LOAD_TAG:
inst.regX64 = regs.allocGprReg(SizeX64::dword, index);
inst.regX64 = regs.allocReg(SizeX64::dword, index);
if (inst.a.kind == IrOpKind::VmReg)
build.mov(inst.regX64, luauRegTag(vmRegOp(inst.a)));
@ -73,7 +75,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
LUAU_ASSERT(!"Unsupported instruction form");
break;
case IrCmd::LOAD_POINTER:
inst.regX64 = regs.allocGprReg(SizeX64::qword, index);
inst.regX64 = regs.allocReg(SizeX64::qword, index);
if (inst.a.kind == IrOpKind::VmReg)
build.mov(inst.regX64, luauRegValue(vmRegOp(inst.a)));
@ -87,7 +89,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
LUAU_ASSERT(!"Unsupported instruction form");
break;
case IrCmd::LOAD_DOUBLE:
inst.regX64 = regs.allocXmmReg(index);
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
if (inst.a.kind == IrOpKind::VmReg)
build.vmovsd(inst.regX64, luauRegValue(vmRegOp(inst.a)));
@ -97,12 +99,12 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
LUAU_ASSERT(!"Unsupported instruction form");
break;
case IrCmd::LOAD_INT:
inst.regX64 = regs.allocGprReg(SizeX64::dword, index);
inst.regX64 = regs.allocReg(SizeX64::dword, index);
build.mov(inst.regX64, luauRegValueInt(vmRegOp(inst.a)));
break;
case IrCmd::LOAD_TVALUE:
inst.regX64 = regs.allocXmmReg(index);
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
if (inst.a.kind == IrOpKind::VmReg)
build.vmovups(inst.regX64, luauReg(vmRegOp(inst.a)));
@ -114,12 +116,12 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
LUAU_ASSERT(!"Unsupported instruction form");
break;
case IrCmd::LOAD_NODE_VALUE_TV:
inst.regX64 = regs.allocXmmReg(index);
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
build.vmovups(inst.regX64, luauNodeValue(regOp(inst.a)));
break;
case IrCmd::LOAD_ENV:
inst.regX64 = regs.allocGprReg(SizeX64::qword, index);
inst.regX64 = regs.allocReg(SizeX64::qword, index);
build.mov(inst.regX64, sClosure);
build.mov(inst.regX64, qword[inst.regX64 + offsetof(Closure, env)]);
@ -127,7 +129,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
case IrCmd::GET_ARR_ADDR:
if (inst.b.kind == IrOpKind::Inst)
{
inst.regX64 = regs.allocGprRegOrReuse(SizeX64::qword, index, {inst.b});
inst.regX64 = regs.allocRegOrReuse(SizeX64::qword, index, {inst.b});
if (dwordReg(inst.regX64) != regOp(inst.b))
build.mov(dwordReg(inst.regX64), regOp(inst.b));
@ -137,7 +139,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
}
else if (inst.b.kind == IrOpKind::Constant)
{
inst.regX64 = regs.allocGprRegOrReuse(SizeX64::qword, index, {inst.a});
inst.regX64 = regs.allocRegOrReuse(SizeX64::qword, index, {inst.a});
build.mov(inst.regX64, qword[regOp(inst.a) + offsetof(Table, array)]);
@ -151,7 +153,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
break;
case IrCmd::GET_SLOT_NODE_ADDR:
{
inst.regX64 = regs.allocGprReg(SizeX64::qword, index);
inst.regX64 = regs.allocReg(SizeX64::qword, index);
ScopedRegX64 tmp{regs, SizeX64::qword};
@ -160,11 +162,11 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
}
case IrCmd::GET_HASH_NODE_ADDR:
{
inst.regX64 = regs.allocGprReg(SizeX64::qword, index);
// Custom bit shift value can only be placed in cl
ScopedRegX64 shiftTmp{regs, regs.takeReg(rcx, kInvalidInstIdx)};
inst.regX64 = regs.allocReg(SizeX64::qword, index);
ScopedRegX64 tmp{regs, SizeX64::qword};
build.mov(inst.regX64, qword[regOp(inst.a) + offsetof(Table, node)]);
@ -232,7 +234,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
build.vmovups(luauNodeValue(regOp(inst.a)), regOp(inst.b));
break;
case IrCmd::ADD_INT:
inst.regX64 = regs.allocGprRegOrReuse(SizeX64::dword, index, {inst.a});
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a});
if (inst.regX64 == regOp(inst.a) && intOp(inst.b) == 1)
build.inc(inst.regX64);
@ -242,7 +244,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
build.lea(inst.regX64, addr[regOp(inst.a) + intOp(inst.b)]);
break;
case IrCmd::SUB_INT:
inst.regX64 = regs.allocGprRegOrReuse(SizeX64::dword, index, {inst.a});
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a});
if (inst.regX64 == regOp(inst.a) && intOp(inst.b) == 1)
build.dec(inst.regX64);
@ -252,7 +254,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
build.lea(inst.regX64, addr[regOp(inst.a) - intOp(inst.b)]);
break;
case IrCmd::ADD_NUM:
inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a, inst.b});
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Constant)
{
@ -267,7 +269,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
}
break;
case IrCmd::SUB_NUM:
inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a, inst.b});
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Constant)
{
@ -282,7 +284,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
}
break;
case IrCmd::MUL_NUM:
inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a, inst.b});
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Constant)
{
@ -297,7 +299,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
}
break;
case IrCmd::DIV_NUM:
inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a, inst.b});
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Constant)
{
@ -313,7 +315,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
break;
case IrCmd::MOD_NUM:
{
inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a, inst.b});
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
ScopedRegX64 optLhsTmp{regs};
RegisterX64 lhs;
@ -362,7 +364,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
break;
}
case IrCmd::MIN_NUM:
inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a, inst.b});
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Constant)
{
@ -377,7 +379,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
}
break;
case IrCmd::MAX_NUM:
inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a, inst.b});
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Constant)
{
@ -393,7 +395,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
break;
case IrCmd::UNM_NUM:
{
inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a});
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a});
RegisterX64 src = regOp(inst.a);
@ -410,18 +412,18 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
break;
}
case IrCmd::FLOOR_NUM:
inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a});
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a});
build.vroundsd(inst.regX64, inst.regX64, memRegDoubleOp(inst.a), RoundingModeX64::RoundToNegativeInfinity);
break;
case IrCmd::CEIL_NUM:
inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a});
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a});
build.vroundsd(inst.regX64, inst.regX64, memRegDoubleOp(inst.a), RoundingModeX64::RoundToPositiveInfinity);
break;
case IrCmd::ROUND_NUM:
{
inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a});
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a});
ScopedRegX64 tmp1{regs, SizeX64::xmmword};
ScopedRegX64 tmp2{regs, SizeX64::xmmword};
@ -439,12 +441,12 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
break;
}
case IrCmd::SQRT_NUM:
inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a});
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a});
build.vsqrtsd(inst.regX64, inst.regX64, memRegDoubleOp(inst.a));
break;
case IrCmd::ABS_NUM:
inst.regX64 = regs.allocXmmRegOrReuse(index, {inst.a});
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a});
if (inst.a.kind != IrOpKind::Inst)
build.vmovsd(inst.regX64, memRegDoubleOp(inst.a));
@ -456,7 +458,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
case IrCmd::NOT_ANY:
{
// TODO: if we have a single user which is a STORE_INT, we are missing the opportunity to write directly to target
inst.regX64 = regs.allocGprRegOrReuse(SizeX64::dword, index, {inst.a, inst.b});
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a, inst.b});
Label saveone, savezero, exit;
@ -558,7 +560,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
callWrap.addArgument(SizeX64::qword, regOp(inst.a), inst.a);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaH_getn)]);
inst.regX64 = regs.allocXmmReg(index);
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
build.vcvtsi2sd(inst.regX64, inst.regX64, eax);
break;
}
@ -566,8 +568,8 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
{
IrCallWrapperX64 callWrap(regs, build, index);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::dword, int32_t(uintOp(inst.a)), inst.a);
callWrap.addArgument(SizeX64::dword, int32_t(uintOp(inst.b)), inst.b);
callWrap.addArgument(SizeX64::dword, int32_t(uintOp(inst.a)));
callWrap.addArgument(SizeX64::dword, int32_t(uintOp(inst.b)));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaH_new)]);
inst.regX64 = regs.takeReg(rax, index);
break;
@ -583,7 +585,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
}
case IrCmd::TRY_NUM_TO_INDEX:
{
inst.regX64 = regs.allocGprReg(SizeX64::dword, index);
inst.regX64 = regs.allocReg(SizeX64::dword, index);
ScopedRegX64 tmp{regs, SizeX64::xmmword};
@ -620,7 +622,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
break;
}
case IrCmd::INT_TO_NUM:
inst.regX64 = regs.allocXmmReg(index);
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
build.vcvtsi2sd(inst.regX64, inst.regX64, regOp(inst.a));
break;
@ -688,11 +690,10 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
if (nparams == LUA_MULTRET)
{
// Compute 'L->top - (ra + 1)', on SystemV, take r9 register to compute directly into the argument
// TODO: IrCallWrapperX64 should provide a way to 'guess' target argument register correctly
RegisterX64 reg = build.abi == ABIX64::Windows ? regs.allocGprReg(SizeX64::qword, kInvalidInstIdx) : regs.takeReg(rArg6, kInvalidInstIdx);
RegisterX64 reg = callWrap.suggestNextArgumentRegister(SizeX64::qword);
ScopedRegX64 tmp{regs, SizeX64::qword};
// L->top - (ra + 1)
build.mov(reg, qword[rState + offsetof(lua_State, top)]);
build.lea(tmp.reg, addr[rBase + (ra + 1) * sizeof(TValue)]);
build.sub(reg, tmp.reg);
@ -759,9 +760,35 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
}
break;
case IrCmd::GET_IMPORT:
regs.assertAllFree();
emitInstGetImportFallback(build, vmRegOp(inst.a), uintOp(inst.b));
{
ScopedRegX64 tmp1{regs, SizeX64::qword};
build.mov(tmp1.reg, sClosure);
IrCallWrapperX64 callWrap(regs, build, index);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, qword[tmp1.release() + offsetof(Closure, env)]);
callWrap.addArgument(SizeX64::qword, rConstants);
callWrap.addArgument(SizeX64::dword, uintOp(inst.b));
callWrap.addArgument(SizeX64::dword, 0);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_getimport)]);
emitUpdateBase(build);
ScopedRegX64 tmp2{regs, SizeX64::qword};
// setobj2s(L, ra, L->top - 1)
build.mov(tmp2.reg, qword[rState + offsetof(lua_State, top)]);
build.sub(tmp2.reg, sizeof(TValue));
ScopedRegX64 tmp3{regs, SizeX64::xmmword};
build.vmovups(tmp3.reg, xmmword[tmp2.reg]);
build.vmovups(luauReg(vmRegOp(inst.a)), tmp3.reg);
// L->top--
build.mov(qword[rState + offsetof(lua_State, top)], tmp2.reg);
break;
}
case IrCmd::CONCAT:
{
IrCallWrapperX64 callWrap(regs, build, index);
@ -783,7 +810,6 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
// uprefs[] is either an actual value, or it points to UpVal object which has a pointer to value
Label skip;
// TODO: jumpIfTagIsNot can be generalized to take OperandX64 and then we can use it here; let's wait until we see this more though
build.cmp(dword[tmp1.reg + offsetof(TValue, tt)], LUA_TUPVAL);
build.jcc(ConditionX64::NotEqual, skip);
@ -822,36 +848,25 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
callPrepareForN(regs, build, vmRegOp(inst.a), vmRegOp(inst.b), vmRegOp(inst.c));
break;
case IrCmd::CHECK_TAG:
if (inst.a.kind == IrOpKind::Inst)
{
build.cmp(regOp(inst.a), tagOp(inst.b));
build.jcc(ConditionX64::NotEqual, labelOp(inst.c));
}
else if (inst.a.kind == IrOpKind::VmReg)
{
jumpIfTagIsNot(build, vmRegOp(inst.a), lua_Type(tagOp(inst.b)), labelOp(inst.c));
}
else if (inst.a.kind == IrOpKind::VmConst)
{
build.cmp(luauConstantTag(vmConstOp(inst.a)), tagOp(inst.b));
build.jcc(ConditionX64::NotEqual, labelOp(inst.c));
}
else
{
LUAU_ASSERT(!"Unsupported instruction form");
}
build.cmp(memRegTagOp(inst.a), tagOp(inst.b));
build.jcc(ConditionX64::NotEqual, labelOp(inst.c));
break;
case IrCmd::CHECK_READONLY:
jumpIfTableIsReadOnly(build, regOp(inst.a), labelOp(inst.b));
build.cmp(byte[regOp(inst.a) + offsetof(Table, readonly)], 0);
build.jcc(ConditionX64::NotEqual, labelOp(inst.b));
break;
case IrCmd::CHECK_NO_METATABLE:
jumpIfMetatablePresent(build, regOp(inst.a), labelOp(inst.b));
build.cmp(qword[regOp(inst.a) + offsetof(Table, metatable)], 0);
build.jcc(ConditionX64::NotEqual, labelOp(inst.b));
break;
case IrCmd::CHECK_SAFE_ENV:
{
ScopedRegX64 tmp{regs, SizeX64::qword};
jumpIfUnsafeEnv(build, tmp.reg, labelOp(inst.a));
build.mov(tmp.reg, sClosure);
build.mov(tmp.reg, qword[tmp.reg + offsetof(Closure, env)]);
build.cmp(byte[tmp.reg + offsetof(Table, safeenv)], 0);
build.jcc(ConditionX64::Equal, labelOp(inst.a));
break;
}
case IrCmd::CHECK_ARRAY_SIZE:
@ -872,11 +887,16 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
break;
}
case IrCmd::CHECK_NODE_NO_NEXT:
jumpIfNodeHasNext(build, regOp(inst.a), labelOp(inst.b));
{
ScopedRegX64 tmp{regs, SizeX64::dword};
build.mov(tmp.reg, dword[regOp(inst.a) + offsetof(LuaNode, key) + kOffsetOfTKeyNext]);
build.shr(tmp.reg, kNextBitOffset);
build.jcc(ConditionX64::NotZero, labelOp(inst.b));
break;
}
case IrCmd::INTERRUPT:
regs.assertAllFree();
emitInterrupt(build, uintOp(inst.a));
emitInterrupt(regs, build, uintOp(inst.a));
break;
case IrCmd::CHECK_GC:
callStepGc(regs, build);
@ -970,94 +990,127 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, IrBlock& next)
break;
case IrCmd::FORGLOOP:
regs.assertAllFree();
emitinstForGLoop(build, vmRegOp(inst.a), intOp(inst.b), labelOp(inst.c), labelOp(inst.d));
emitInstForGLoop(build, vmRegOp(inst.a), intOp(inst.b), labelOp(inst.c));
jumpOrFallthrough(blockOp(inst.d), next);
break;
case IrCmd::FORGLOOP_FALLBACK:
regs.assertAllFree();
emitinstForGLoopFallback(build, vmRegOp(inst.a), intOp(inst.b), labelOp(inst.c));
build.jmp(labelOp(inst.d));
{
IrCallWrapperX64 callWrap(regs, build, index);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::dword, vmRegOp(inst.a));
callWrap.addArgument(SizeX64::dword, intOp(inst.b));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, forgLoopNonTableFallback)]);
emitUpdateBase(build);
build.test(al, al);
build.jcc(ConditionX64::NotZero, labelOp(inst.c));
jumpOrFallthrough(blockOp(inst.d), next);
break;
}
case IrCmd::FORGPREP_XNEXT_FALLBACK:
regs.assertAllFree();
emitInstForGPrepXnextFallback(build, uintOp(inst.a), vmRegOp(inst.b), labelOp(inst.c));
{
IrCallWrapperX64 callWrap(regs, build, index);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(vmRegOp(inst.b)));
callWrap.addArgument(SizeX64::dword, uintOp(inst.a) + 1);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, forgPrepXnextFallback)]);
jumpOrFallthrough(blockOp(inst.c), next);
break;
}
case IrCmd::COVERAGE:
regs.assertAllFree();
emitInstCoverage(build, uintOp(inst.a));
{
ScopedRegX64 tmp1{regs, SizeX64::qword};
ScopedRegX64 tmp2{regs, SizeX64::dword};
ScopedRegX64 tmp3{regs, SizeX64::dword};
build.mov(tmp1.reg, sCode);
build.add(tmp1.reg, uintOp(inst.a) * sizeof(Instruction));
// hits = LUAU_INSN_E(*pc)
build.mov(tmp2.reg, dword[tmp1.reg]);
build.sar(tmp2.reg, 8);
// hits = (hits < (1 << 23) - 1) ? hits + 1 : hits;
build.xor_(tmp3.reg, tmp3.reg);
build.cmp(tmp2.reg, (1 << 23) - 1);
build.setcc(ConditionX64::NotEqual, byteReg(tmp3.reg));
build.add(tmp2.reg, tmp3.reg);
// VM_PATCH_E(pc, hits);
build.sal(tmp2.reg, 8);
build.movzx(tmp3.reg, byte[tmp1.reg]);
build.or_(tmp3.reg, tmp2.reg);
build.mov(dword[tmp1.reg], tmp3.reg);
break;
}
// Full instruction fallbacks
case IrCmd::FALLBACK_GETGLOBAL:
LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg);
LUAU_ASSERT(inst.c.kind == IrOpKind::VmConst);
regs.assertAllFree();
emitFallback(build, data, LOP_GETGLOBAL, uintOp(inst.a));
emitFallback(regs, build, data, LOP_GETGLOBAL, uintOp(inst.a));
break;
case IrCmd::FALLBACK_SETGLOBAL:
LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg);
LUAU_ASSERT(inst.c.kind == IrOpKind::VmConst);
regs.assertAllFree();
emitFallback(build, data, LOP_SETGLOBAL, uintOp(inst.a));
emitFallback(regs, build, data, LOP_SETGLOBAL, uintOp(inst.a));
break;
case IrCmd::FALLBACK_GETTABLEKS:
LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg);
LUAU_ASSERT(inst.c.kind == IrOpKind::VmReg);
LUAU_ASSERT(inst.d.kind == IrOpKind::VmConst);
regs.assertAllFree();
emitFallback(build, data, LOP_GETTABLEKS, uintOp(inst.a));
emitFallback(regs, build, data, LOP_GETTABLEKS, uintOp(inst.a));
break;
case IrCmd::FALLBACK_SETTABLEKS:
LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg);
LUAU_ASSERT(inst.c.kind == IrOpKind::VmReg);
LUAU_ASSERT(inst.d.kind == IrOpKind::VmConst);
regs.assertAllFree();
emitFallback(build, data, LOP_SETTABLEKS, uintOp(inst.a));
emitFallback(regs, build, data, LOP_SETTABLEKS, uintOp(inst.a));
break;
case IrCmd::FALLBACK_NAMECALL:
LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg);
LUAU_ASSERT(inst.c.kind == IrOpKind::VmReg);
LUAU_ASSERT(inst.d.kind == IrOpKind::VmConst);
regs.assertAllFree();
emitFallback(build, data, LOP_NAMECALL, uintOp(inst.a));
emitFallback(regs, build, data, LOP_NAMECALL, uintOp(inst.a));
break;
case IrCmd::FALLBACK_PREPVARARGS:
LUAU_ASSERT(inst.b.kind == IrOpKind::Constant);
regs.assertAllFree();
emitFallback(build, data, LOP_PREPVARARGS, uintOp(inst.a));
emitFallback(regs, build, data, LOP_PREPVARARGS, uintOp(inst.a));
break;
case IrCmd::FALLBACK_GETVARARGS:
LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg);
LUAU_ASSERT(inst.c.kind == IrOpKind::Constant);
regs.assertAllFree();
emitFallback(build, data, LOP_GETVARARGS, uintOp(inst.a));
emitFallback(regs, build, data, LOP_GETVARARGS, uintOp(inst.a));
break;
case IrCmd::FALLBACK_NEWCLOSURE:
LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg);
LUAU_ASSERT(inst.c.kind == IrOpKind::Constant);
regs.assertAllFree();
emitFallback(build, data, LOP_NEWCLOSURE, uintOp(inst.a));
emitFallback(regs, build, data, LOP_NEWCLOSURE, uintOp(inst.a));
break;
case IrCmd::FALLBACK_DUPCLOSURE:
LUAU_ASSERT(inst.b.kind == IrOpKind::VmReg);
LUAU_ASSERT(inst.c.kind == IrOpKind::VmConst);
regs.assertAllFree();
emitFallback(build, data, LOP_DUPCLOSURE, uintOp(inst.a));
emitFallback(regs, build, data, LOP_DUPCLOSURE, uintOp(inst.a));
break;
case IrCmd::FALLBACK_FORGPREP:
regs.assertAllFree();
emitFallback(build, data, LOP_FORGPREP, uintOp(inst.a));
emitFallback(regs, build, data, LOP_FORGPREP, uintOp(inst.a));
jumpOrFallthrough(blockOp(inst.c), next);
break;
default:
LUAU_ASSERT(!"Not supported yet");
// Pseudo instructions
case IrCmd::NOP:
case IrCmd::SUBSTITUTE:
LUAU_ASSERT(!"Pseudo instructions should not be lowered");
break;
}

View File

@ -1,9 +1,7 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "IrRegAllocA64.h"
#ifdef _MSC_VER
#include <intrin.h>
#endif
#include "BitUtils.h"
namespace Luau
{
@ -12,19 +10,6 @@ namespace CodeGen
namespace A64
{
// Returns the zero-based index of the most significant set bit of 'n'; 'n' must be non-zero
inline int setBit(uint32_t n)
{
    LUAU_ASSERT(n);

#ifdef _MSC_VER
    // _BitScanReverse stores the position of the highest set bit in its first argument
    unsigned long highest = 0;
    _BitScanReverse(&highest, n);
    return static_cast<int>(highest);
#else
    // For a 32-bit value, the highest set bit index is 31 minus the number of leading zero bits
    const int leadingZeros = __builtin_clz(n);
    return 31 - leadingZeros;
#endif
}
IrRegAllocA64::IrRegAllocA64(IrFunction& function, std::initializer_list<std::pair<RegisterA64, RegisterA64>> regs)
: function(function)
{
@ -52,7 +37,7 @@ RegisterA64 IrRegAllocA64::allocReg(KindA64 kind)
return noreg;
}
int index = setBit(set.free);
int index = 31 - countlz(set.free);
set.free &= ~(1u << index);
return RegisterA64{kind, uint8_t(index)};
@ -68,7 +53,7 @@ RegisterA64 IrRegAllocA64::allocTemp(KindA64 kind)
return noreg;
}
int index = setBit(set.free);
int index = 31 - countlz(set.free);
set.free &= ~(1u << index);
set.temp |= 1u << index;

View File

@ -1,6 +1,8 @@
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/IrRegAllocX64.h"
#include "Luau/IrUtils.h"
#include "EmitCommonX64.h"
namespace Luau
@ -12,11 +14,6 @@ namespace X64
static const RegisterX64 kGprAllocOrder[] = {rax, rdx, rcx, rbx, rsi, rdi, r8, r9, r10, r11};
// Returns true when 'cmd' is one of the two commands treated as full TValue operands: LOAD_TVALUE or LOAD_NODE_VALUE_TV
static bool isFullTvalueOperand(IrCmd cmd)
{
    switch (cmd)
    {
    case IrCmd::LOAD_TVALUE:
    case IrCmd::LOAD_NODE_VALUE_TV:
        return true;
    default:
        return false;
    }
}
IrRegAllocX64::IrRegAllocX64(AssemblyBuilderX64& build, IrFunction& function)
: build(build)
, function(function)
@ -27,50 +24,43 @@ IrRegAllocX64::IrRegAllocX64(AssemblyBuilderX64& build, IrFunction& function)
xmmInstUsers.fill(kInvalidInstIdx);
}
RegisterX64 IrRegAllocX64::allocGprReg(SizeX64 preferredSize, uint32_t instIdx)
RegisterX64 IrRegAllocX64::allocReg(SizeX64 size, uint32_t instIdx)
{
LUAU_ASSERT(
preferredSize == SizeX64::byte || preferredSize == SizeX64::word || preferredSize == SizeX64::dword || preferredSize == SizeX64::qword);
for (RegisterX64 reg : kGprAllocOrder)
if (size == SizeX64::xmmword)
{
if (freeGprMap[reg.index])
for (size_t i = 0; i < freeXmmMap.size(); ++i)
{
freeGprMap[reg.index] = false;
gprInstUsers[reg.index] = instIdx;
return RegisterX64{preferredSize, reg.index};
if (freeXmmMap[i])
{
freeXmmMap[i] = false;
xmmInstUsers[i] = instIdx;
return RegisterX64{size, uint8_t(i)};
}
}
}
// If possible, spill the value with the furthest next use
if (uint32_t furthestUseTarget = findInstructionWithFurthestNextUse(gprInstUsers); furthestUseTarget != kInvalidInstIdx)
return takeReg(function.instructions[furthestUseTarget].regX64, instIdx);
LUAU_ASSERT(!"Out of GPR registers to allocate");
return noreg;
}
RegisterX64 IrRegAllocX64::allocXmmReg(uint32_t instIdx)
{
for (size_t i = 0; i < freeXmmMap.size(); ++i)
else
{
if (freeXmmMap[i])
for (RegisterX64 reg : kGprAllocOrder)
{
freeXmmMap[i] = false;
xmmInstUsers[i] = instIdx;
return RegisterX64{SizeX64::xmmword, uint8_t(i)};
if (freeGprMap[reg.index])
{
freeGprMap[reg.index] = false;
gprInstUsers[reg.index] = instIdx;
return RegisterX64{size, reg.index};
}
}
}
// Out of registers, spill the value with the furthest next use
if (uint32_t furthestUseTarget = findInstructionWithFurthestNextUse(xmmInstUsers); furthestUseTarget != kInvalidInstIdx)
const std::array<uint32_t, 16>& regInstUsers = size == SizeX64::xmmword ? xmmInstUsers : gprInstUsers;
if (uint32_t furthestUseTarget = findInstructionWithFurthestNextUse(regInstUsers); furthestUseTarget != kInvalidInstIdx)
return takeReg(function.instructions[furthestUseTarget].regX64, instIdx);
LUAU_ASSERT(!"Out of XMM registers to allocate");
LUAU_ASSERT(!"Out of registers to allocate");
return noreg;
}
RegisterX64 IrRegAllocX64::allocGprRegOrReuse(SizeX64 preferredSize, uint32_t instIdx, std::initializer_list<IrOp> oprefs)
RegisterX64 IrRegAllocX64::allocRegOrReuse(SizeX64 size, uint32_t instIdx, std::initializer_list<IrOp> oprefs)
{
for (IrOp op : oprefs)
{
@ -81,39 +71,24 @@ RegisterX64 IrRegAllocX64::allocGprRegOrReuse(SizeX64 preferredSize, uint32_t in
if (source.lastUse == instIdx && !source.reusedReg && !source.spilled)
{
LUAU_ASSERT(source.regX64.size != SizeX64::xmmword);
// Not comparing size directly because we only need matching register set
if ((size == SizeX64::xmmword) != (source.regX64.size == SizeX64::xmmword))
continue;
LUAU_ASSERT(source.regX64 != noreg);
source.reusedReg = true;
gprInstUsers[source.regX64.index] = instIdx;
return RegisterX64{preferredSize, source.regX64.index};
if (size == SizeX64::xmmword)
xmmInstUsers[source.regX64.index] = instIdx;
else
gprInstUsers[source.regX64.index] = instIdx;
return RegisterX64{size, source.regX64.index};
}
}
return allocGprReg(preferredSize, instIdx);
}
RegisterX64 IrRegAllocX64::allocXmmRegOrReuse(uint32_t instIdx, std::initializer_list<IrOp> oprefs)
{
for (IrOp op : oprefs)
{
if (op.kind != IrOpKind::Inst)
continue;
IrInst& source = function.instructions[op.index];
if (source.lastUse == instIdx && !source.reusedReg && !source.spilled)
{
LUAU_ASSERT(source.regX64.size == SizeX64::xmmword);
LUAU_ASSERT(source.regX64 != noreg);
source.reusedReg = true;
xmmInstUsers[source.regX64.index] = instIdx;
return source.regX64;
}
}
return allocXmmReg(instIdx);
return allocReg(size, instIdx);
}
RegisterX64 IrRegAllocX64::takeReg(RegisterX64 reg, uint32_t instIdx)
@ -197,41 +172,34 @@ bool IrRegAllocX64::isLastUseReg(const IrInst& target, uint32_t instIdx) const
void IrRegAllocX64::preserve(IrInst& inst)
{
bool doubleSlot = isFullTvalueOperand(inst.cmd);
IrSpillX64 spill;
spill.instIdx = function.getInstIndex(inst);
spill.valueKind = getCmdValueKind(inst.cmd);
spill.spillId = nextSpillId++;
spill.originalLoc = inst.regX64;
// Find a free stack slot. Two consecutive slots might be required for 16 byte TValues, so '- 1' is used
for (unsigned i = 0; i < unsigned(usedSpillSlots.size() - 1); ++i)
// Loads from VmReg/VmConst don't have to be spilled, they can be restored from a register later
if (!hasRestoreOp(inst))
{
if (usedSpillSlots.test(i))
continue;
unsigned i = findSpillStackSlot(spill.valueKind);
if (doubleSlot && usedSpillSlots.test(i + 1))
{
++i; // No need to retest this double position
continue;
}
if (inst.regX64.size == SizeX64::xmmword && doubleSlot)
{
if (spill.valueKind == IrValueKind::Tvalue)
build.vmovups(xmmword[sSpillArea + i * 8], inst.regX64);
}
else if (inst.regX64.size == SizeX64::xmmword)
{
else if (spill.valueKind == IrValueKind::Double)
build.vmovsd(qword[sSpillArea + i * 8], inst.regX64);
}
else if (spill.valueKind == IrValueKind::Pointer)
build.mov(qword[sSpillArea + i * 8], inst.regX64);
else if (spill.valueKind == IrValueKind::Tag || spill.valueKind == IrValueKind::Int)
build.mov(dword[sSpillArea + i * 8], inst.regX64);
else
{
OperandX64 location = addr[sSpillArea + i * 8];
location.memSize = inst.regX64.size; // Override memory access size
build.mov(location, inst.regX64);
}
LUAU_ASSERT(!"unsupported value kind");
usedSpillSlots.set(i);
if (i + 1 > maxUsedSlot)
maxUsedSlot = i + 1;
if (doubleSlot)
if (spill.valueKind == IrValueKind::Tvalue)
{
usedSpillSlots.set(i + 1);
@ -239,22 +207,15 @@ void IrRegAllocX64::preserve(IrInst& inst)
maxUsedSlot = i + 2;
}
IrSpillX64 spill;
spill.instIdx = function.getInstIndex(inst);
spill.useDoubleSlot = doubleSlot;
spill.stackSlot = uint8_t(i);
spill.originalLoc = inst.regX64;
spills.push_back(spill);
freeReg(inst.regX64);
inst.regX64 = noreg;
inst.spilled = true;
return;
}
LUAU_ASSERT(!"nowhere to spill");
spills.push_back(spill);
freeReg(inst.regX64);
inst.regX64 = noreg;
inst.spilled = true;
}
void IrRegAllocX64::restore(IrInst& inst, bool intoOriginalLocation)
@ -267,35 +228,34 @@ void IrRegAllocX64::restore(IrInst& inst, bool intoOriginalLocation)
if (spill.instIdx == instIdx)
{
LUAU_ASSERT(spill.stackSlot != kNoStackSlot);
RegisterX64 reg;
RegisterX64 reg = intoOriginalLocation ? takeReg(spill.originalLoc, instIdx) : allocReg(spill.originalLoc.size, instIdx);
OperandX64 restoreLocation = noreg;
if (spill.originalLoc.size == SizeX64::xmmword)
if (spill.stackSlot != kNoStackSlot)
{
reg = intoOriginalLocation ? takeReg(spill.originalLoc, instIdx) : allocXmmReg(instIdx);
restoreLocation = addr[sSpillArea + spill.stackSlot * 8];
restoreLocation.memSize = reg.size;
if (spill.useDoubleSlot)
build.vmovups(reg, xmmword[sSpillArea + spill.stackSlot * 8]);
else
build.vmovsd(reg, qword[sSpillArea + spill.stackSlot * 8]);
usedSpillSlots.set(spill.stackSlot, false);
if (spill.valueKind == IrValueKind::Tvalue)
usedSpillSlots.set(spill.stackSlot + 1, false);
}
else
{
reg = intoOriginalLocation ? takeReg(spill.originalLoc, instIdx) : allocGprReg(spill.originalLoc.size, instIdx);
OperandX64 location = addr[sSpillArea + spill.stackSlot * 8];
location.memSize = reg.size; // Override memory access size
build.mov(reg, location);
restoreLocation = getRestoreAddress(inst, getRestoreOp(inst));
}
if (spill.valueKind == IrValueKind::Tvalue)
build.vmovups(reg, restoreLocation);
else if (spill.valueKind == IrValueKind::Double)
build.vmovsd(reg, restoreLocation);
else
build.mov(reg, restoreLocation);
inst.regX64 = reg;
inst.spilled = false;
usedSpillSlots.set(spill.stackSlot, false);
if (spill.useDoubleSlot)
usedSpillSlots.set(spill.stackSlot + 1, false);
spills[i] = spills.back();
spills.pop_back();
return;
@ -334,6 +294,81 @@ bool IrRegAllocX64::shouldFreeGpr(RegisterX64 reg) const
return false;
}
// Returns the index of the first free spill slot that can hold a value of kind 'valueKind'.
// For IrValueKind::Tvalue the slot directly after the returned one must be free as well.
// Asserts and returns ~0u when no suitable slot exists.
unsigned IrRegAllocX64::findSpillStackSlot(IrValueKind valueKind)
{
    const bool needsPair = valueKind == IrValueKind::Tvalue;

    // Two consecutive slots might be required for 16 byte TValues, so the last slot is never a starting position
    const unsigned lastStart = unsigned(usedSpillSlots.size() - 1);

    unsigned slot = 0;

    while (slot < lastStart)
    {
        if (usedSpillSlots.test(slot))
        {
            ++slot;
        }
        else if (needsPair && usedSpillSlots.test(slot + 1))
        {
            // The following slot is occupied, so it doesn't have to be tested again as a starting position
            slot += 2;
        }
        else
        {
            return slot;
        }
    }

    LUAU_ASSERT(!"nowhere to spill");
    return ~0u;
}
// Returns the VmReg/VmConst operand that the result of a LOAD_* instruction can be re-read from.
// Returns an empty IrOp (kind None) when the instruction is not one of the handled loads or
// when its location is neither a VmReg nor a VmConst.
IrOp IrRegAllocX64::getRestoreOp(const IrInst& inst) const
{
    const bool isHandledLoad = inst.cmd == IrCmd::LOAD_TAG || inst.cmd == IrCmd::LOAD_POINTER || inst.cmd == IrCmd::LOAD_DOUBLE ||
                               inst.cmd == IrCmd::LOAD_INT || inst.cmd == IrCmd::LOAD_TVALUE;

    if (!isHandledLoad)
        return IrOp();

    // An alternative location recorded for the instruction takes priority over its first operand
    const IrOp alternative = function.findRestoreOp(inst);
    const IrOp location = alternative.kind != IrOpKind::None ? alternative : inst.a;

    if (location.kind != IrOpKind::VmReg && location.kind != IrOpKind::VmConst)
        return IrOp();

    return location;
}
bool IrRegAllocX64::hasRestoreOp(const IrInst& inst) const
{
return getRestoreOp(inst).kind != IrOpKind::None;
}
// Maps a LOAD_* instruction and its restore operand to the operand the value can be reloaded from.
// 'restoreOp' is treated as a VmReg when its kind says so and as a VmConst otherwise; LOAD_INT only accepts a VmReg.
// Returns noreg for commands that are not handled here.
OperandX64 IrRegAllocX64::getRestoreAddress(const IrInst& inst, IrOp restoreOp)
{
    const bool fromVmReg = restoreOp.kind == IrOpKind::VmReg;

    if (inst.cmd == IrCmd::LOAD_TAG)
    {
        if (fromVmReg)
            return luauRegTag(vmRegOp(restoreOp));

        return luauConstantTag(vmConstOp(restoreOp));
    }

    if (inst.cmd == IrCmd::LOAD_POINTER || inst.cmd == IrCmd::LOAD_DOUBLE)
    {
        if (fromVmReg)
            return luauRegValue(vmRegOp(restoreOp));

        return luauConstantValue(vmConstOp(restoreOp));
    }

    if (inst.cmd == IrCmd::LOAD_INT)
    {
        LUAU_ASSERT(restoreOp.kind == IrOpKind::VmReg);
        return luauRegValueInt(vmRegOp(restoreOp));
    }

    if (inst.cmd == IrCmd::LOAD_TVALUE)
    {
        if (fromVmReg)
            return luauReg(vmRegOp(restoreOp));

        return luauConstant(vmConstOp(restoreOp));
    }

    return noreg;
}
uint32_t IrRegAllocX64::findInstructionWithFurthestNextUse(const std::array<uint32_t, 16>& regInstUsers) const
{
uint32_t furthestUseTarget = kInvalidInstIdx;
@ -411,11 +446,7 @@ ScopedRegX64::~ScopedRegX64()
void ScopedRegX64::alloc(SizeX64 size)
{
LUAU_ASSERT(reg == noreg);
if (size == SizeX64::xmmword)
reg = owner.allocXmmReg(kInvalidInstIdx);
else
reg = owner.allocGprReg(size, kInvalidInstIdx);
reg = owner.allocReg(size, kInvalidInstIdx);
}
void ScopedRegX64::free()
@ -435,38 +466,36 @@ RegisterX64 ScopedRegX64::release()
ScopedSpills::ScopedSpills(IrRegAllocX64& owner)
: owner(owner)
{
snapshot = owner.spills;
startSpillId = owner.nextSpillId;
}
ScopedSpills::~ScopedSpills()
{
// Taking a copy of current spills because we are going to potentially restore them
std::vector<IrSpillX64> current = owner.spills;
unsigned endSpillId = owner.nextSpillId;
// Restore registers that were spilled inside scope protected by this object
for (IrSpillX64& curr : current)
for (size_t i = 0; i < owner.spills.size();)
{
// If spill existed before current scope, it can be restored outside of it
if (!wasSpilledBefore(curr))
IrSpillX64& spill = owner.spills[i];
// Restoring spills inside this scope cannot create new spills
LUAU_ASSERT(spill.spillId < endSpillId);
// If spill was created inside current scope, it has to be restored
if (spill.spillId >= startSpillId)
{
IrInst& inst = owner.function.instructions[curr.instIdx];
IrInst& inst = owner.function.instructions[spill.instIdx];
owner.restore(inst, /*intoOriginalLocation*/ true);
// Spill restore removes the spill entry, so loop is repeated at the same 'i'
}
else
{
i++;
}
}
}
bool ScopedSpills::wasSpilledBefore(const IrSpillX64& spill) const
{
for (const IrSpillX64& preexisting : snapshot)
{
if (spill.instIdx == preexisting.instIdx)
return true;
}
return false;
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau

View File

@ -8,6 +8,8 @@
// TODO: when nresults is less than our actual result count, we can skip computing/writing unused results
static const int kMinMaxUnrolledParams = 5;
namespace Luau
{
namespace CodeGen
@ -23,7 +25,7 @@ BuiltinImplResult translateBuiltinNumberToNumber(
return {BuiltinImplType::None, -1};
build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback);
build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults));
build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(1));
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
@ -40,7 +42,7 @@ BuiltinImplResult translateBuiltin2NumberToNumber(
build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback);
build.loadAndCheckTag(args, LUA_TNUMBER, fallback);
build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults));
build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(2), build.constInt(1));
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
@ -56,12 +58,13 @@ BuiltinImplResult translateBuiltinNumberTo2Number(
return {BuiltinImplType::None, -1};
build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback);
build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults));
build.inst(
IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(nresults == 1 ? 1 : 2));
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
if (nresults > 1)
if (nresults != 1)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra + 1), build.constTag(LUA_TNUMBER));
return {BuiltinImplType::UsesFallback, 2};
@ -125,12 +128,33 @@ BuiltinImplResult translateBuiltinMathLog(
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback);
LuauBuiltinFunction fcId = bfid;
int fcParams = 1;
if (nparams != 1)
build.loadAndCheckTag(args, LUA_TNUMBER, fallback);
{
if (args.kind != IrOpKind::VmConst)
return {BuiltinImplType::None, -1};
build.inst(IrCmd::FASTCALL, build.constUint(bfid), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults));
LUAU_ASSERT(build.function.proto);
TValue protok = build.function.proto->k[vmConstOp(args)];
if (protok.tt != LUA_TNUMBER)
return {BuiltinImplType::None, -1};
// TODO: IR builtin lowering assumes that the only valid 2-argument call is log2; ideally, we use a less hacky way to indicate that
if (protok.value.n == 2.0)
fcParams = 2;
else if (protok.value.n == 10.0)
fcId = LBF_MATH_LOG10;
else
// TODO: We can precompute log(args) and divide by it, but that requires extra LOAD/STORE so for now just fall back as this is rare
return {BuiltinImplType::None, -1};
}
build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback);
build.inst(IrCmd::FASTCALL, build.constUint(fcId), build.vmReg(ra), build.vmReg(arg), args, build.constInt(fcParams), build.constInt(1));
if (ra != arg)
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));
@ -140,17 +164,26 @@ BuiltinImplResult translateBuiltinMathLog(
BuiltinImplResult translateBuiltinMathMin(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
// TODO: this can be extended for other number of arguments
if (nparams != 2 || nresults > 1)
if (nparams < 2 || nparams > kMinMaxUnrolledParams || nresults > 1)
return {BuiltinImplType::None, -1};
build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback);
build.loadAndCheckTag(args, LUA_TNUMBER, fallback);
for (int i = 3; i <= nparams; ++i)
build.loadAndCheckTag(build.vmReg(vmRegOp(args) + (i - 2)), LUA_TNUMBER, fallback);
IrOp varg1 = build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(arg));
IrOp varg2 = build.inst(IrCmd::LOAD_DOUBLE, args);
IrOp res = build.inst(IrCmd::MIN_NUM, varg2, varg1); // Swapped arguments are required for consistency with VM builtins
for (int i = 3; i <= nparams; ++i)
{
IrOp arg = build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(vmRegOp(args) + (i - 2)));
res = build.inst(IrCmd::MIN_NUM, arg, res);
}
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);
if (ra != arg)
@ -161,17 +194,26 @@ BuiltinImplResult translateBuiltinMathMin(IrBuilder& build, int nparams, int ra,
BuiltinImplResult translateBuiltinMathMax(IrBuilder& build, int nparams, int ra, int arg, IrOp args, int nresults, IrOp fallback)
{
// TODO: this can be extended for other number of arguments
if (nparams != 2 || nresults > 1)
if (nparams < 2 || nparams > kMinMaxUnrolledParams || nresults > 1)
return {BuiltinImplType::None, -1};
build.loadAndCheckTag(build.vmReg(arg), LUA_TNUMBER, fallback);
build.loadAndCheckTag(args, LUA_TNUMBER, fallback);
for (int i = 3; i <= nparams; ++i)
build.loadAndCheckTag(build.vmReg(vmRegOp(args) + (i - 2)), LUA_TNUMBER, fallback);
IrOp varg1 = build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(arg));
IrOp varg2 = build.inst(IrCmd::LOAD_DOUBLE, args);
IrOp res = build.inst(IrCmd::MAX_NUM, varg2, varg1); // Swapped arguments are required for consistency with VM builtins
for (int i = 3; i <= nparams; ++i)
{
IrOp arg = build.inst(IrCmd::LOAD_DOUBLE, build.vmReg(vmRegOp(args) + (i - 2)));
res = build.inst(IrCmd::MAX_NUM, arg, res);
}
build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), res);
if (ra != arg)
@ -254,8 +296,7 @@ BuiltinImplResult translateBuiltinType(IrBuilder& build, int nparams, int ra, in
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
build.inst(
IrCmd::FASTCALL, build.constUint(LBF_TYPE), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults));
build.inst(IrCmd::FASTCALL, build.constUint(LBF_TYPE), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(1));
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TSTRING));
@ -267,8 +308,7 @@ BuiltinImplResult translateBuiltinTypeof(IrBuilder& build, int nparams, int ra,
if (nparams < 1 || nresults > 1)
return {BuiltinImplType::None, -1};
build.inst(
IrCmd::FASTCALL, build.constUint(LBF_TYPEOF), build.vmReg(ra), build.vmReg(arg), args, build.constInt(nparams), build.constInt(nresults));
build.inst(IrCmd::FASTCALL, build.constUint(LBF_TYPEOF), build.vmReg(ra), build.vmReg(arg), args, build.constInt(1), build.constInt(1));
build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TSTRING));

View File

@ -284,7 +284,7 @@ void replace(IrFunction& function, IrBlock& block, uint32_t instIdx, IrInst repl
block.useCount--;
}
void substitute(IrFunction& function, IrInst& inst, IrOp replacement)
void substitute(IrFunction& function, IrInst& inst, IrOp replacement, IrOp location)
{
LUAU_ASSERT(!isBlockTerminator(inst.cmd));
@ -298,7 +298,7 @@ void substitute(IrFunction& function, IrInst& inst, IrOp replacement)
removeUse(function, inst.f);
inst.a = replacement;
inst.b = {};
inst.b = location;
inst.c = {};
inst.d = {};
inst.e = {};

View File

@ -16,7 +16,7 @@
#include <math.h>
#include <string.h>
#define CODEGEN_SET_FALLBACK(op, flags) data.context.fallback[op] = {execute_##op, flags}
#define CODEGEN_SET_FALLBACK(op) data.context.fallback[op] = {execute_##op}
namespace Luau
{
@ -36,20 +36,21 @@ NativeState::~NativeState() = default;
void initFallbackTable(NativeState& data)
{
// When fallback is completely removed, remove it from includeInsts list in lvmexecute_split.py
CODEGEN_SET_FALLBACK(LOP_NEWCLOSURE, 0);
CODEGEN_SET_FALLBACK(LOP_NAMECALL, 0);
CODEGEN_SET_FALLBACK(LOP_FORGPREP, kFallbackUpdatePc);
CODEGEN_SET_FALLBACK(LOP_GETVARARGS, 0);
CODEGEN_SET_FALLBACK(LOP_DUPCLOSURE, 0);
CODEGEN_SET_FALLBACK(LOP_PREPVARARGS, 0);
CODEGEN_SET_FALLBACK(LOP_BREAK, 0);
CODEGEN_SET_FALLBACK(LOP_NEWCLOSURE);
CODEGEN_SET_FALLBACK(LOP_NAMECALL);
CODEGEN_SET_FALLBACK(LOP_FORGPREP);
CODEGEN_SET_FALLBACK(LOP_GETVARARGS);
CODEGEN_SET_FALLBACK(LOP_DUPCLOSURE);
CODEGEN_SET_FALLBACK(LOP_PREPVARARGS);
CODEGEN_SET_FALLBACK(LOP_BREAK);
CODEGEN_SET_FALLBACK(LOP_SETLIST);
// Fallbacks that are called from partial implementation of an instruction
// TODO: these fallbacks should be replaced with special functions that exclude the (redundantly executed) fast path from the fallback
CODEGEN_SET_FALLBACK(LOP_GETGLOBAL, 0);
CODEGEN_SET_FALLBACK(LOP_SETGLOBAL, 0);
CODEGEN_SET_FALLBACK(LOP_GETTABLEKS, 0);
CODEGEN_SET_FALLBACK(LOP_SETTABLEKS, 0);
CODEGEN_SET_FALLBACK(LOP_GETGLOBAL);
CODEGEN_SET_FALLBACK(LOP_SETGLOBAL);
CODEGEN_SET_FALLBACK(LOP_GETTABLEKS);
CODEGEN_SET_FALLBACK(LOP_SETTABLEKS);
}
void initHelperFunctions(NativeState& data)
@ -105,6 +106,7 @@ void initHelperFunctions(NativeState& data)
data.context.libm_tan = tan;
data.context.libm_tanh = tanh;
data.context.forgLoopTableIter = forgLoopTableIter;
data.context.forgLoopNodeIter = forgLoopNodeIter;
data.context.forgLoopNonTableFallback = forgLoopNonTableFallback;
data.context.forgPrepXnextFallback = forgPrepXnextFallback;

View File

@ -23,15 +23,7 @@ namespace CodeGen
class UnwindBuilder;
using FallbackFn = const Instruction*(lua_State* L, const Instruction* pc, StkId base, TValue* k);
constexpr uint8_t kFallbackUpdatePc = 1 << 0;
// Entry of the per-opcode fallback table: the function that executes the opcode plus flags for that entry
struct NativeFallback
{
// Function called to execute the opcode; see FallbackFn for the signature
FallbackFn* fallback;
// Flags for this entry, built from the kFallback* constants (for example kFallbackUpdatePc); 0 when none apply
uint8_t flags;
};
using FallbackFn = const Instruction* (*)(lua_State* L, const Instruction* pc, StkId base, TValue* k);
struct NativeProto
{
@ -96,6 +88,7 @@ struct NativeContext
double (*libm_modf)(double, double*) = nullptr;
// Helper functions
bool (*forgLoopTableIter)(lua_State* L, Table* h, int index, TValue* ra) = nullptr;
bool (*forgLoopNodeIter)(lua_State* L, Table* h, int index, TValue* ra) = nullptr;
bool (*forgLoopNonTableFallback)(lua_State* L, int insnA, int aux) = nullptr;
void (*forgPrepXnextFallback)(lua_State* L, TValue* ra, int pc) = nullptr;
@ -106,7 +99,7 @@ struct NativeContext
Closure* (*returnFallback)(lua_State* L, StkId ra, int n) = nullptr;
// Opcode fallbacks, implemented in C
NativeFallback fallback[LOP__COUNT] = {};
FallbackFn fallback[LOP__COUNT] = {};
// Fast call methods, implemented in C
luau_FastFunction luauF_table[256] = {};

View File

@ -502,6 +502,8 @@ static void constPropInInst(ConstPropState& state, IrBuilder& build, IrFunction&
}
}
break;
// TODO: FASTCALL is more restrictive than INVOKE_FASTCALL; we should either determine the exact semantics, or rework it
case IrCmd::FASTCALL:
case IrCmd::INVOKE_FASTCALL:
handleBuiltinEffects(state, LuauBuiltinFunction(function.uintOp(inst.a)), vmRegOp(inst.b), function.intOp(inst.f));

View File

@ -132,7 +132,7 @@ size_t UnwindBuilderDwarf2::getBeginOffset() const
return beginOffset;
}
void UnwindBuilderDwarf2::start()
void UnwindBuilderDwarf2::startInfo()
{
uint8_t* cieLength = pos;
pos = writeu32(pos, 0); // Length (to be filled later)
@ -149,13 +149,23 @@ void UnwindBuilderDwarf2::start()
// Optional CIE augmentation section (not present)
// Call frame instructions (common for all FDEs, of which we have 1)
stackOffset = 8; // Return address was pushed by calling the function
pos = defineCfaExpression(pos, DW_REG_RSP, stackOffset); // Define CFA to be the rsp + 8
pos = defineCfaExpression(pos, DW_REG_RSP, 8); // Define CFA to be the rsp + 8
pos = defineSavedRegisterLocation(pos, DW_REG_RA, 8); // Define return address register (RA) to be located at CFA - 8
pos = alignPosition(cieLength, pos);
writeu32(cieLength, unsigned(pos - cieLength - 4)); // Length field itself is excluded from length
}
void UnwindBuilderDwarf2::startFunction()
{
// End offset is filled in later and everything gets adjusted at the end
UnwindFunctionDwarf2 func;
func.beginOffset = 0;
func.endOffset = 0;
func.fdeEntryStartPos = uint32_t(pos - rawData);
unwindFunctions.push_back(func);
stackOffset = 8; // Return address was pushed by calling the function
fdeEntryStart = pos; // Will be written at the end
pos = writeu32(pos, 0); // Length (to be filled later)
@ -198,14 +208,20 @@ void UnwindBuilderDwarf2::setupFrameReg(X64::RegisterX64 reg, int espOffset)
// Cfa is based on rsp, so no additional commands are required
}
void UnwindBuilderDwarf2::finish()
void UnwindBuilderDwarf2::finishFunction(uint32_t beginOffset, uint32_t endOffset)
{
unwindFunctions.back().beginOffset = beginOffset;
unwindFunctions.back().endOffset = endOffset;
LUAU_ASSERT(stackOffset % 16 == 0 && "stack has to be aligned to 16 bytes after prologue");
LUAU_ASSERT(fdeEntryStart != nullptr);
pos = alignPosition(fdeEntryStart, pos);
writeu32(fdeEntryStart, unsigned(pos - fdeEntryStart - 4)); // Length field itself is excluded from length
}
void UnwindBuilderDwarf2::finishInfo()
{
// Terminate section
pos = writeu32(pos, 0);
@ -217,15 +233,26 @@ size_t UnwindBuilderDwarf2::getSize() const
return size_t(pos - rawData);
}
void UnwindBuilderDwarf2::finalize(char* target, void* funcAddress, size_t funcSize) const
// Returns the number of entries currently held in unwindFunctions
size_t UnwindBuilderDwarf2::getFunctionCount() const
{
return unwindFunctions.size();
}
void UnwindBuilderDwarf2::finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const
{
memcpy(target, rawData, getSize());
LUAU_ASSERT(fdeEntryStart != nullptr);
unsigned fdeEntryStartPos = unsigned(fdeEntryStart - rawData);
for (const UnwindFunctionDwarf2& func : unwindFunctions)
{
uint8_t* fdeEntryStart = (uint8_t*)target + func.fdeEntryStartPos;
writeu64((uint8_t*)target + fdeEntryStartPos + kFdeInitialLocationOffset, uintptr_t(funcAddress));
writeu64((uint8_t*)target + fdeEntryStartPos + kFdeAddressRangeOffset, funcSize);
writeu64(fdeEntryStart + kFdeInitialLocationOffset, uintptr_t(funcAddress) + offset + func.beginOffset);
if (func.endOffset == kFullBlockFuncton)
writeu64(fdeEntryStart + kFdeAddressRangeOffset, funcSize - offset);
else
writeu64(fdeEntryStart + kFdeAddressRangeOffset, func.endOffset - func.beginOffset);
}
}
} // namespace CodeGen

View File

@ -21,17 +21,6 @@ namespace Luau
namespace CodeGen
{
// This struct matches the layout of UNWIND_INFO from ehdata.h
struct UnwindInfoWin
{
uint8_t version : 3;
uint8_t flags : 5;
uint8_t prologsize;
uint8_t unwindcodecount;
uint8_t framereg : 4;
uint8_t frameregoff : 4;
};
void UnwindBuilderWin::setBeginOffset(size_t beginOffset)
{
this->beginOffset = beginOffset;
@ -42,11 +31,28 @@ size_t UnwindBuilderWin::getBeginOffset() const
return beginOffset;
}
void UnwindBuilderWin::start()
{
stackOffset = 8; // Return address was pushed by calling the function
// No-op: the body is empty
void UnwindBuilderWin::startInfo() {}
// Begins a new function entry: appends a placeholder record to unwindFunctions and resets
// the per-function prologue state (unwind codes, prolog size, frame register, stack offset).
void UnwindBuilderWin::startFunction()
{
// End offset is filled in later and everything gets adjusted at the end
UnwindFunctionWin func;
func.beginOffset = 0;
func.endOffset = 0;
// Byte offset of the current write position from the start of rawData
func.unwindInfoOffset = uint32_t(rawDataPos - rawData);
unwindFunctions.push_back(func);
// Drop any unwind codes left in the vector before collecting codes for this function
unwindCodes.clear();
unwindCodes.reserve(16);
prologSize = 0;
// rax has register index 0, which in Windows unwind info means that frame register is not used
frameReg = X64::rax;
frameRegOffset = 0;
// Return address was pushed by calling the function
stackOffset = 8;
}
void UnwindBuilderWin::spill(int espOffset, X64::RegisterX64 reg)
@ -85,49 +91,89 @@ void UnwindBuilderWin::setupFrameReg(X64::RegisterX64 reg, int espOffset)
unwindCodes.push_back({prologSize, UWOP_SET_FPREG, frameRegOffset});
}
void UnwindBuilderWin::finish()
void UnwindBuilderWin::finishFunction(uint32_t beginOffset, uint32_t endOffset)
{
unwindFunctions.back().beginOffset = beginOffset;
unwindFunctions.back().endOffset = endOffset;
// Windows unwind code count is stored in uint8_t, so we can't have more
LUAU_ASSERT(unwindCodes.size() < 256);
LUAU_ASSERT(stackOffset % 16 == 0 && "stack has to be aligned to 16 bytes after prologue");
size_t codeArraySize = unwindCodes.size();
codeArraySize = (codeArraySize + 1) & ~1; // Size has to be even, but unwind code count doesn't have to
infoSize = sizeof(UnwindInfoWin) + sizeof(UnwindCodeWin) * codeArraySize;
}
size_t UnwindBuilderWin::getSize() const
{
return infoSize;
}
void UnwindBuilderWin::finalize(char* target, void* funcAddress, size_t funcSize) const
{
UnwindInfoWin info;
info.version = 1;
info.flags = 0; // No EH
info.prologsize = prologSize;
info.unwindcodecount = uint8_t(unwindCodes.size());
LUAU_ASSERT(frameReg.index < 16);
info.framereg = frameReg.index;
LUAU_ASSERT(frameRegOffset < 16);
info.frameregoff = frameRegOffset;
memcpy(target, &info, sizeof(info));
target += sizeof(UnwindInfoWin);
LUAU_ASSERT(rawDataPos + sizeof(info) <= rawData + kRawDataLimit);
memcpy(rawDataPos, &info, sizeof(info));
rawDataPos += sizeof(info);
if (!unwindCodes.empty())
{
// Copy unwind codes in reverse order
// Some unwind codes take up two array slots, but we don't use those atm
char* pos = target + sizeof(UnwindCodeWin) * (unwindCodes.size() - 1);
uint8_t* unwindCodePos = rawDataPos + sizeof(UnwindCodeWin) * (unwindCodes.size() - 1);
LUAU_ASSERT(unwindCodePos <= rawData + kRawDataLimit);
for (size_t i = 0; i < unwindCodes.size(); i++)
{
memcpy(pos, &unwindCodes[i], sizeof(UnwindCodeWin));
pos -= sizeof(UnwindCodeWin);
memcpy(unwindCodePos, &unwindCodes[i], sizeof(UnwindCodeWin));
unwindCodePos -= sizeof(UnwindCodeWin);
}
}
rawDataPos += sizeof(UnwindCodeWin) * unwindCodes.size();
// Size has to be even, but unwind code count doesn't have to
if (unwindCodes.size() % 2 != 0)
rawDataPos += sizeof(UnwindCodeWin);
LUAU_ASSERT(rawDataPos <= rawData + kRawDataLimit);
}
// No-op: the body is empty
void UnwindBuilderWin::finishInfo() {}
// Returns the total size in bytes: one UnwindFunctionWin record per entry in unwindFunctions
// plus the number of bytes written to rawData so far
size_t UnwindBuilderWin::getSize() const
{
return sizeof(UnwindFunctionWin) * unwindFunctions.size() + size_t(rawDataPos - rawData);
}
// Returns the number of entries currently held in unwindFunctions
size_t UnwindBuilderWin::getFunctionCount() const
{
return unwindFunctions.size();
}
// Writes the finished unwind data into target: first one adjusted UnwindFunctionWin record per
// function, then the bytes accumulated in rawData.
// In total sizeof(UnwindFunctionWin) * unwindFunctions.size() + (rawDataPos - rawData) bytes are written.
//   offset      - added to every begin offset and to every explicit end offset
//   funcSize    - used as the end offset for entries whose endOffset is kFullBlockFuncton
//   funcAddress - not read by this function
void UnwindBuilderWin::finalize(char* target, size_t offset, void* funcAddress, size_t funcSize) const
{
// Copy adjusted function information
// func is a by-value copy, so the adjustments below do not modify unwindFunctions
for (UnwindFunctionWin func : unwindFunctions)
{
// Code will start after the unwind info
func.beginOffset += uint32_t(offset);
// Whole block is a part of a 'single function'
if (func.endOffset == kFullBlockFuncton)
func.endOffset = uint32_t(funcSize);
else
func.endOffset += uint32_t(offset);
// Unwind data is placed right after the RUNTIME_FUNCTION data
func.unwindInfoOffset += uint32_t(sizeof(UnwindFunctionWin) * unwindFunctions.size());
memcpy(target, &func, sizeof(func));
target += sizeof(func);
}
// Copy unwind codes
memcpy(target, rawData, size_t(rawDataPos - rawData));
}
} // namespace CodeGen

View File

@ -89,9 +89,7 @@ target_sources(Luau.CodeGen PRIVATE
CodeGen/src/CodeGenA64.cpp
CodeGen/src/CodeGenX64.cpp
CodeGen/src/EmitBuiltinsX64.cpp
CodeGen/src/EmitCommonA64.cpp
CodeGen/src/EmitCommonX64.cpp
CodeGen/src/EmitInstructionA64.cpp
CodeGen/src/EmitInstructionX64.cpp
CodeGen/src/Fallbacks.cpp
CodeGen/src/IrAnalysis.cpp
@ -111,6 +109,7 @@ target_sources(Luau.CodeGen PRIVATE
CodeGen/src/UnwindBuilderDwarf2.cpp
CodeGen/src/UnwindBuilderWin.cpp
CodeGen/src/BitUtils.h
CodeGen/src/ByteUtils.h
CodeGen/src/CustomExecUtils.h
CodeGen/src/CodeGenUtils.h
@ -120,7 +119,6 @@ target_sources(Luau.CodeGen PRIVATE
CodeGen/src/EmitCommon.h
CodeGen/src/EmitCommonA64.h
CodeGen/src/EmitCommonX64.h
CodeGen/src/EmitInstructionA64.h
CodeGen/src/EmitInstructionX64.h
CodeGen/src/Fallbacks.h
CodeGen/src/FallbacksProlog.h

View File

@ -538,6 +538,8 @@ const void* lua_topointer(lua_State* L, int idx)
StkId o = index2addr(L, idx);
switch (ttype(o))
{
case LUA_TSTRING:
return tsvalue(o);
case LUA_TTABLE:
return hvalue(o);
case LUA_TFUNCTION:

View File

@ -33,8 +33,6 @@
#include <string.h>
LUAU_FASTFLAGVARIABLE(LuauArrBoundResizeFix, false)
// max size of both array and hash part is 2^MAXBITS
#define MAXBITS 26
#define MAXSIZE (1 << MAXBITS)
@ -466,30 +464,22 @@ static void rehash(lua_State* L, Table* t, const TValue* ek)
int na = computesizes(nums, &nasize);
int nh = totaluse - na;
if (FFlag::LuauArrBoundResizeFix)
// enforce the boundary invariant; for performance, only do hash lookups if we must
int nadjusted = adjustasize(t, nasize, ek);
// count how many extra elements belong to array part instead of hash part
int aextra = nadjusted - nasize;
if (aextra != 0)
{
// enforce the boundary invariant; for performance, only do hash lookups if we must
int nadjusted = adjustasize(t, nasize, ek);
// we no longer need to store those extra array elements in hash part
nh -= aextra;
// count how many extra elements belong to array part instead of hash part
int aextra = nadjusted - nasize;
// because hash nodes are twice as large as array nodes, the memory we saved for hash parts can be used by array part
// this follows the general sparse array part optimization where array is allocated when 50% occupation is reached
nasize = nadjusted + aextra;
if (aextra != 0)
{
// we no longer need to store those extra array elements in hash part
nh -= aextra;
// because hash nodes are twice as large as array nodes, the memory we saved for hash parts can be used by array part
// this follows the general sparse array part optimization where array is allocated when 50% occupation is reached
nasize = nadjusted + aextra;
// since the size was changed, it's again important to enforce the boundary invariant at the new size
nasize = adjustasize(t, nasize, ek);
}
}
else
{
// enforce the boundary invariant; for performance, only do hash lookups if we must
// since the size was changed, it's again important to enforce the boundary invariant at the new size
nasize = adjustasize(t, nasize, ek);
}

View File

@ -21,7 +21,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, size_t Size)
static Luau::NullFileResolver fileResolver;
static Luau::NullConfigResolver configResolver;
static Luau::Frontend frontend{&fileResolver, &configResolver};
static int once = (Luau::registerBuiltinGlobals(frontend), 1);
static int once = (Luau::registerBuiltinGlobals(frontend, frontend.globals, false), 1);
(void)once;
static int once2 = (Luau::freeze(frontend.globals.globalTypes), 1);
(void)once2;

View File

@ -97,12 +97,12 @@ lua_State* createGlobalState()
return L;
}
int registerTypes(Luau::TypeChecker& typeChecker, Luau::GlobalTypes& globals)
int registerTypes(Luau::Frontend& frontend, Luau::GlobalTypes& globals, bool forAutocomplete)
{
using namespace Luau;
using std::nullopt;
Luau::registerBuiltinGlobals(typeChecker, globals);
Luau::registerBuiltinGlobals(frontend, globals, forAutocomplete);
TypeArena& arena = globals.globalTypes;
BuiltinTypes& builtinTypes = *globals.builtinTypes;
@ -147,10 +147,10 @@ int registerTypes(Luau::TypeChecker& typeChecker, Luau::GlobalTypes& globals)
static void setupFrontend(Luau::Frontend& frontend)
{
registerTypes(frontend.typeChecker, frontend.globals);
registerTypes(frontend, frontend.globals, false);
Luau::freeze(frontend.globals.globalTypes);
registerTypes(frontend.typeCheckerForAutocomplete, frontend.globalsForAutocomplete);
registerTypes(frontend, frontend.globalsForAutocomplete, true);
Luau::freeze(frontend.globalsForAutocomplete.globalTypes);
frontend.iceHandler.onInternalError = [](const char* error) {

View File

@ -26,7 +26,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, size_t Size)
static Luau::NullFileResolver fileResolver;
static Luau::NullConfigResolver configResolver;
static Luau::Frontend frontend{&fileResolver, &configResolver};
static int once = (Luau::registerBuiltinGlobals(frontend), 1);
static int once = (Luau::registerBuiltinGlobals(frontend, frontend.globals, false), 1);
(void)once;
static int once2 = (Luau::freeze(frontend.globals.globalTypes), 1);
(void)once2;

View File

@ -86,6 +86,7 @@ TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Binary")
SINGLE_COMPARE(add(x0, x1, x2, 7), 0x8B021C20);
SINGLE_COMPARE(sub(x0, x1, x2), 0xCB020020);
SINGLE_COMPARE(and_(x0, x1, x2), 0x8A020020);
SINGLE_COMPARE(bic(x0, x1, x2), 0x8A220020);
SINGLE_COMPARE(orr(x0, x1, x2), 0xAA020020);
SINGLE_COMPARE(eor(x0, x1, x2), 0xCA020020);
SINGLE_COMPARE(lsl(x0, x1, x2), 0x9AC22020);
@ -94,6 +95,7 @@ TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Binary")
SINGLE_COMPARE(asr(x0, x1, x2), 0x9AC22820);
SINGLE_COMPARE(ror(x0, x1, x2), 0x9AC22C20);
SINGLE_COMPARE(cmp(x0, x1), 0xEB01001F);
SINGLE_COMPARE(tst(x0, x1), 0xEA01001F);
// reg, imm
SINGLE_COMPARE(add(x3, x7, 78), 0x910138E3);
@ -102,6 +104,24 @@ TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Binary")
SINGLE_COMPARE(cmp(w0, 42), 0x7100A81F);
}
// Checks the encoded instruction word for and_/orr/eor/tst with w-register operands and an immediate,
// then for and_ with a range of immediate mask values
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "BinaryImm")
{
// instructions
SINGLE_COMPARE(and_(w1, w2, 1), 0x12000041);
SINGLE_COMPARE(orr(w1, w2, 1), 0x32000041);
SINGLE_COMPARE(eor(w1, w2, 1), 0x52000041);
SINGLE_COMPARE(tst(w1, 1), 0x7200003f);
// various mask forms
// masks covered: low bits only (1, 3, 7, 2147483647), bits not starting at bit 0 (6, 12), and the top bit alone (2147483648)
SINGLE_COMPARE(and_(w0, w0, 1), 0x12000000);
SINGLE_COMPARE(and_(w0, w0, 3), 0x12000400);
SINGLE_COMPARE(and_(w0, w0, 7), 0x12000800);
SINGLE_COMPARE(and_(w0, w0, 2147483647), 0x12007800);
SINGLE_COMPARE(and_(w0, w0, 6), 0x121F0400);
SINGLE_COMPARE(and_(w0, w0, 12), 0x121E0400);
SINGLE_COMPARE(and_(w0, w0, 2147483648), 0x12010000);
}
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Loads")
{
// address forms
@ -359,11 +379,13 @@ TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "AddressOffsetSize")
SINGLE_COMPARE(str(q0, mem(x1, 16)), 0x3D800420);
}
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "ConditionalSelect")
TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Conditionals")
{
SINGLE_COMPARE(csel(x0, x1, x2, ConditionA64::Equal), 0x9A820020);
SINGLE_COMPARE(csel(w0, w1, w2, ConditionA64::Equal), 0x1A820020);
SINGLE_COMPARE(fcsel(d0, d1, d2, ConditionA64::Equal), 0x1E620C20);
SINGLE_COMPARE(cset(x1, ConditionA64::Less), 0x9A9FA7E1);
}
TEST_CASE("LogTest")
@ -394,6 +416,7 @@ TEST_CASE("LogTest")
build.ldr(q1, x2);
build.csel(x0, x1, x2, ConditionA64::Equal);
build.cset(x0, ConditionA64::Equal);
build.fcmp(d0, d1);
build.fcmpz(d0);
@ -423,6 +446,7 @@ TEST_CASE("LogTest")
fabs d1,d2
ldr q1,[x2]
csel x0,x1,x2,eq
cset x0,eq
fcmp d0,d1
fcmp d0,#0
.L1:

View File

@ -67,6 +67,9 @@ TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "BaseBinaryInstructionForms")
SINGLE_COMPARE(add(rax, 0x7f), 0x48, 0x83, 0xc0, 0x7f);
SINGLE_COMPARE(add(rax, 0x80), 0x48, 0x81, 0xc0, 0x80, 0x00, 0x00, 0x00);
SINGLE_COMPARE(add(r10, 0x7fffffff), 0x49, 0x81, 0xc2, 0xff, 0xff, 0xff, 0x7f);
SINGLE_COMPARE(add(al, 3), 0x80, 0xc0, 0x03);
SINGLE_COMPARE(add(sil, 3), 0x48, 0x80, 0xc6, 0x03);
SINGLE_COMPARE(add(r11b, 3), 0x49, 0x80, 0xc3, 0x03);
// reg, [reg]
SINGLE_COMPARE(add(rax, qword[rax]), 0x48, 0x03, 0x00);
@ -191,6 +194,8 @@ TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfMov")
SINGLE_COMPARE(mov64(rcx, 0x1234567812345678ll), 0x48, 0xb9, 0x78, 0x56, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12);
SINGLE_COMPARE(mov(ecx, 2), 0xb9, 0x02, 0x00, 0x00, 0x00);
SINGLE_COMPARE(mov(cl, 2), 0xb1, 0x02);
SINGLE_COMPARE(mov(sil, 2), 0x48, 0xb6, 0x02);
SINGLE_COMPARE(mov(r9b, 2), 0x49, 0xb1, 0x02);
SINGLE_COMPARE(mov(rcx, qword[rdi]), 0x48, 0x8b, 0x0f);
SINGLE_COMPARE(mov(dword[rax], 0xabcd), 0xc7, 0x00, 0xcd, 0xab, 0x00, 0x00);
SINGLE_COMPARE(mov(r13, 1), 0x49, 0xbd, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
@ -201,6 +206,8 @@ TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfMov")
SINGLE_COMPARE(mov(qword[rdx], r9), 0x4c, 0x89, 0x0a);
SINGLE_COMPARE(mov(byte[rsi], 0x3), 0xc6, 0x06, 0x03);
SINGLE_COMPARE(mov(byte[rsi], al), 0x88, 0x06);
SINGLE_COMPARE(mov(byte[rsi], dil), 0x48, 0x88, 0x3e);
SINGLE_COMPARE(mov(byte[rsi], r10b), 0x4c, 0x88, 0x16);
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfMovExtended")
@ -229,6 +236,8 @@ TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfShift")
{
SINGLE_COMPARE(shl(al, 1), 0xd0, 0xe0);
SINGLE_COMPARE(shl(al, cl), 0xd2, 0xe0);
SINGLE_COMPARE(shl(sil, cl), 0x48, 0xd2, 0xe6);
SINGLE_COMPARE(shl(r10b, cl), 0x49, 0xd2, 0xe2);
SINGLE_COMPARE(shr(al, 4), 0xc0, 0xe8, 0x04);
SINGLE_COMPARE(shr(eax, 1), 0xd1, 0xe8);
SINGLE_COMPARE(sal(eax, cl), 0xd3, 0xe0);
@ -247,6 +256,7 @@ TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfLea")
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfSetcc")
{
SINGLE_COMPARE(setcc(ConditionX64::NotEqual, bl), 0x0f, 0x95, 0xc3);
SINGLE_COMPARE(setcc(ConditionX64::NotEqual, dil), 0x48, 0x0f, 0x95, 0xc7);
SINGLE_COMPARE(setcc(ConditionX64::BelowEqual, byte[rcx]), 0x0f, 0x96, 0x01);
}

View File

@ -3473,4 +3473,34 @@ TEST_CASE_FIXTURE(ACFixture, "autocomplete_response_perf1" * doctest::timeout(0.
CHECK(ac.entryMap.count("Instance"));
}
// The checked module is marked --!nonstrict; autocomplete after `b.` must still offer exactly one entry, "x"
// NOTE(review): @1 is presumably the ACFixture cursor marker looked up by autocomplete('1') - confirm against the fixture
TEST_CASE_FIXTURE(ACFixture, "strict_mode_force")
{
check(R"(
--!nonstrict
local a: {x: number} = {x=1}
local b = a
local c = b.@1
)");
auto ac = autocomplete('1');
CHECK_EQ(1, ac.entryMap.size());
CHECK(ac.entryMap.count("x"));
}
// With LuauCopyExportedTypes enabled, a type declared with `export type` must appear among the
// suggestions when completing a type annotation, and the reported context must be Type
TEST_CASE_FIXTURE(ACFixture, "suggest_exported_types")
{
ScopedFastFlag luauCopyExportedTypes{"LuauCopyExportedTypes", true};
check(R"(
export type Type = {a: number}
local a: T@1
)");
auto ac = autocomplete('1');
CHECK(ac.entryMap.count("Type"));
CHECK_EQ(ac.context, AutocompleteContext::Type);
}
TEST_SUITE_END();

View File

@ -135,7 +135,8 @@ TEST_CASE("WindowsUnwindCodesX64")
UnwindBuilderWin unwind;
unwind.start();
unwind.startInfo();
unwind.startFunction();
unwind.spill(16, rdx);
unwind.spill(8, rcx);
unwind.save(rdi);
@ -148,14 +149,15 @@ TEST_CASE("WindowsUnwindCodesX64")
unwind.save(r15);
unwind.allocStack(72);
unwind.setupFrameReg(rbp, 48);
unwind.finish();
unwind.finishFunction(0x11223344, 0x55443322);
unwind.finishInfo();
std::vector<char> data;
data.resize(unwind.getSize());
unwind.finalize(data.data(), nullptr, 0);
unwind.finalize(data.data(), 0, nullptr, 0);
std::vector<uint8_t> expected{0x01, 0x23, 0x0a, 0x35, 0x23, 0x33, 0x1e, 0x82, 0x1a, 0xf0, 0x18, 0xe0, 0x16, 0xd0, 0x14, 0xc0, 0x12, 0x50, 0x10,
0x30, 0x0e, 0x60, 0x0c, 0x70};
std::vector<uint8_t> expected{0x44, 0x33, 0x22, 0x11, 0x22, 0x33, 0x44, 0x55, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x23, 0x0a, 0x35, 0x23, 0x33, 0x1e,
0x82, 0x1a, 0xf0, 0x18, 0xe0, 0x16, 0xd0, 0x14, 0xc0, 0x12, 0x50, 0x10, 0x30, 0x0e, 0x60, 0x0c, 0x70};
REQUIRE(data.size() == expected.size());
CHECK(memcmp(data.data(), expected.data(), expected.size()) == 0);
@ -168,7 +170,8 @@ TEST_CASE("Dwarf2UnwindCodesX64")
UnwindBuilderDwarf2 unwind;
unwind.start();
unwind.startInfo();
unwind.startFunction();
unwind.save(rdi);
unwind.save(rsi);
unwind.save(rbx);
@ -179,11 +182,12 @@ TEST_CASE("Dwarf2UnwindCodesX64")
unwind.save(r15);
unwind.allocStack(72);
unwind.setupFrameReg(rbp, 48);
unwind.finish();
unwind.finishFunction(0, 0);
unwind.finishInfo();
std::vector<char> data;
data.resize(unwind.getSize());
unwind.finalize(data.data(), nullptr, 0);
unwind.finalize(data.data(), 0, nullptr, 0);
std::vector<uint8_t> expected{0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x78, 0x10, 0x0c, 0x07, 0x08, 0x05, 0x10, 0x01,
0x00, 0x00, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@ -211,6 +215,8 @@ constexpr X64::RegisterX64 rArg3 = X64::rdx;
constexpr X64::RegisterX64 rNonVol1 = X64::r12;
constexpr X64::RegisterX64 rNonVol2 = X64::rbx;
constexpr X64::RegisterX64 rNonVol3 = X64::r13;
constexpr X64::RegisterX64 rNonVol4 = X64::r14;
TEST_CASE("GeneratedCodeExecutionX64")
{
@ -260,7 +266,10 @@ TEST_CASE("GeneratedCodeExecutionWithThrowX64")
std::unique_ptr<UnwindBuilder> unwind = std::make_unique<UnwindBuilderDwarf2>();
#endif
unwind->start();
unwind->startInfo();
Label functionBegin = build.setLabel();
unwind->startFunction();
// Prologue
build.push(rNonVol1);
@ -279,8 +288,6 @@ TEST_CASE("GeneratedCodeExecutionWithThrowX64")
build.lea(rbp, addr[rsp + stackSize]);
unwind->setupFrameReg(rbp, stackSize);
unwind->finish();
// Body
build.mov(rNonVol1, rArg1);
build.mov(rNonVol2, rArg2);
@ -296,8 +303,12 @@ TEST_CASE("GeneratedCodeExecutionWithThrowX64")
build.pop(rNonVol1);
build.ret();
unwind->finishFunction(build.getLabelOffset(functionBegin), ~0u);
build.finalize();
unwind->finishInfo();
size_t blockSize = 1024 * 1024;
size_t maxTotalSize = 1024 * 1024;
CodeAllocator allocator(blockSize, maxTotalSize);
@ -326,6 +337,152 @@ TEST_CASE("GeneratedCodeExecutionWithThrowX64")
}
}
// Assembles two functions with different prologues into one code buffer, records a separate unwind
// entry for each, allocates them through CodeAllocator with the unwind callbacks installed, and then
// calls each function with the `throwing` callback, checking the message of the caught std::runtime_error.
TEST_CASE("GeneratedCodeExecutionMultipleFunctionsWithThrowX64")
{
using namespace X64;
AssemblyBuilderX64 build(/* logText= */ false);
#if defined(_WIN32)
std::unique_ptr<UnwindBuilder> unwind = std::make_unique<UnwindBuilderWin>();
#else
std::unique_ptr<UnwindBuilder> unwind = std::make_unique<UnwindBuilderDwarf2>();
#endif
unwind->startInfo();
Label start1;
Label start2;
// First function
{
build.setLabel(start1);
unwind->startFunction();
// Prologue
// Every push/sub/lea below is mirrored by the matching unwind builder call
build.push(rNonVol1);
unwind->save(rNonVol1);
build.push(rNonVol2);
unwind->save(rNonVol2);
build.push(rbp);
unwind->save(rbp);
int stackSize = 32;
int localsSize = 16;
build.sub(rsp, stackSize + localsSize);
unwind->allocStack(stackSize + localsSize);
build.lea(rbp, addr[rsp + stackSize]);
unwind->setupFrameReg(rbp, stackSize);
// Body
// Adds 15 to the first argument and calls the second argument with the result
build.mov(rNonVol1, rArg1);
build.mov(rNonVol2, rArg2);
build.add(rNonVol1, 15);
build.mov(rArg1, rNonVol1);
build.call(rNonVol2);
// Epilogue
build.lea(rsp, addr[rbp + localsSize]);
build.pop(rbp);
build.pop(rNonVol2);
build.pop(rNonVol1);
build.ret();
// This function gets an explicit end offset taken from a label placed right after its last instruction
Label end1 = build.setLabel();
unwind->finishFunction(build.getLabelOffset(start1), build.getLabelOffset(end1));
}
// Second function with different layout
{
build.setLabel(start2);
unwind->startFunction();
// Prologue
build.push(rNonVol1);
unwind->save(rNonVol1);
build.push(rNonVol2);
unwind->save(rNonVol2);
build.push(rNonVol3);
unwind->save(rNonVol3);
build.push(rNonVol4);
unwind->save(rNonVol4);
build.push(rbp);
unwind->save(rbp);
int stackSize = 32;
int localsSize = 32;
build.sub(rsp, stackSize + localsSize);
unwind->allocStack(stackSize + localsSize);
build.lea(rbp, addr[rsp + stackSize]);
unwind->setupFrameReg(rbp, stackSize);
// Body
build.mov(rNonVol3, rArg1);
build.mov(rNonVol4, rArg2);
build.add(rNonVol3, 15);
build.mov(rArg1, rNonVol3);
build.call(rNonVol4);
// Epilogue
build.lea(rsp, addr[rbp + localsSize]);
build.pop(rbp);
build.pop(rNonVol4);
build.pop(rNonVol3);
build.pop(rNonVol2);
build.pop(rNonVol1);
build.ret();
// NOTE(review): ~0u is presumably the kFullBlockFuncton sentinel ("extends to the end of the block") - confirm against its definition
unwind->finishFunction(build.getLabelOffset(start2), ~0u);
}
build.finalize();
unwind->finishInfo();
size_t blockSize = 1024 * 1024;
size_t maxTotalSize = 1024 * 1024;
CodeAllocator allocator(blockSize, maxTotalSize);
// The allocator hands the unwind builder to the create/destroy callbacks through its context pointer
allocator.context = unwind.get();
allocator.createBlockUnwindInfo = createBlockUnwindInfo;
allocator.destroyBlockUnwindInfo = destroyBlockUnwindInfo;
uint8_t* nativeData;
size_t sizeNativeData;
uint8_t* nativeEntry;
REQUIRE(allocator.allocate(build.data.data(), build.data.size(), build.code.data(), build.code.size(), nativeData, sizeNativeData, nativeEntry));
REQUIRE(nativeEntry);
using FunctionType = int64_t(int64_t, void (*)(int64_t));
// Entry points are located by adding each start label's location to the allocated code address
FunctionType* f1 = (FunctionType*)(nativeEntry + start1.location);
FunctionType* f2 = (FunctionType*)(nativeEntry + start2.location);
// To simplify debugging, CHECK_THROWS_WITH_AS is not used here
// NOTE(review): if f1/f2 return without throwing, neither catch block runs and no CHECK fails,
// so a missing exception is not detected by this test
try
{
f1(10, throwing);
}
catch (const std::runtime_error& error)
{
CHECK(strcmp(error.what(), "testing") == 0);
}
try
{
f2(10, throwing);
}
catch (const std::runtime_error& error)
{
CHECK(strcmp(error.what(), "testing") == 0);
}
}
TEST_CASE("GeneratedCodeExecutionWithThrowOutsideTheGateX64")
{
using namespace X64;
@ -338,7 +495,10 @@ TEST_CASE("GeneratedCodeExecutionWithThrowOutsideTheGateX64")
std::unique_ptr<UnwindBuilder> unwind = std::make_unique<UnwindBuilderDwarf2>();
#endif
unwind->start();
unwind->startInfo();
Label functionBegin = build.setLabel();
unwind->startFunction();
// Prologue (some of these registers don't have to be saved, but we want to have a big prologue)
build.push(r10);
@ -365,8 +525,6 @@ TEST_CASE("GeneratedCodeExecutionWithThrowOutsideTheGateX64")
build.lea(rbp, addr[rsp + stackSize]);
unwind->setupFrameReg(rbp, stackSize);
unwind->finish();
size_t prologueSize = build.setLabel().location;
// Body
@ -387,8 +545,12 @@ TEST_CASE("GeneratedCodeExecutionWithThrowOutsideTheGateX64")
build.pop(r10);
build.ret();
unwind->finishFunction(build.getLabelOffset(functionBegin), ~0u);
build.finalize();
unwind->finishInfo();
size_t blockSize = 4096; // Force allocate to create a new block each time
size_t maxTotalSize = 1024 * 1024;
CodeAllocator allocator(blockSize, maxTotalSize);

View File

@ -285,8 +285,16 @@ TEST_CASE("Tables")
lua_pushcfunction(
L,
[](lua_State* L) {
unsigned v = luaL_checkunsigned(L, 1);
lua_pushlightuserdata(L, reinterpret_cast<void*>(uintptr_t(v)));
if (lua_type(L, 1) == LUA_TNUMBER)
{
unsigned v = luaL_checkunsigned(L, 1);
lua_pushlightuserdata(L, reinterpret_cast<void*>(uintptr_t(v)));
}
else
{
const void* p = lua_topointer(L, 1);
lua_pushlightuserdata(L, const_cast<void*>(p));
}
return 1;
},
"makelud");
@ -402,21 +410,24 @@ TEST_CASE("PCall")
{
ScopedFastFlag sff("LuauBetterOOMHandling", true);
runConformance("pcall.lua", [](lua_State* L) {
lua_pushcfunction(L, cxxthrow, "cxxthrow");
lua_setglobal(L, "cxxthrow");
runConformance(
"pcall.lua",
[](lua_State* L) {
lua_pushcfunction(L, cxxthrow, "cxxthrow");
lua_setglobal(L, "cxxthrow");
lua_pushcfunction(
L,
[](lua_State* L) -> int {
lua_State* co = lua_tothread(L, 1);
lua_xmove(L, co, 1);
lua_resumeerror(co, L);
return 0;
},
"resumeerror");
lua_setglobal(L, "resumeerror");
}, nullptr, lua_newstate(limitedRealloc, nullptr));
lua_pushcfunction(
L,
[](lua_State* L) -> int {
lua_State* co = lua_tothread(L, 1);
lua_xmove(L, co, 1);
lua_resumeerror(co, L);
return 0;
},
"resumeerror");
lua_setglobal(L, "resumeerror");
},
nullptr, lua_newstate(limitedRealloc, nullptr));
}
TEST_CASE("Pack")

View File

@ -21,6 +21,7 @@
static const char* mainModuleName = "MainModule";
LUAU_FASTFLAG(DebugLuauDeferredConstraintResolution);
LUAU_FASTFLAG(LuauOnDemandTypecheckers);
extern std::optional<unsigned> randomSeed; // tests/main.cpp
@ -180,9 +181,16 @@ AstStatBlock* Fixture::parse(const std::string& source, const ParseOptions& pars
Luau::lint(sourceModule->root, *sourceModule->names, frontend.globals.globalScope, module.get(), sourceModule->hotcomments, {});
}
else if (!FFlag::LuauOnDemandTypecheckers)
{
ModulePtr module = frontend.typeChecker_DEPRECATED.check(*sourceModule, sourceModule->mode.value_or(Luau::Mode::Nonstrict));
Luau::lint(sourceModule->root, *sourceModule->names, frontend.globals.globalScope, module.get(), sourceModule->hotcomments, {});
}
else
{
ModulePtr module = frontend.typeChecker.check(*sourceModule, sourceModule->mode.value_or(Luau::Mode::Nonstrict));
TypeChecker typeChecker(frontend.globals.globalScope, &moduleResolver, builtinTypes, &frontend.iceHandler);
ModulePtr module = typeChecker.check(*sourceModule, sourceModule->mode.value_or(Luau::Mode::Nonstrict), std::nullopt);
Luau::lint(sourceModule->root, *sourceModule->names, frontend.globals.globalScope, module.get(), sourceModule->hotcomments, {});
}

View File

@ -3,6 +3,7 @@
#include "Luau/Module.h"
#include "Luau/Scope.h"
#include "Luau/RecursionCounter.h"
#include "Luau/Parser.h"
#include "Fixture.h"
@ -42,6 +43,38 @@ TEST_CASE_FIXTURE(Fixture, "is_within_comment")
CHECK(!isWithinComment(*sm, Position{7, 11}));
}
// Exercises isWithinComment on a ParseResult obtained directly from Parser::parse
// (with comment capture enabled) rather than on a SourceModule
TEST_CASE_FIXTURE(Fixture, "is_within_comment_parse_result")
{
std::string src = R"(
--!strict
local foo = {}
function foo:bar() end
--[[
foo:
]] foo:bar()
--[[]]--[[]] -- Two distinct comments that have zero characters of space between them.
)";
Luau::Allocator alloc;
Luau::AstNameTable names{alloc};
Luau::ParseOptions parseOptions;
// Comment locations are only checked below, so capture has to be switched on explicitly
parseOptions.captureComments = true;
Luau::ParseResult parseResult = Luau::Parser::parse(src.data(), src.size(), names, alloc, parseOptions);
// Five comments: the --!strict line, the multi-line block comment, the two adjacent empty block comments, and the trailing line comment
CHECK_EQ(5, parseResult.commentLocations.size());
CHECK(isWithinComment(parseResult, Position{1, 15}));
CHECK(isWithinComment(parseResult, Position{6, 16}));
CHECK(isWithinComment(parseResult, Position{9, 13}));
CHECK(isWithinComment(parseResult, Position{9, 14}));
CHECK(!isWithinComment(parseResult, Position{2, 15}));
CHECK(!isWithinComment(parseResult, Position{7, 10}));
CHECK(!isWithinComment(parseResult, Position{7, 11}));
}
TEST_CASE_FIXTURE(Fixture, "dont_clone_persistent_primitive")
{
TypeArena dest;
@ -319,6 +352,10 @@ TEST_CASE_FIXTURE(Fixture, "clone_recursion_limit")
TEST_CASE_FIXTURE(Fixture, "any_persistance_does_not_leak")
{
ScopedFastFlag flags[] = {
{"LuauOccursIsntAlwaysFailure", true},
};
fileResolver.source["Module/A"] = R"(
export type A = B
type B = A
@ -332,7 +369,7 @@ type B = A
auto mod = frontend.moduleResolver.getModule("Module/A");
auto it = mod->exportedTypeBindings.find("A");
REQUIRE(it != mod->exportedTypeBindings.end());
CHECK(toString(it->second.type) == "any");
CHECK(toString(it->second.type) == "*error-type*");
}
TEST_CASE_FIXTURE(BuiltinsFixture, "do_not_clone_reexports")

View File

@ -106,4 +106,22 @@ TEST_CASE("AreWeUsingDistanceWithAdjacentTranspositionsAndNotOptimalStringAlignm
CHECK_EQ(distance, 2);
}
TEST_CASE("EditDistanceSupportsUnicode")
{
// ASCII character
CHECK_EQ(Luau::editDistance("A block", "X block"), 1);
// UTF-8 2 byte character
CHECK_EQ(Luau::editDistance("A block", "À block"), 2);
// UTF-8 3 byte character
CHECK_EQ(Luau::editDistance("A block", "⪻ block"), 3);
// UTF-8 4 byte character
CHECK_EQ(Luau::editDistance("A block", "𒋄 block"), 4);
// UTF-8 extreme characters
CHECK_EQ(Luau::editDistance("A block", "R̴̨̢̟̚ŏ̶̳̳͚́ͅb̶̡̻̞̐̿ͅl̸̼͝ợ̷̜͓̒̏͜͝ẍ̴̝̦̟̰́̒́̌ block"), 85);
}
TEST_SUITE_END();

View File

@ -435,6 +435,10 @@ TEST_CASE_FIXTURE(Fixture, "typeof_expr")
TEST_CASE_FIXTURE(Fixture, "corecursive_types_error_on_tight_loop")
{
ScopedFastFlag flags[] = {
{"LuauOccursIsntAlwaysFailure", true},
};
CheckResult result = check(R"(
type A = B
type B = A
@ -443,10 +447,10 @@ TEST_CASE_FIXTURE(Fixture, "corecursive_types_error_on_tight_loop")
local bb:B
)");
TypeId fType = requireType("aa");
const AnyType* ftv = get<AnyType>(follow(fType));
REQUIRE(ftv != nullptr);
REQUIRE(!result.errors.empty());
LUAU_REQUIRE_ERROR_COUNT(1, result);
OccursCheckFailed* ocf = get<OccursCheckFailed>(result.errors[0]);
REQUIRE(ocf);
}
TEST_CASE_FIXTURE(Fixture, "type_alias_always_resolve_to_a_real_type")
@ -762,6 +766,7 @@ TEST_CASE_FIXTURE(Fixture, "occurs_check_on_cyclic_union_type")
{
CheckResult result = check(R"(
type T = T | T
local x : T
)");
LUAU_REQUIRE_ERROR_COUNT(1, result);

View File

@ -1281,6 +1281,39 @@ f(function(x) return x * 2 end)
LUAU_REQUIRE_NO_ERRORS(result);
}
// With LuauVariadicAnyCanBeGeneric enabled, forwarding an unannotated `...` from one function
// into another unannotated variadic function must type check without errors in strict mode
TEST_CASE_FIXTURE(Fixture, "variadic_any_is_compatible_with_a_generic_TypePack")
{
ScopedFastFlag sff[] = {
{"LuauVariadicAnyCanBeGeneric", true}
};
CheckResult result = check(R"(
--!strict
local function f(...) return ... end
local g = function(...) return f(...) end
)");
LUAU_REQUIRE_NO_ERRORS(result);
}
// https://github.com/Roblox/luau/issues/767
// With LuauVariadicAnyCanBeGeneric enabled, passing a generic pack `T...` to a function
// declared with `...: any` must type check without errors
TEST_CASE_FIXTURE(BuiltinsFixture, "variadic_any_is_compatible_with_a_generic_TypePack_2")
{
ScopedFastFlag sff{"LuauVariadicAnyCanBeGeneric", true};
CheckResult result = check(R"(
local function somethingThatsAny(...: any)
print(...)
end
local function x<T...>(...: T...)
somethingThatsAny(...) -- Failed to unify variadic type packs
end
)");
LUAU_REQUIRE_NO_ERRORS(result);
}
TEST_CASE_FIXTURE(Fixture, "infer_anonymous_function_arguments_outside_call")
{
CheckResult result = check(R"(

View File

@ -53,10 +53,6 @@ TEST_CASE_FIXTURE(Fixture, "or_joins_types_with_no_superfluous_union")
TEST_CASE_FIXTURE(Fixture, "and_does_not_always_add_boolean")
{
ScopedFastFlag sff[]{
{"LuauTryhardAnd", true},
};
CheckResult result = check(R"(
local s = "a" and 10
local x:boolean|number = s
@ -737,6 +733,8 @@ TEST_CASE_FIXTURE(Fixture, "error_on_invalid_operand_types_to_relational_operato
TEST_CASE_FIXTURE(Fixture, "cli_38355_recursive_union")
{
ScopedFastFlag sff{"LuauOccursIsntAlwaysFailure", true};
CheckResult result = check(R"(
--!strict
local _
@ -744,7 +742,7 @@ TEST_CASE_FIXTURE(Fixture, "cli_38355_recursive_union")
)");
LUAU_REQUIRE_ERROR_COUNT(1, result);
CHECK_EQ("Type contains a self-recursive construct that cannot be resolved", toString(result.errors[0]));
CHECK_EQ("Unknown type used in + operation; consider adding a type annotation to '_'", toString(result.errors[0]));
}
TEST_CASE_FIXTURE(BuiltinsFixture, "UnknownGlobalCompoundAssign")
@ -1048,10 +1046,6 @@ TEST_CASE_FIXTURE(BuiltinsFixture, "mm_comparisons_must_return_a_boolean")
TEST_CASE_FIXTURE(BuiltinsFixture, "reworked_and")
{
ScopedFastFlag sff[]{
{"LuauTryhardAnd", true},
};
CheckResult result = check(R"(
local a: number? = 5
local b: boolean = (a or 1) > 10
@ -1077,10 +1071,6 @@ local w = c and 1
TEST_CASE_FIXTURE(BuiltinsFixture, "reworked_or")
{
ScopedFastFlag sff[]{
{"LuauTryhardAnd", true},
};
CheckResult result = check(R"(
local a: number | false = 5
local b: number? = 6
@ -1115,11 +1105,6 @@ local f1 = f or 'f'
TEST_CASE_FIXTURE(BuiltinsFixture, "reducing_and")
{
ScopedFastFlag sff[]{
{"LuauTryhardAnd", true},
{"LuauReducingAndOr", true},
};
CheckResult result = check(R"(
type Foo = { name: string?, flag: boolean? }
local arr: {Foo} = {}
@ -1137,4 +1122,61 @@ end
LUAU_REQUIRE_NO_ERRORS(result);
}
// Type-checks a strict-mode function that takes an `any` value, compares
// typeof(value) against "number", and then applies `%` and `<` to the value.
// The checker is expected to report no errors for this program.
TEST_CASE_FIXTURE(BuiltinsFixture, "luau_polyfill_is_array_simplified")
{
    CheckResult checked = check(R"(
--!strict
return function(value: any) : boolean
if typeof(value) ~= "number" then
return false
end
if value % 1 ~= 0 or value < 1 then
return false
end
return true
end
)");

    LUAU_REQUIRE_NO_ERRORS(checked);
}
// Full version of the "is array" polyfill check: the Luau program below tests
// typeof(value) against "table", calls next/pairs on it, and uses the keys in
// arithmetic and compound assignments. The checker is expected to accept it
// without reporting any errors.
TEST_CASE_FIXTURE(BuiltinsFixture, "luau_polyfill_is_array")
{
    CheckResult checked = check(R"(
--!strict
return function(value: any): boolean
if typeof(value) ~= "table" then
return false
end
if next(value) == nil then
-- an empty table is an empty array
return true
end
local length = #value
if length == 0 then
return false
end
local count = 0
local sum = 0
for key in pairs(value) do
if typeof(key) ~= "number" then
return false
end
if key % 1 ~= 0 or key < 1 then
return false
end
count += 1
sum += key
end
return sum == (count * (count + 1) / 2)
end
)");

    LUAU_REQUIRE_NO_ERRORS(checked);
}
TEST_SUITE_END();

View File

@ -320,23 +320,6 @@ TEST_CASE_FIXTURE(Fixture, "weird_fail_to_unify_type_pack")
LUAU_REQUIRE_ERRORS(result); // Should not have any errors.
}
// Forwards a variadic pack from one untyped function through another with
// DebugLuauDeferredConstraintResolution forced off. The test currently asserts
// that errors ARE reported, even though (per the trailing note) none are wanted.
TEST_CASE_FIXTURE(Fixture, "weird_fail_to_unify_variadic_pack")
{
    ScopedFastFlag flags[] = {
        // I'm not sure why this is broken without DCR, but it seems to be fixed
        // when DCR is enabled.
        {"DebugLuauDeferredConstraintResolution", false},
    };

    CheckResult checked = check(R"(
--!strict
local function f(...) return ... end
local g = function(...) return f(...) end
)");

    LUAU_REQUIRE_ERRORS(checked); // Should not have any errors.
}
// Belongs in TypeInfer.builtins.test.cpp.
TEST_CASE_FIXTURE(BuiltinsFixture, "pcall_returns_at_least_two_value_but_function_returns_nothing")
{
@ -819,4 +802,23 @@ TEST_CASE_FIXTURE(BuiltinsFixture, "table_insert_with_a_singleton_argument")
}
}
// We really should be warning on this. We have no guarantee that T has any properties.
// For now the test pins the current behaviour (no errors); the assertions we
// would eventually like to hold are kept below, commented out.
TEST_CASE_FIXTURE(Fixture, "lookup_prop_of_intersection_containing_unions_of_tables_that_have_the_prop")
{
    CheckResult checked = check(R"(
local function mergeOptions<T>(options: T & ({variable: string} | {variable: number}))
return options.variable
end
)");

    LUAU_REQUIRE_NO_ERRORS(checked);

    // LUAU_REQUIRE_ERROR_COUNT(1, checked);
    // const UnknownProperty* missingProp = get<UnknownProperty>(checked.errors[0]);
    // REQUIRE(missingProp);
    // CHECK("variable" == missingProp->key);
}
TEST_SUITE_END();

View File

@ -1195,6 +1195,21 @@ local b = typeof(foo) ~= 'nil'
CHECK(toString(result.errors[1]) == "Unknown global 'foo'");
}
// With LuauOccursIsntAlwaysFailure enabled, assigning between two locals that
// were both built from the same `if c then x else nil` expression is expected
// to type-check without errors.
TEST_CASE_FIXTURE(Fixture, "occurs_isnt_always_failure")
{
    ScopedFastFlag occursFlag{"LuauOccursIsntAlwaysFailure", true};

    CheckResult checked = check(R"(
function f(x, c) -- x : X
local y = if c then x else nil -- y : X?
local z = if c then x else nil -- z : X?
y = z
end
)");

    LUAU_REQUIRE_NO_ERRORS(checked);
}
TEST_CASE_FIXTURE(Fixture, "dcr_delays_expansion_of_function_containing_blocked_parameter_type")
{
ScopedFastFlag sff[] = {

View File

@ -776,4 +776,20 @@ TEST_CASE_FIXTURE(Fixture, "generic_function_with_optional_arg")
LUAU_REQUIRE_NO_ERRORS(result);
}
// Indexing `options.variables` on a value typed `T & ({} | {})` must yield
// exactly one error, and that error must be an UnknownProperty whose key is
// "variables".
TEST_CASE_FIXTURE(Fixture, "lookup_prop_of_intersection_containing_unions")
{
    CheckResult checked = check(R"(
local function mergeOptions<T>(options: T & ({} | {}))
return options.variables
end
)");

    LUAU_REQUIRE_ERROR_COUNT(1, checked);

    const UnknownProperty* missingProp = get<UnknownProperty>(checked.errors[0]);
    REQUIRE(missingProp);
    CHECK("variables" == missingProp->key);
}
TEST_SUITE_END();

View File

@ -301,11 +301,6 @@ TEST_CASE_FIXTURE(Fixture, "length_of_never")
TEST_CASE_FIXTURE(Fixture, "dont_unify_operands_if_one_of_the_operand_is_never_in_any_ordering_operators")
{
ScopedFastFlag sff[]{
{"LuauTryhardAnd", true},
{"LuauReducingAndOr", true},
};
CheckResult result = check(R"(
local function ord(x: nil, y)
return x ~= nil and x > y

View File

@ -273,12 +273,14 @@ TEST_CASE_FIXTURE(Fixture, "substitution_skip_failure")
TypeId root = &ttvTweenResult;
frontend.typeChecker.currentModule = std::make_shared<Module>();
frontend.typeChecker.currentModule->scopes.emplace_back(Location{}, std::make_shared<Scope>(builtinTypes->anyTypePack));
ModulePtr currentModule = std::make_shared<Module>();
Anyification anyification(&currentModule->internalTypes, frontend.globals.globalScope, builtinTypes, &frontend.iceHandler, builtinTypes->anyType,
builtinTypes->anyTypePack);
std::optional<TypeId> any = anyification.substitute(root);
TypeId result = frontend.typeChecker.anyify(frontend.globals.globalScope, root, Location{});
CHECK_EQ("{| f: t1 |} where t1 = () -> {| f: () -> {| f: ({| f: t1 |}) -> (), signal: {| f: (any) -> () |} |} |}", toString(result));
REQUIRE(!anyification.normalizationTooComplex);
REQUIRE(any.has_value());
CHECK_EQ("{| f: t1 |} where t1 = () -> {| f: () -> {| f: ({| f: t1 |}) -> (), signal: {| f: (any) -> () |} |} |}", toString(*any));
}
TEST_CASE("tagging_tables")

View File

@ -347,5 +347,15 @@ assert(select('#', math.ceil(1.6)) == 1)
assert(select('#', math.sqrt(9)) == 1)
assert(select('#', math.deg(9)) == 1)
assert(select('#', math.rad(9)) == 1)
assert(select('#', math.sin(1.5)) == 1)
assert(select('#', math.atan2(1.5, 0.5)) == 1)
assert(select('#', math.modf(1.5)) == 2)
assert(select('#', math.frexp(1.5)) == 2)
-- test that fastcalls that return variadic results return them correctly in variadic position
assert(select(1, math.modf(1.5)) == 1)
assert(select(2, math.modf(1.5)) == 0.5)
assert(select(1, math.frexp(1.5)) == 0.75)
assert(select(2, math.frexp(1.5)) == 1)
return('OK')

View File

@ -715,4 +715,11 @@ do
end
end
-- a light userdata key created from the string "hi" must not be mistaken for the
-- string key "hi" by the table lookup fast path: t.hi has to stay nil
assert((function()
    local tbl = {}
    tbl[makelud("hi")] = "no"
    return tbl.hi
end)() == nil)
return"OK"

View File

@ -34,7 +34,7 @@ source = """// This file is part of the Luau programming language and is license
function = ""
signature = ""
includeInsts = ["LOP_NEWCLOSURE", "LOP_NAMECALL", "LOP_FORGPREP", "LOP_GETVARARGS", "LOP_DUPCLOSURE", "LOP_PREPVARARGS", "LOP_BREAK", "LOP_GETGLOBAL", "LOP_SETGLOBAL", "LOP_GETTABLEKS", "LOP_SETTABLEKS"]
includeInsts = ["LOP_NEWCLOSURE", "LOP_NAMECALL", "LOP_FORGPREP", "LOP_GETVARARGS", "LOP_DUPCLOSURE", "LOP_PREPVARARGS", "LOP_BREAK", "LOP_GETGLOBAL", "LOP_SETGLOBAL", "LOP_GETTABLEKS", "LOP_SETTABLEKS", "LOP_SETLIST"]
state = 0