luau/CodeGen/src/CodeGen.cpp

635 lines
19 KiB
C++
Raw Normal View History

// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/CodeGen.h"
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/Common.h"
#include "Luau/CodeAllocator.h"
#include "Luau/CodeBlockUnwind.h"
#include "Luau/UnwindBuilder.h"
#include "Luau/UnwindBuilderDwarf2.h"
#include "Luau/UnwindBuilderWin.h"
#include "CustomExecUtils.h"
#include "CodeGenX64.h"
#include "EmitCommonX64.h"
#include "EmitInstructionX64.h"
#include "NativeState.h"
#include "lapi.h"
#include <memory>
#if defined(__x86_64__) || defined(_M_X64)
#ifdef _MSC_VER
#include <intrin.h> // __cpuid
#else
#include <cpuid.h> // __cpuid
#endif
#endif
namespace Luau
{
namespace CodeGen
{
constexpr uint32_t kFunctionAlignment = 32;
static NativeProto* assembleFunction(AssemblyBuilderX64& build, NativeState& data, Proto* proto, AssemblyOptions options)
{
NativeProto* result = new NativeProto();
result->proto = proto;
if (build.logText)
{
if (proto->debugname)
build.logAppend("; function %s()", getstr(proto->debugname));
else
build.logAppend("; function()");
if (proto->linedefined >= 0)
build.logAppend(" line %d\n", proto->linedefined);
else
build.logAppend("\n");
}
std::vector<Label> instLabels;
instLabels.resize(proto->sizecode);
std::vector<Label> instFallbacks;
instFallbacks.resize(proto->sizecode);
build.align(kFunctionAlignment, AlignmentDataX64::Ud2);
Label start = build.setLabel();
for (int i = 0, instid = 0; i < proto->sizecode; ++instid)
{
const Instruction* pc = &proto->code[i];
LuauOpcode op = LuauOpcode(LUAU_INSN_OP(*pc));
build.setLabel(instLabels[i]);
if (options.annotator)
options.annotator(options.annotatorContext, build.text, proto->bytecodeid, instid);
switch (op)
{
case LOP_NOP:
break;
case LOP_LOADNIL:
emitInstLoadNil(build, pc);
break;
case LOP_LOADB:
emitInstLoadB(build, pc, i, instLabels.data());
break;
case LOP_LOADN:
emitInstLoadN(build, pc);
break;
case LOP_LOADK:
emitInstLoadK(build, pc);
break;
case LOP_LOADKX:
emitInstLoadKX(build, pc);
break;
case LOP_MOVE:
emitInstMove(build, pc);
break;
case LOP_GETGLOBAL:
emitInstGetGlobal(build, pc, i, instFallbacks[i]);
break;
case LOP_SETGLOBAL:
emitInstSetGlobal(build, pc, i, instLabels.data(), instFallbacks[i]);
break;
case LOP_GETTABLE:
emitInstGetTable(build, pc, i, instFallbacks[i]);
break;
case LOP_SETTABLE:
emitInstSetTable(build, pc, i, instLabels.data(), instFallbacks[i]);
break;
case LOP_GETTABLEKS:
emitInstGetTableKS(build, pc, i, instFallbacks[i]);
break;
case LOP_SETTABLEKS:
emitInstSetTableKS(build, pc, i, instLabels.data(), instFallbacks[i]);
break;
case LOP_GETTABLEN:
emitInstGetTableN(build, pc, i, instFallbacks[i]);
break;
case LOP_SETTABLEN:
emitInstSetTableN(build, pc, i, instLabels.data(), instFallbacks[i]);
break;
case LOP_JUMP:
emitInstJump(build, pc, i, instLabels.data());
break;
case LOP_JUMPBACK:
emitInstJumpBack(build, pc, i, instLabels.data());
break;
case LOP_JUMPIF:
emitInstJumpIf(build, pc, i, instLabels.data(), /* not_ */ false);
break;
case LOP_JUMPIFNOT:
emitInstJumpIf(build, pc, i, instLabels.data(), /* not_ */ true);
break;
case LOP_JUMPIFEQ:
emitInstJumpIfEq(build, pc, i, instLabels.data(), /* not_ */ false, instFallbacks[i]);
break;
case LOP_JUMPIFLE:
emitInstJumpIfCond(build, pc, i, instLabels.data(), Condition::LessEqual, instFallbacks[i]);
break;
case LOP_JUMPIFLT:
emitInstJumpIfCond(build, pc, i, instLabels.data(), Condition::Less, instFallbacks[i]);
break;
case LOP_JUMPIFNOTEQ:
emitInstJumpIfEq(build, pc, i, instLabels.data(), /* not_ */ true, instFallbacks[i]);
break;
case LOP_JUMPIFNOTLE:
emitInstJumpIfCond(build, pc, i, instLabels.data(), Condition::NotLessEqual, instFallbacks[i]);
break;
case LOP_JUMPIFNOTLT:
emitInstJumpIfCond(build, pc, i, instLabels.data(), Condition::NotLess, instFallbacks[i]);
break;
case LOP_JUMPX:
emitInstJumpX(build, pc, i, instLabels.data());
break;
case LOP_JUMPXEQKNIL:
emitInstJumpxEqNil(build, pc, i, instLabels.data());
break;
case LOP_JUMPXEQKB:
emitInstJumpxEqB(build, pc, i, instLabels.data());
break;
case LOP_JUMPXEQKN:
emitInstJumpxEqN(build, pc, proto->k, i, instLabels.data());
break;
case LOP_JUMPXEQKS:
emitInstJumpxEqS(build, pc, i, instLabels.data());
break;
case LOP_ADD:
emitInstBinary(build, pc, i, TM_ADD, instFallbacks[i]);
break;
case LOP_SUB:
emitInstBinary(build, pc, i, TM_SUB, instFallbacks[i]);
break;
case LOP_MUL:
emitInstBinary(build, pc, i, TM_MUL, instFallbacks[i]);
break;
case LOP_DIV:
emitInstBinary(build, pc, i, TM_DIV, instFallbacks[i]);
break;
case LOP_MOD:
emitInstBinary(build, pc, i, TM_MOD, instFallbacks[i]);
break;
case LOP_POW:
emitInstBinary(build, pc, i, TM_POW, instFallbacks[i]);
break;
case LOP_ADDK:
emitInstBinaryK(build, pc, i, TM_ADD, instFallbacks[i]);
break;
case LOP_SUBK:
emitInstBinaryK(build, pc, i, TM_SUB, instFallbacks[i]);
break;
case LOP_MULK:
emitInstBinaryK(build, pc, i, TM_MUL, instFallbacks[i]);
break;
case LOP_DIVK:
emitInstBinaryK(build, pc, i, TM_DIV, instFallbacks[i]);
break;
case LOP_MODK:
emitInstBinaryK(build, pc, i, TM_MOD, instFallbacks[i]);
break;
case LOP_POWK:
emitInstPowK(build, pc, proto->k, i, instFallbacks[i]);
break;
case LOP_NOT:
emitInstNot(build, pc);
break;
case LOP_MINUS:
emitInstMinus(build, pc, i, instFallbacks[i]);
break;
case LOP_LENGTH:
emitInstLength(build, pc, i, instFallbacks[i]);
break;
case LOP_NEWTABLE:
emitInstNewTable(build, pc, i, instLabels.data());
break;
case LOP_DUPTABLE:
emitInstDupTable(build, pc, i, instLabels.data());
break;
case LOP_SETLIST:
emitInstSetList(build, pc, i, instLabels.data());
break;
case LOP_GETUPVAL:
emitInstGetUpval(build, pc, i);
break;
case LOP_SETUPVAL:
emitInstSetUpval(build, pc, i, instLabels.data());
break;
case LOP_CLOSEUPVALS:
emitInstCloseUpvals(build, pc, i, instLabels.data());
break;
case LOP_FASTCALL:
emitInstFastCall(build, pc, i, instLabels.data());
break;
case LOP_FASTCALL1:
emitInstFastCall1(build, pc, i, instLabels.data());
break;
case LOP_FASTCALL2:
emitInstFastCall2(build, pc, i, instLabels.data());
break;
case LOP_FASTCALL2K:
emitInstFastCall2K(build, pc, i, instLabels.data());
break;
case LOP_FORNPREP:
emitInstForNPrep(build, pc, i, instLabels.data());
break;
case LOP_FORNLOOP:
emitInstForNLoop(build, pc, i, instLabels.data());
break;
case LOP_AND:
emitInstAnd(build, pc);
break;
case LOP_ANDK:
emitInstAndK(build, pc);
break;
case LOP_OR:
emitInstOr(build, pc);
break;
case LOP_ORK:
emitInstOrK(build, pc);
break;
case LOP_GETIMPORT:
emitInstGetImport(build, pc, instFallbacks[i]);
break;
case LOP_CONCAT:
emitInstConcat(build, pc, i, instLabels.data());
break;
default:
emitFallback(build, data, op, i);
break;
}
i += getOpLength(op);
LUAU_ASSERT(i <= proto->sizecode);
}
size_t textSize = build.text.size();
uint32_t codeSize = build.getCodeSize();
if (options.annotator && !options.skipOutlinedCode)
build.logAppend("; outlined code\n");
for (int i = 0, instid = 0; i < proto->sizecode; ++instid)
{
const Instruction* pc = &proto->code[i];
LuauOpcode op = LuauOpcode(LUAU_INSN_OP(*pc));
int nexti = i + getOpLength(op);
LUAU_ASSERT(nexti <= proto->sizecode);
if (instFallbacks[i].id == 0)
{
i = nexti;
continue;
}
if (options.annotator && !options.skipOutlinedCode)
options.annotator(options.annotatorContext, build.text, proto->bytecodeid, instid);
build.setLabel(instFallbacks[i]);
switch (op)
{
case LOP_GETIMPORT:
emitInstGetImportFallback(build, pc, i);
break;
case LOP_GETTABLE:
emitInstGetTableFallback(build, pc, i);
break;
case LOP_SETTABLE:
emitInstSetTableFallback(build, pc, i);
break;
case LOP_GETTABLEN:
emitInstGetTableNFallback(build, pc, i);
break;
case LOP_SETTABLEN:
emitInstSetTableNFallback(build, pc, i);
break;
case LOP_JUMPIFEQ:
emitInstJumpIfEqFallback(build, pc, i, instLabels.data(), /* not_ */ false);
break;
case LOP_JUMPIFLE:
emitInstJumpIfCondFallback(build, pc, i, instLabels.data(), Condition::LessEqual);
break;
case LOP_JUMPIFLT:
emitInstJumpIfCondFallback(build, pc, i, instLabels.data(), Condition::Less);
break;
case LOP_JUMPIFNOTEQ:
emitInstJumpIfEqFallback(build, pc, i, instLabels.data(), /* not_ */ true);
break;
case LOP_JUMPIFNOTLE:
emitInstJumpIfCondFallback(build, pc, i, instLabels.data(), Condition::NotLessEqual);
break;
case LOP_JUMPIFNOTLT:
emitInstJumpIfCondFallback(build, pc, i, instLabels.data(), Condition::NotLess);
break;
case LOP_ADD:
emitInstBinaryFallback(build, pc, i, TM_ADD);
break;
case LOP_SUB:
emitInstBinaryFallback(build, pc, i, TM_SUB);
break;
case LOP_MUL:
emitInstBinaryFallback(build, pc, i, TM_MUL);
break;
case LOP_DIV:
emitInstBinaryFallback(build, pc, i, TM_DIV);
break;
case LOP_MOD:
emitInstBinaryFallback(build, pc, i, TM_MOD);
break;
case LOP_POW:
emitInstBinaryFallback(build, pc, i, TM_POW);
break;
case LOP_ADDK:
emitInstBinaryKFallback(build, pc, i, TM_ADD);
break;
case LOP_SUBK:
emitInstBinaryKFallback(build, pc, i, TM_SUB);
break;
case LOP_MULK:
emitInstBinaryKFallback(build, pc, i, TM_MUL);
break;
case LOP_DIVK:
emitInstBinaryKFallback(build, pc, i, TM_DIV);
break;
case LOP_MODK:
emitInstBinaryKFallback(build, pc, i, TM_MOD);
break;
case LOP_POWK:
emitInstBinaryKFallback(build, pc, i, TM_POW);
break;
case LOP_MINUS:
emitInstMinusFallback(build, pc, i);
break;
case LOP_LENGTH:
emitInstLengthFallback(build, pc, i);
break;
case LOP_GETGLOBAL:
// TODO: luaV_gettable + cachedslot update instead of full fallback
emitFallback(build, data, op, i);
break;
case LOP_SETGLOBAL:
// TODO: luaV_settable + cachedslot update instead of full fallback
emitFallback(build, data, op, i);
break;
case LOP_GETTABLEKS:
// Full fallback required for LOP_GETTABLEKS because 'luaV_gettable' doesn't handle builtin vector field access
// It is also required to perform cached slot update
// TODO: extra fast-paths could be lowered before the full fallback
emitFallback(build, data, op, i);
break;
case LOP_SETTABLEKS:
// TODO: luaV_settable + cachedslot update instead of full fallback
emitFallback(build, data, op, i);
break;
default:
LUAU_ASSERT(!"Expected fallback for instruction");
}
// Jump back to the next instruction handler
if (nexti < proto->sizecode)
build.jmp(instLabels[nexti]);
i = nexti;
}
// Truncate assembly output if we don't care for outlined code part
if (options.skipOutlinedCode)
{
build.text.resize(textSize);
build.logAppend("; skipping %u bytes of outlined code\n", build.getCodeSize() - codeSize);
}
result->instTargets = new uintptr_t[proto->sizecode];
for (int i = 0; i < proto->sizecode; i++)
result->instTargets[i] = instLabels[i].location - start.location;
result->location = start.location;
if (build.logText)
build.logAppend("\n");
return result;
}
static void destroyNativeProto(NativeProto* nativeProto)
{
delete[] nativeProto->instTargets;
delete nativeProto;
}
static void onCloseState(lua_State* L)
{
destroyNativeState(L);
}
static void onDestroyFunction(lua_State* L, Proto* proto)
{
NativeProto* nativeProto = getProtoExecData(proto);
LUAU_ASSERT(nativeProto->proto == proto);
setProtoExecData(proto, nullptr);
destroyNativeProto(nativeProto);
}
static int onEnter(lua_State* L, Proto* proto)
{
if (L->singlestep)
return 1;
NativeState* data = getNativeState(L);
if (!L->ci->savedpc)
L->ci->savedpc = proto->code;
// We will jump into native code through a gateway
bool (*gate)(lua_State*, Proto*, uintptr_t, NativeContext*) = (bool (*)(lua_State*, Proto*, uintptr_t, NativeContext*))data->context.gateEntry;
NativeProto* nativeProto = getProtoExecData(proto);
uintptr_t target = nativeProto->instTargets[L->ci->savedpc - proto->code];
// Returns 1 to finish the function in the VM
return gate(L, proto, target, &data->context);
}
static void onSetBreakpoint(lua_State* L, Proto* proto, int instruction)
{
if (!getProtoExecData(proto))
return;
LUAU_ASSERT(!"native breakpoints are not implemented");
}
bool isSupported()
{
#if !LUA_CUSTOM_EXECUTION
return false;
#elif defined(__x86_64__) || defined(_M_X64)
if (LUA_EXTRA_SIZE != 1)
return false;
if (sizeof(TValue) != 16)
return false;
if (sizeof(LuaNode) != 32)
return false;
int cpuinfo[4] = {};
#ifdef _MSC_VER
__cpuid(cpuinfo, 1);
#else
__cpuid(1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);
#endif
// We require AVX1 support for VEX encoded XMM operations
// We also requre SSE4.1 support for ROUNDSD but the AVX check below covers it
// https://en.wikipedia.org/wiki/CPUID#EAX=1:_Processor_Info_and_Feature_Bits
if ((cpuinfo[2] & (1 << 28)) == 0)
return false;
return true;
#else
return false;
#endif
}
void create(lua_State* L)
{
LUAU_ASSERT(isSupported());
NativeState& data = *createNativeState(L);
#if defined(_WIN32)
data.unwindBuilder = std::make_unique<UnwindBuilderWin>();
#else
data.unwindBuilder = std::make_unique<UnwindBuilderDwarf2>();
#endif
data.codeAllocator.context = data.unwindBuilder.get();
data.codeAllocator.createBlockUnwindInfo = createBlockUnwindInfo;
data.codeAllocator.destroyBlockUnwindInfo = destroyBlockUnwindInfo;
initFallbackTable(data);
initHelperFunctions(data);
if (!x64::initEntryFunction(data))
{
destroyNativeState(L);
return;
}
lua_ExecutionCallbacks* ecb = getExecutionCallbacks(L);
ecb->close = onCloseState;
ecb->destroy = onDestroyFunction;
ecb->enter = onEnter;
ecb->setbreakpoint = onSetBreakpoint;
}
static void gatherFunctions(std::vector<Proto*>& results, Proto* proto)
{
if (results.size() <= size_t(proto->bytecodeid))
results.resize(proto->bytecodeid + 1);
// Skip protos that we've already compiled in this run: this happens because at -O2, inlined functions get their protos reused
if (results[proto->bytecodeid])
return;
results[proto->bytecodeid] = proto;
for (int i = 0; i < proto->sizep; i++)
gatherFunctions(results, proto->p[i]);
}
void compile(lua_State* L, int idx)
{
LUAU_ASSERT(lua_isLfunction(L, idx));
const TValue* func = luaA_toobject(L, idx);
// If initialization has failed, do not compile any functions
if (!getNativeState(L))
return;
AssemblyBuilderX64 build(/* logText= */ false);
NativeState* data = getNativeState(L);
std::vector<Proto*> protos;
gatherFunctions(protos, clvalue(func)->l.p);
std::vector<NativeProto*> results;
results.reserve(protos.size());
// Skip protos that have been compiled during previous invocations of CodeGen::compile
for (Proto* p : protos)
if (p && getProtoExecData(p) == nullptr)
results.push_back(assembleFunction(build, *data, p, {}));
build.finalize();
uint8_t* nativeData = nullptr;
size_t sizeNativeData = 0;
uint8_t* codeStart = nullptr;
if (!data->codeAllocator.allocate(
build.data.data(), int(build.data.size()), build.code.data(), int(build.code.size()), nativeData, sizeNativeData, codeStart))
{
for (NativeProto* result : results)
destroyNativeProto(result);
return;
}
// Relocate instruction offsets
for (NativeProto* result : results)
{
for (int i = 0; i < result->proto->sizecode; i++)
result->instTargets[i] += uintptr_t(codeStart + result->location);
}
// Link native proto objects to Proto; the memory is now managed by VM and will be freed via onDestroyFunction
for (NativeProto* result : results)
setProtoExecData(result->proto, result);
}
std::string getAssembly(lua_State* L, int idx, AssemblyOptions options)
{
LUAU_ASSERT(lua_isLfunction(L, idx));
const TValue* func = luaA_toobject(L, idx);
AssemblyBuilderX64 build(/* logText= */ !options.outputBinary);
NativeState data;
initFallbackTable(data);
std::vector<Proto*> protos;
gatherFunctions(protos, clvalue(func)->l.p);
for (Proto* p : protos)
if (p)
{
NativeProto* nativeProto = assembleFunction(build, data, p, options);
destroyNativeProto(nativeProto);
}
build.finalize();
if (options.outputBinary)
return std::string(build.code.begin(), build.code.end()) + std::string(build.data.begin(), build.data.end());
else
return build.text;
}
} // namespace CodeGen
} // namespace Luau