diff --git a/Analysis/include/Luau/TypeInfer.h b/Analysis/include/Luau/TypeInfer.h index 80f9085..e253edd 100644 --- a/Analysis/include/Luau/TypeInfer.h +++ b/Analysis/include/Luau/TypeInfer.h @@ -107,6 +107,7 @@ struct TypeChecker WithPredicate checkExpr(const ScopePtr& scope, const AstExprTypeAssertion& expr); WithPredicate checkExpr(const ScopePtr& scope, const AstExprError& expr); WithPredicate checkExpr(const ScopePtr& scope, const AstExprIfElse& expr, std::optional expectedType = std::nullopt); + WithPredicate checkExpr(const ScopePtr& scope, const AstExprInterpString& expr); TypeId checkExprTable(const ScopePtr& scope, const AstExprTable& expr, const std::vector>& fieldTypes, std::optional expectedType); diff --git a/Analysis/src/AstJsonEncoder.cpp b/Analysis/src/AstJsonEncoder.cpp index 2897875..8d58903 100644 --- a/Analysis/src/AstJsonEncoder.cpp +++ b/Analysis/src/AstJsonEncoder.cpp @@ -445,6 +445,14 @@ struct AstJsonEncoder : public AstVisitor }); } + void write(class AstExprInterpString* node) + { + writeNode(node, "AstExprInterpString", [&]() { + PROP(strings); + PROP(expressions); + }); + } + void write(class AstExprTable* node) { writeNode(node, "AstExprTable", [&]() { @@ -888,6 +896,12 @@ struct AstJsonEncoder : public AstVisitor return false; } + bool visit(class AstExprInterpString* node) override + { + write(node); + return false; + } + bool visit(class AstExprLocal* node) override { write(node); diff --git a/Analysis/src/Linter.cpp b/Analysis/src/Linter.cpp index 2d05837..9f89efe 100644 --- a/Analysis/src/Linter.cpp +++ b/Analysis/src/Linter.cpp @@ -206,6 +206,24 @@ static bool similar(AstExpr* lhs, AstExpr* rhs) return true; } CASE(AstExprIfElse) return similar(le->condition, re->condition) && similar(le->trueExpr, re->trueExpr) && similar(le->falseExpr, re->falseExpr); + CASE(AstExprInterpString) + { + if (le->strings.size != re->strings.size) + return false; + + if (le->expressions.size != re->expressions.size) + return false; + + for (size_t 
i = 0; i < le->strings.size; ++i) + if (le->strings.data[i].size != re->strings.data[i].size || memcmp(le->strings.data[i].data, re->strings.data[i].data, le->strings.data[i].size) != 0) + return false; + + for (size_t i = 0; i < le->expressions.size; ++i) + if (!similar(le->expressions.data[i], re->expressions.data[i])) + return false; + + return true; + } else { LUAU_ASSERT(!"Unknown expression type"); diff --git a/Analysis/src/Transpiler.cpp b/Analysis/src/Transpiler.cpp index 9feff1c..cdfe654 100644 --- a/Analysis/src/Transpiler.cpp +++ b/Analysis/src/Transpiler.cpp @@ -511,6 +511,28 @@ struct Printer writer.keyword("else"); visualize(*a->falseExpr); } + else if (const auto& a = expr.as()) + { + writer.symbol("`"); + + size_t index = 0; + + for (const auto& string : a->strings) + { + writer.write(escape(std::string_view(string.data, string.size), /* escapeForInterpString = */ true)); + + if (index < a->expressions.size) + { + writer.symbol("{"); + visualize(*a->expressions.data[index]); + writer.symbol("}"); + } + + index++; + } + + writer.symbol("`"); + } else if (const auto& a = expr.as()) { writer.symbol("(error-expr"); diff --git a/Analysis/src/TypeInfer.cpp b/Analysis/src/TypeInfer.cpp index 9886fb1..7716805 100644 --- a/Analysis/src/TypeInfer.cpp +++ b/Analysis/src/TypeInfer.cpp @@ -1805,6 +1805,8 @@ WithPredicate TypeChecker::checkExpr(const ScopePtr& scope, const AstExp result = checkExpr(scope, *a); else if (auto a = expr.as()) result = checkExpr(scope, *a, expectedType); + else if (auto a = expr.as()) + result = checkExpr(scope, *a); else ice("Unhandled AstExpr?"); @@ -2999,6 +3001,14 @@ WithPredicate TypeChecker::checkExpr(const ScopePtr& scope, const AstExp return {types.size() == 1 ? 
types[0] : addType(UnionTypeVar{std::move(types)})}; } +WithPredicate TypeChecker::checkExpr(const ScopePtr& scope, const AstExprInterpString& expr) +{ + for (AstExpr* expr : expr.expressions) + checkExpr(scope, *expr); + + return {stringType}; +} + TypeId TypeChecker::checkLValue(const ScopePtr& scope, const AstExpr& expr) { return checkLValueBinding(scope, expr); diff --git a/Ast/include/Luau/Ast.h b/Ast/include/Luau/Ast.h index 1e164d0..5c04000 100644 --- a/Ast/include/Luau/Ast.h +++ b/Ast/include/Luau/Ast.h @@ -134,6 +134,10 @@ public: { return visit((class AstExpr*)node); } + virtual bool visit(class AstExprInterpString* node) + { + return visit((class AstExpr*)node); + } virtual bool visit(class AstExprError* node) { return visit((class AstExpr*)node); @@ -732,6 +736,22 @@ public: AstExpr* falseExpr; }; +class AstExprInterpString : public AstExpr +{ +public: + LUAU_RTTI(AstExprInterpString) + + AstExprInterpString(const Location& location, const AstArray>& strings, const AstArray& expressions); + + void visit(AstVisitor* visitor) override; + + /// An interpolated string such as `foo{bar}baz` is represented as + /// an array of strings for "foo" and "bar", and an array of expressions for "baz". + /// `strings` will always have one more element than `expressions`. 
+ AstArray> strings; + AstArray expressions; +}; + class AstStatBlock : public AstStat { public: diff --git a/Ast/include/Luau/Lexer.h b/Ast/include/Luau/Lexer.h index 4f3dbbd..7e7fe76 100644 --- a/Ast/include/Luau/Lexer.h +++ b/Ast/include/Luau/Lexer.h @@ -61,6 +61,12 @@ struct Lexeme SkinnyArrow, DoubleColon, + InterpStringBegin, + InterpStringMid, + InterpStringEnd, + // An interpolated string with no expressions (like `x`) + InterpStringSimple, + AddAssign, SubAssign, MulAssign, @@ -80,6 +86,8 @@ struct Lexeme BrokenString, BrokenComment, BrokenUnicode, + BrokenInterpDoubleBrace, + Error, Reserved_BEGIN, @@ -208,6 +216,11 @@ private: Lexeme readLongString(const Position& start, int sep, Lexeme::Type ok, Lexeme::Type broken); Lexeme readQuotedString(); + Lexeme readInterpolatedStringBegin(); + Lexeme readInterpolatedStringSection(Position start, Lexeme::Type formatType, Lexeme::Type endType); + + void readBackslashInString(); + std::pair readName(); Lexeme readNumber(const Position& start, unsigned int startOffset); @@ -231,6 +244,14 @@ private: bool skipComments; bool readNames; + + enum class BraceType + { + InterpolatedString, + Normal + }; + + std::vector braceStack; }; inline bool isSpace(char ch) diff --git a/Ast/include/Luau/Parser.h b/Ast/include/Luau/Parser.h index 046706d..6f56f90 100644 --- a/Ast/include/Luau/Parser.h +++ b/Ast/include/Luau/Parser.h @@ -228,6 +228,9 @@ private: // TODO: Add grammar rules here? 
AstExpr* parseIfElseExpr(); + // stringinterp ::= INTERP_BEGIN exp { INTERP_MID exp } INTERP_END + AstExpr* parseInterpString(); + // Name std::optional parseNameOpt(const char* context = nullptr); Name parseName(const char* context = nullptr); @@ -379,6 +382,7 @@ private: std::vector matchRecoveryStopOnToken; std::vector scratchStat; + std::vector> scratchString; std::vector scratchExpr; std::vector scratchExprAux; std::vector scratchName; diff --git a/Ast/include/Luau/StringUtils.h b/Ast/include/Luau/StringUtils.h index 6ae9e97..dab7610 100644 --- a/Ast/include/Luau/StringUtils.h +++ b/Ast/include/Luau/StringUtils.h @@ -35,6 +35,6 @@ bool equalsLower(std::string_view lhs, std::string_view rhs); size_t hashRange(const char* data, size_t size); -std::string escape(std::string_view s); +std::string escape(std::string_view s, bool escapeForInterpString = false); bool isIdentifier(std::string_view s); } // namespace Luau diff --git a/Ast/src/Ast.cpp b/Ast/src/Ast.cpp index 3066b75..5ede389 100644 --- a/Ast/src/Ast.cpp +++ b/Ast/src/Ast.cpp @@ -349,6 +349,22 @@ AstExprError::AstExprError(const Location& location, const AstArray& e { } +AstExprInterpString::AstExprInterpString(const Location& location, const AstArray>& strings, const AstArray& expressions) + : AstExpr(ClassIndex(), location) + , strings(strings) + , expressions(expressions) +{ +} + +void AstExprInterpString::visit(AstVisitor* visitor) +{ + if (visitor->visit(this)) + { + for (AstExpr* expr : expressions) + expr->visit(visitor); + } +} + void AstExprError::visit(AstVisitor* visitor) { if (visitor->visit(this)) diff --git a/Ast/src/Lexer.cpp b/Ast/src/Lexer.cpp index a1f1d46..b4db8bd 100644 --- a/Ast/src/Lexer.cpp +++ b/Ast/src/Lexer.cpp @@ -6,6 +6,8 @@ #include +LUAU_FASTFLAG(LuauInterpolatedStringBaseSupport) + namespace Luau { @@ -89,7 +91,18 @@ Lexeme::Lexeme(const Location& location, Type type, const char* data, size_t siz , length(unsigned(size)) , data(data) { - LUAU_ASSERT(type == RawString || type == QuotedString || type == Number 
|| type == Comment || type == BlockComment); + LUAU_ASSERT( + type == RawString + || type == QuotedString + || type == InterpStringBegin + || type == InterpStringMid + || type == InterpStringEnd + || type == InterpStringSimple + || type == BrokenInterpDoubleBrace + || type == Number + || type == Comment + || type == BlockComment + ); } Lexeme::Lexeme(const Location& location, Type type, const char* name) @@ -160,6 +173,18 @@ std::string Lexeme::toString() const case QuotedString: return data ? format("\"%.*s\"", length, data) : "string"; + case InterpStringBegin: + return data ? format("`%.*s{", length, data) : "the beginning of an interpolated string"; + + case InterpStringMid: + return data ? format("}%.*s{", length, data) : "the middle of an interpolated string"; + + case InterpStringEnd: + return data ? format("}%.*s`", length, data) : "the end of an interpolated string"; + + case InterpStringSimple: + return data ? format("`%.*s`", length, data) : "interpolated string"; + case Number: return data ? 
format("'%.*s'", length, data) : "number"; @@ -175,6 +200,9 @@ std::string Lexeme::toString() const case BrokenComment: return "unfinished comment"; + case BrokenInterpDoubleBrace: + return "'{{', which is invalid (did you mean '\\{'?)"; + case BrokenUnicode: if (codepoint) { @@ -515,6 +543,32 @@ Lexeme Lexer::readLongString(const Position& start, int sep, Lexeme::Type ok, Le return Lexeme(Location(start, position()), broken); } +void Lexer::readBackslashInString() +{ + LUAU_ASSERT(peekch() == '\\'); + consume(); + switch (peekch()) + { + case '\r': + consume(); + if (peekch() == '\n') + consume(); + break; + + case 0: + break; + + case 'z': + consume(); + while (isSpace(peekch())) + consume(); + break; + + default: + consume(); + } +} + Lexeme Lexer::readQuotedString() { Position start = position(); @@ -535,27 +589,7 @@ Lexeme Lexer::readQuotedString() return Lexeme(Location(start, position()), Lexeme::BrokenString); case '\\': - consume(); - switch (peekch()) - { - case '\r': - consume(); - if (peekch() == '\n') - consume(); - break; - - case 0: - break; - - case 'z': - consume(); - while (isSpace(peekch())) - consume(); - break; - - default: - consume(); - } + readBackslashInString(); break; default: @@ -568,6 +602,69 @@ Lexeme Lexer::readQuotedString() return Lexeme(Location(start, position()), Lexeme::QuotedString, &buffer[startOffset], offset - startOffset - 1); } +Lexeme Lexer::readInterpolatedStringBegin() +{ + LUAU_ASSERT(peekch() == '`'); + + Position start = position(); + consume(); + + return readInterpolatedStringSection(start, Lexeme::InterpStringBegin, Lexeme::InterpStringSimple); +} + +Lexeme Lexer::readInterpolatedStringSection(Position start, Lexeme::Type formatType, Lexeme::Type endType) +{ + unsigned int startOffset = offset; + + while (peekch() != '`') + { + switch (peekch()) + { + case 0: + case '\r': + case '\n': + return Lexeme(Location(start, position()), Lexeme::BrokenString); + + case '\\': + // Allow for \u{}, which would otherwise be 
consumed by looking for { + if (peekch(1) == 'u' && peekch(2) == '{') + { + consume(); // backslash + consume(); // u + consume(); // { + break; + } + + readBackslashInString(); + break; + + case '{': + { + braceStack.push_back(BraceType::InterpolatedString); + + if (peekch(1) == '{') + { + Lexeme brokenDoubleBrace = Lexeme(Location(start, position()), Lexeme::BrokenInterpDoubleBrace, &buffer[startOffset], offset - startOffset); + consume(); + consume(); + return brokenDoubleBrace; + } + + Lexeme lexemeOutput(Location(start, position()), formatType, &buffer[startOffset], offset - startOffset); // formatType: InterpStringBegin for the first section, InterpStringMid after a '}' + consume(); + return lexemeOutput; + } + + default: + consume(); + } + } + + consume(); + + return Lexeme(Location(start, position()), endType, &buffer[startOffset], offset - startOffset - 1); +} + Lexeme Lexer::readNumber(const Position& start, unsigned int startOffset) { LUAU_ASSERT(isDigit(peekch())); @@ -660,6 +757,36 @@ Lexeme Lexer::readNext() } } + case '{': + { + consume(); + + if (!braceStack.empty()) + braceStack.push_back(BraceType::Normal); + + return Lexeme(Location(start, 1), '{'); + } + + case '}': + { + consume(); + + if (braceStack.empty()) + { + return Lexeme(Location(start, 1), '}'); + } + + const BraceType braceStackTop = braceStack.back(); + braceStack.pop_back(); + + if (braceStackTop != BraceType::InterpolatedString) + { + return Lexeme(Location(start, 1), '}'); + } + + return readInterpolatedStringSection(position(), Lexeme::InterpStringMid, Lexeme::InterpStringEnd); + } + case '=': { consume(); @@ -716,6 +843,15 @@ Lexeme Lexer::readNext() case '\'': return readQuotedString(); + case '`': + if (FFlag::LuauInterpolatedStringBaseSupport) + return readInterpolatedStringBegin(); + else + { + consume(); + return Lexeme(Location(start, 1), '`'); + } + case '.': consume(); @@ -817,8 +953,6 @@ Lexeme Lexer::readNext() case '(': case ')': - case '{': - case '}': case ']': case ';': case ',': diff --git a/Ast/src/Parser.cpp b/Ast/src/Parser.cpp 
index e46eebf..cc624c1 100644 --- a/Ast/src/Parser.cpp +++ b/Ast/src/Parser.cpp @@ -23,10 +23,14 @@ LUAU_FASTFLAGVARIABLE(LuauErrorDoubleHexPrefix, false) LUAU_FASTFLAGVARIABLE(LuauLintParseIntegerIssues, false) LUAU_DYNAMIC_FASTFLAGVARIABLE(LuaReportParseIntegerIssues, false) +LUAU_FASTFLAGVARIABLE(LuauInterpolatedStringBaseSupport, false) + bool lua_telemetry_parsed_out_of_range_bin_integer = false; bool lua_telemetry_parsed_out_of_range_hex_integer = false; bool lua_telemetry_parsed_double_prefix_hex_integer = false; +#define ERROR_INVALID_INTERP_DOUBLE_BRACE "Double braces are not permitted within interpolated strings. Did you mean '\\{'?" + namespace Luau { @@ -1567,6 +1571,12 @@ AstTypeOrPack Parser::parseSimpleTypeAnnotation(bool allowPack) else return {reportTypeAnnotationError(begin, {}, /*isMissing*/ false, "String literal contains malformed escape sequence")}; } + else if (lexer.current().type == Lexeme::InterpStringBegin || lexer.current().type == Lexeme::InterpStringSimple) + { + parseInterpString(); + + return {reportTypeAnnotationError(begin, {}, /*isMissing*/ false, "Interpolated string literals cannot be used as types")}; + } else if (lexer.current().type == Lexeme::BrokenString) { Location location = lexer.current().location; @@ -2215,15 +2225,24 @@ AstExpr* Parser::parseSimpleExpr() { return parseNumber(); } - else if (lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::QuotedString) + else if (lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::QuotedString || (FFlag::LuauInterpolatedStringBaseSupport && lexer.current().type == Lexeme::InterpStringSimple)) { return parseString(); } + else if (FFlag::LuauInterpolatedStringBaseSupport && lexer.current().type == Lexeme::InterpStringBegin) + { + return parseInterpString(); + } else if (lexer.current().type == Lexeme::BrokenString) { nextLexeme(); return reportExprError(start, {}, "Malformed string"); } + else if (lexer.current().type == 
Lexeme::BrokenInterpDoubleBrace) + { + nextLexeme(); + return reportExprError(start, {}, ERROR_INVALID_INTERP_DOUBLE_BRACE); + } else if (lexer.current().type == Lexeme::Dot3) { if (functionStack.back().vararg) @@ -2614,11 +2633,11 @@ AstArray Parser::parseTypeParams() std::optional> Parser::parseCharArray() { - LUAU_ASSERT(lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::RawString); + LUAU_ASSERT(lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::InterpStringSimple); scratchData.assign(lexer.current().data, lexer.current().length); - if (lexer.current().type == Lexeme::QuotedString) + if (lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::InterpStringSimple) { if (!Lexer::fixupQuotedString(scratchData)) { @@ -2645,6 +2664,70 @@ AstExpr* Parser::parseString() return reportExprError(location, {}, "String literal contains malformed escape sequence"); } +AstExpr* Parser::parseInterpString() +{ + TempVector> strings(scratchString); + TempVector expressions(scratchExpr); + + Location startLocation = lexer.current().location; + + do { + Lexeme currentLexeme = lexer.current(); + LUAU_ASSERT( + currentLexeme.type == Lexeme::InterpStringBegin + || currentLexeme.type == Lexeme::InterpStringMid + || currentLexeme.type == Lexeme::InterpStringEnd + || currentLexeme.type == Lexeme::InterpStringSimple + ); + + Location location = currentLexeme.location; + + Location startOfBrace = Location(location.end, 1); + + scratchData.assign(currentLexeme.data, currentLexeme.length); + + if (!Lexer::fixupQuotedString(scratchData)) + { + nextLexeme(); + return reportExprError(startLocation, {}, "Interpolated string literal contains malformed escape sequence"); + } + + AstArray chars = copy(scratchData); + + nextLexeme(); + + strings.push_back(chars); + + if (currentLexeme.type == Lexeme::InterpStringEnd || currentLexeme.type == Lexeme::InterpStringSimple) + { + 
AstArray> stringsArray = copy(strings); + AstArray expressionsArray = copy(expressions); + + return allocator.alloc(startLocation, stringsArray, expressionsArray); + } + + AstExpr* expression = parseExpr(); + + expressions.push_back(expression); + + switch (lexer.current().type) + { + case Lexeme::InterpStringBegin: + case Lexeme::InterpStringMid: + case Lexeme::InterpStringEnd: + break; + case Lexeme::BrokenInterpDoubleBrace: + nextLexeme(); + return reportExprError(location, {}, ERROR_INVALID_INTERP_DOUBLE_BRACE); + case Lexeme::BrokenString: + nextLexeme(); + return reportExprError(location, {}, "Malformed interpolated string, did you forget to add a '}'?"); + default: + return reportExprError(location, {}, "Malformed interpolated string, got %s", lexer.current().toString().c_str()); + } + } while (true); +} + AstExpr* Parser::parseNumber() { Location start = lexer.current().location; diff --git a/Ast/src/StringUtils.cpp b/Ast/src/StringUtils.cpp index 0dc3f3f..11e0076 100644 --- a/Ast/src/StringUtils.cpp +++ b/Ast/src/StringUtils.cpp @@ -230,19 +230,25 @@ bool isIdentifier(std::string_view s) return (s.find_first_not_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890_") == std::string::npos); } -std::string escape(std::string_view s) +std::string escape(std::string_view s, bool escapeForInterpString) { std::string r; r.reserve(s.size() + 50); // arbitrary number to guess how many characters we'll be inserting for (uint8_t c : s) { - if (c >= ' ' && c != '\\' && c != '\'' && c != '\"') + if (c >= ' ' && c != '\\' && c != '\'' && c != '\"' && c != '`' && c != '{') r += c; else { r += '\\'; + if (escapeForInterpString && (c == '`' || c == '{')) + { + r += c; + continue; + } + switch (c) { case '\a': diff --git a/Compiler/src/Compiler.cpp b/Compiler/src/Compiler.cpp index bd8744c..a9394d7 100644 --- a/Compiler/src/Compiler.cpp +++ b/Compiler/src/Compiler.cpp @@ -14,6 +14,8 @@ #include #include +#include + #include 
LUAU_FASTINTVARIABLE(LuauCompileLoopUnrollThreshold, 25) @@ -25,6 +27,8 @@ LUAU_FASTINTVARIABLE(LuauCompileInlineDepth, 5) LUAU_FASTFLAGVARIABLE(LuauCompileXEQ, false) +LUAU_FASTFLAG(LuauInterpolatedStringBaseSupport) + LUAU_FASTFLAGVARIABLE(LuauCompileOptimalAssignment, false) LUAU_FASTFLAGVARIABLE(LuauCompileExtractK, false) @@ -1585,6 +1589,76 @@ struct Compiler } } + void compileExprInterpString(AstExprInterpString* expr, uint8_t target, bool targetTemp) + { + size_t formatCapacity = 0; + for (AstArray string : expr->strings) + { + formatCapacity += string.size + std::count(string.data, string.data + string.size, '%'); + } + + std::string formatString; + formatString.reserve(formatCapacity); + + size_t stringsLeft = expr->strings.size; + + for (AstArray string : expr->strings) + { + if (memchr(string.data, '%', string.size)) + { + for (size_t characterIndex = 0; characterIndex < string.size; ++characterIndex) + { + char character = string.data[characterIndex]; + formatString.push_back(character); + + if (character == '%') + formatString.push_back('%'); + } + } + else + formatString.append(string.data, string.size); + + stringsLeft--; + + if (stringsLeft > 0) + formatString += "%*"; + } + + size_t formatStringSize = formatString.size(); + + // We can't keep a pointer to formatString.data(): a std::string with a short (inline) buffer is not + // pinned in memory, so the bytes are copied to a heap array that interpStrings takes ownership of. 
+ std::unique_ptr formatStringPtr(new char[formatStringSize]); + memcpy(formatStringPtr.get(), formatString.data(), formatStringSize); + + AstArray formatStringArray{formatStringPtr.get(), formatStringSize}; + interpStrings.emplace_back(std::move(formatStringPtr)); // invalidates formatStringPtr, but keeps formatStringArray intact + + int32_t formatStringIndex = bytecode.addConstantString(sref(formatStringArray)); + if (formatStringIndex < 0) + CompileError::raise(expr->location, "Exceeded constant limit; simplify the code to compile"); + + RegScope rs(this); + + uint8_t baseReg = allocReg(expr, unsigned(2 + expr->expressions.size)); // do not narrow to 8 bits here: 254+ expressions would wrap the count + + emitLoadK(baseReg, formatStringIndex); + + for (size_t index = 0; index < expr->expressions.size; ++index) + compileExprTempTop(expr->expressions.data[index], uint8_t(baseReg + 2 + index)); + + BytecodeBuilder::StringRef formatMethod = sref(AstName("format")); + + int32_t formatMethodIndex = bytecode.addConstantString(formatMethod); + if (formatMethodIndex < 0) + CompileError::raise(expr->location, "Exceeded constant limit; simplify the code to compile"); + + bytecode.emitABC(LOP_NAMECALL, baseReg, baseReg, uint8_t(BytecodeBuilder::getStringHash(formatMethod))); + bytecode.emitAux(formatMethodIndex); + bytecode.emitABC(LOP_CALL, baseReg, uint8_t(expr->expressions.size + 2), 2); + bytecode.emitABC(LOP_MOVE, target, baseReg, 0); + } + static uint8_t encodeHashSize(unsigned int hashSize) { size_t hashSizeLog2 = 0; @@ -2059,6 +2133,10 @@ struct Compiler { compileExprIfElse(expr, target, targetTemp); } + else if (AstExprInterpString* interpString = node->as(); FFlag::LuauInterpolatedStringBaseSupport && interpString) + { + compileExprInterpString(interpString, target, targetTemp); + } else { LUAU_ASSERT(!"Unknown expression type"); @@ -3808,6 +3886,7 @@ struct Compiler std::vector loops; std::vector inlineFrames; std::vector captures; + std::vector> interpStrings; }; void compileOrThrow(BytecodeBuilder& bytecode, const ParseResult& 
parseResult, const AstNameTable& names, const CompileOptions& inputOptions) diff --git a/Compiler/src/ConstantFolding.cpp b/Compiler/src/ConstantFolding.cpp index 34f7954..e35c883 100644 --- a/Compiler/src/ConstantFolding.cpp +++ b/Compiler/src/ConstantFolding.cpp @@ -349,6 +349,11 @@ struct ConstantVisitor : AstVisitor if (cond.type != Constant::Type_Unknown) result = cond.isTruthful() ? trueExpr : falseExpr; } + else if (AstExprInterpString* expr = node->as()) + { + for (AstExpr* expression : expr->expressions) + analyze(expression); + } else { LUAU_ASSERT(!"Unknown expression type"); diff --git a/Compiler/src/CostModel.cpp b/Compiler/src/CostModel.cpp index 81cbfd7..ffc1cb1 100644 --- a/Compiler/src/CostModel.cpp +++ b/Compiler/src/CostModel.cpp @@ -215,6 +215,16 @@ struct CostVisitor : AstVisitor { return model(expr->condition) + model(expr->trueExpr) + model(expr->falseExpr) + 2; } + else if (AstExprInterpString* expr = node->as()) + { + // Baseline cost of string.format + Cost cost = 3; + + for (AstExpr* innerExpression : expr->expressions) + cost += model(innerExpression); + + return cost; + } else { LUAU_ASSERT(!"Unknown expression type"); diff --git a/docs/_pages/grammar.md b/docs/_pages/grammar.md index ffb1dbf..90d918e 100644 --- a/docs/_pages/grammar.md +++ b/docs/_pages/grammar.md @@ -44,7 +44,8 @@ functioncall = prefixexp funcargs | prefixexp ':' NAME funcargs exp = (asexp | unop exp) { binop exp } ifelseexp = 'if' exp 'then' exp {'elseif' exp 'then' exp} 'else' exp asexp = simpleexp ['::' Type] -simpleexp = NUMBER | STRING | 'nil' | 'true' | 'false' | '...' | tableconstructor | 'function' body | prefixexp | ifelseexp +stringinterp = INTERP_BEGIN exp { INTERP_MID exp } INTERP_END +simpleexp = NUMBER | STRING | 'nil' | 'true' | 'false' | '...' 
| tableconstructor | 'function' body | prefixexp | ifelseexp | stringinterp funcargs = '(' [explist] ')' | tableconstructor | STRING tableconstructor = '{' [fieldlist] '}' diff --git a/tests/AstJsonEncoder.test.cpp b/tests/AstJsonEncoder.test.cpp index 3ff3674..a23f6f4 100644 --- a/tests/AstJsonEncoder.test.cpp +++ b/tests/AstJsonEncoder.test.cpp @@ -2,6 +2,7 @@ #include "Luau/Ast.h" #include "Luau/AstJsonEncoder.h" #include "Luau/Parser.h" +#include "ScopedFlags.h" #include "doctest.h" @@ -175,6 +176,17 @@ TEST_CASE_FIXTURE(JsonEncoderFixture, "encode_AstExprIfThen") CHECK(toJson(statement) == expected); } +TEST_CASE_FIXTURE(JsonEncoderFixture, "encode_AstExprInterpString") +{ + ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true}; + + AstStat* statement = expectParseStatement("local a = `var = {x}`"); + + std::string_view expected = + R"({"type":"AstStatLocal","location":"0,0 - 0,17","vars":[{"luauType":null,"name":"a","type":"AstLocal","location":"0,6 - 0,7"}],"values":[{"type":"AstExprInterpString","location":"0,10 - 0,17","strings":["var = ",""],"expressions":[{"type":"AstExprGlobal","location":"0,18 - 0,19","global":"x"}]}]})"; + + CHECK(toJson(statement) == expected); +} TEST_CASE("encode_AstExprLocal") { diff --git a/tests/Autocomplete.test.cpp b/tests/Autocomplete.test.cpp index 25447dd..988cbe8 100644 --- a/tests/Autocomplete.test.cpp +++ b/tests/Autocomplete.test.cpp @@ -2708,6 +2708,15 @@ a = if temp then even else abc@3 CHECK(ac.entryMap.count("abcdef")); } +TEST_CASE_FIXTURE(ACFixture, "autocomplete_interpolated_string") +{ + check(R"(f(`expression = {@1}`))"); + + auto ac = autocomplete('1'); + CHECK(ac.entryMap.count("table")); + CHECK_EQ(ac.context, AutocompleteContext::Expression); +} + TEST_CASE_FIXTURE(ACFixture, "autocomplete_explicit_type_pack") { check(R"( diff --git a/tests/Compiler.test.cpp b/tests/Compiler.test.cpp index a2e748a..1aa6911 100644 --- a/tests/Compiler.test.cpp +++ b/tests/Compiler.test.cpp @@ -1230,6 +1230,58 @@ 
RETURN R0 0 )"); } +TEST_CASE("InterpStringWithNoExpressions") +{ + ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true}; + + CHECK_EQ(compileFunction0(R"(return "hello")"), compileFunction0("return `hello`")); +} + +TEST_CASE("InterpStringZeroCost") +{ + ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true}; + + CHECK_EQ( + "\n" + compileFunction0(R"(local _ = `hello, {"world"}!`)"), + R"( +LOADK R1 K0 +LOADK R3 K1 +NAMECALL R1 R1 K2 +CALL R1 2 1 +MOVE R0 R1 +RETURN R0 0 +)" + ); +} + +TEST_CASE("InterpStringRegisterCleanup") +{ + ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true}; + + CHECK_EQ( + "\n" + compileFunction0(R"( + local a, b, c = nil, "um", "uh oh" + a = `foo{"bar"}` + print(a) + )"), + + R"( +LOADNIL R0 +LOADK R1 K0 +LOADK R2 K1 +LOADK R3 K2 +LOADK R5 K3 +NAMECALL R3 R3 K4 +CALL R3 2 1 +MOVE R0 R3 +GETIMPORT R3 6 +MOVE R4 R0 +CALL R3 1 0 +RETURN R0 0 +)" + ); +} + TEST_CASE("ConstantFoldArith") { CHECK_EQ("\n" + compileFunction0("return 10 + 2"), R"( diff --git a/tests/Conformance.test.cpp b/tests/Conformance.test.cpp index be2feac..89cb075 100644 --- a/tests/Conformance.test.cpp +++ b/tests/Conformance.test.cpp @@ -294,6 +294,14 @@ TEST_CASE("Strings") runConformance("strings.lua"); } +TEST_CASE("StringInterp") +{ + ScopedFastFlag sffInterpStrings{"LuauInterpolatedStringBaseSupport", true}; + ScopedFastFlag sffTostringFormat{"LuauTostringFormatSpecifier", true}; + + runConformance("stringinterp.lua"); +} + TEST_CASE("VarArg") { runConformance("vararg.lua"); diff --git a/tests/Lexer.test.cpp b/tests/Lexer.test.cpp index 20d8d0d..890d100 100644 --- a/tests/Lexer.test.cpp +++ b/tests/Lexer.test.cpp @@ -138,4 +138,90 @@ TEST_CASE("lookahead") CHECK_EQ(lexer.lookahead().type, Lexeme::Eof); } +TEST_CASE("string_interpolation_basic") +{ + ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true}; + + const std::string testInput = R"(`foo {"bar"}`)"; + Luau::Allocator alloc; + AstNameTable table(alloc); + Lexer 
lexer(testInput.c_str(), testInput.size(), table); + + Lexeme interpBegin = lexer.next(); + CHECK_EQ(interpBegin.type, Lexeme::InterpStringBegin); + + Lexeme quote = lexer.next(); + CHECK_EQ(quote.type, Lexeme::QuotedString); + + Lexeme interpEnd = lexer.next(); + CHECK_EQ(interpEnd.type, Lexeme::InterpStringEnd); +} + +TEST_CASE("string_interpolation_double_brace") +{ + ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true}; + + const std::string testInput = R"(`foo{{bad}}bar`)"; + Luau::Allocator alloc; + AstNameTable table(alloc); + Lexer lexer(testInput.c_str(), testInput.size(), table); + + auto brokenInterpBegin = lexer.next(); + CHECK_EQ(brokenInterpBegin.type, Lexeme::BrokenInterpDoubleBrace); + CHECK_EQ(std::string(brokenInterpBegin.data, brokenInterpBegin.length), std::string("foo")); + + CHECK_EQ(lexer.next().type, Lexeme::Name); + + auto interpEnd = lexer.next(); + CHECK_EQ(interpEnd.type, Lexeme::InterpStringEnd); + CHECK_EQ(std::string(interpEnd.data, interpEnd.length), std::string("}bar")); +} + +TEST_CASE("string_interpolation_double_but_unmatched_brace") +{ + ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true}; + + const std::string testInput = R"(`{{oops}`, 1)"; + Luau::Allocator alloc; + AstNameTable table(alloc); + Lexer lexer(testInput.c_str(), testInput.size(), table); + + CHECK_EQ(lexer.next().type, Lexeme::BrokenInterpDoubleBrace); + CHECK_EQ(lexer.next().type, Lexeme::Name); + CHECK_EQ(lexer.next().type, Lexeme::InterpStringEnd); + CHECK_EQ(lexer.next().type, ','); + CHECK_EQ(lexer.next().type, Lexeme::Number); +} + +TEST_CASE("string_interpolation_unmatched_brace") +{ + ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true}; + + const std::string testInput = R"({ + `hello {"world"} + } -- this might be incorrectly parsed as a string)"; + Luau::Allocator alloc; + AstNameTable table(alloc); + Lexer lexer(testInput.c_str(), testInput.size(), table); + + CHECK_EQ(lexer.next().type, '{'); + CHECK_EQ(lexer.next().type, 
Lexeme::InterpStringBegin); + CHECK_EQ(lexer.next().type, Lexeme::QuotedString); + CHECK_EQ(lexer.next().type, Lexeme::BrokenString); + CHECK_EQ(lexer.next().type, '}'); +} + +TEST_CASE("string_interpolation_with_unicode_escape") +{ + ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true}; + + const std::string testInput = R"(`\u{1F41B}`)"; + Luau::Allocator alloc; + AstNameTable table(alloc); + Lexer lexer(testInput.c_str(), testInput.size(), table); + + CHECK_EQ(lexer.next().type, Lexeme::InterpStringSimple); + CHECK_EQ(lexer.next().type, Lexeme::Eof); +} + TEST_SUITE_END(); diff --git a/tests/Linter.test.cpp b/tests/Linter.test.cpp index 64c6d3e..5a4ab33 100644 --- a/tests/Linter.test.cpp +++ b/tests/Linter.test.cpp @@ -1662,17 +1662,31 @@ TEST_CASE_FIXTURE(Fixture, "WrongCommentOptimize") { LintResult result = lint(R"( --!optimize ---!optimize --!optimize me --!optimize 100500 --!optimize 2 )"); - REQUIRE_EQ(result.warnings.size(), 4); + REQUIRE_EQ(result.warnings.size(), 3); CHECK_EQ(result.warnings[0].text, "optimize directive requires an optimization level"); - CHECK_EQ(result.warnings[1].text, "optimize directive requires an optimization level"); - CHECK_EQ(result.warnings[2].text, "optimize directive uses unknown optimization level 'me', 0..2 expected"); - CHECK_EQ(result.warnings[3].text, "optimize directive uses unknown optimization level '100500', 0..2 expected"); + CHECK_EQ(result.warnings[1].text, "optimize directive uses unknown optimization level 'me', 0..2 expected"); + CHECK_EQ(result.warnings[2].text, "optimize directive uses unknown optimization level '100500', 0..2 expected"); + + result = lint("--!optimize "); + REQUIRE_EQ(result.warnings.size(), 1); + CHECK_EQ(result.warnings[0].text, "optimize directive requires an optimization level"); +} + +TEST_CASE_FIXTURE(Fixture, "TestStringInterpolation") +{ + ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true}; + + LintResult result = lint(R"( + --!nocheck + local _ = `unknown {foo}` 
+ )"); + + REQUIRE_EQ(result.warnings.size(), 1); } TEST_CASE_FIXTURE(Fixture, "IntegerParsing") diff --git a/tests/Parser.test.cpp b/tests/Parser.test.cpp index c55ec18..2dd4770 100644 --- a/tests/Parser.test.cpp +++ b/tests/Parser.test.cpp @@ -905,6 +905,146 @@ TEST_CASE_FIXTURE(Fixture, "parse_compound_assignment_error_multiple") } } +TEST_CASE_FIXTURE(Fixture, "parse_interpolated_string_double_brace_begin") +{ + ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true}; + + try + { + parse(R"( + _ = `{{oops}}` + )"); + FAIL("Expected ParseErrors to be thrown"); + } + catch (const ParseErrors& e) + { + CHECK_EQ("Double braces are not permitted within interpolated strings. Did you mean '\\{'?", e.getErrors().front().getMessage()); + } +} + +TEST_CASE_FIXTURE(Fixture, "parse_interpolated_string_double_brace_mid") +{ + ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true}; + + try + { + parse(R"( + _ = `{nice} {{oops}}` + )"); + FAIL("Expected ParseErrors to be thrown"); + } + catch (const ParseErrors& e) + { + CHECK_EQ("Double braces are not permitted within interpolated strings. 
Did you mean '\\{'?", e.getErrors().front().getMessage());
+    }
+}
+
+TEST_CASE_FIXTURE(Fixture, "parse_interpolated_string_without_end_brace")
+{
+    ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true};
+
+    auto columnOfEndBraceError = [this](const char* code)
+    {
+        try
+        {
+            parse(code);
+            FAIL("Expected ParseErrors to be thrown");
+            return UINT_MAX;
+        }
+        catch (const ParseErrors& e)
+        {
+            CHECK_EQ(e.getErrors().size(), 1);
+
+            auto error = e.getErrors().front();
+            CHECK_EQ("Malformed interpolated string, did you forget to add a '}'?", error.getMessage());
+            return error.getLocation().begin.column;
+        }
+    };
+
+    // This makes sure that the error is coming from the brace itself: same column for a longer expression, different column once the whole string is shifted right
+    CHECK_EQ(columnOfEndBraceError("_ = `{a`"), columnOfEndBraceError("_ = `{abcdefg`"));
+    CHECK_NE(columnOfEndBraceError("_ = `{a`"), columnOfEndBraceError("_ =       `{a`"));
+}
+
+TEST_CASE_FIXTURE(Fixture, "parse_interpolated_string_without_end_brace_in_table")
+{
+    ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true};
+
+    try
+    {
+        parse(R"(
+            _ = { `{a` }
+        )");
+        FAIL("Expected ParseErrors to be thrown");
+    }
+    catch (const ParseErrors& e)
+    {
+        CHECK_EQ(e.getErrors().size(), 2);
+
+        CHECK_EQ("Malformed interpolated string, did you forget to add a '}'?", e.getErrors().front().getMessage());
+        CHECK_EQ("Expected '}' (to close '{' at line 2), got <eof>", e.getErrors().back().getMessage());
+    }
+}
+
+TEST_CASE_FIXTURE(Fixture, "parse_interpolated_string_mid_without_end_brace_in_table")
+{
+    ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true};
+
+    try
+    {
+        parse(R"(
+            _ = { `x {"y"} {z` }
+        )");
+        FAIL("Expected ParseErrors to be thrown");
+    }
+    catch (const ParseErrors& e)
+    {
+        CHECK_EQ(e.getErrors().size(), 2);
+
+        CHECK_EQ("Malformed interpolated string, did you forget to add a '}'?", e.getErrors().front().getMessage());
+        CHECK_EQ("Expected '}' (to close '{' at line 2), got <eof>", e.getErrors().back().getMessage());
+    }
+}
+
+TEST_CASE_FIXTURE(Fixture, 
"parse_interpolated_string_as_type_fail") +{ + ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true}; + + try + { + parse(R"( + local a: `what` = `???` + local b: `what {"the"}` = `???` + local c: `what {"the"} heck` = `???` + )"); + FAIL("Expected ParseErrors to be thrown"); + } + catch (const ParseErrors& parseErrors) + { + CHECK_EQ(parseErrors.getErrors().size(), 3); + + for (ParseError error : parseErrors.getErrors()) + CHECK_EQ(error.getMessage(), "Interpolated string literals cannot be used as types"); + } +} + +TEST_CASE_FIXTURE(Fixture, "parse_interpolated_string_call_without_parens") +{ + ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true}; + + try + { + parse(R"( + _ = print `{42}` + )"); + FAIL("Expected ParseErrors to be thrown"); + } + catch (const ParseErrors& e) + { + CHECK_EQ("Expected identifier when parsing expression, got `{", e.getErrors().front().getMessage()); + } +} + TEST_CASE_FIXTURE(Fixture, "parse_nesting_based_end_detection") { try diff --git a/tests/Transpiler.test.cpp b/tests/Transpiler.test.cpp index d2ed9ae..e79bc9b 100644 --- a/tests/Transpiler.test.cpp +++ b/tests/Transpiler.test.cpp @@ -6,6 +6,7 @@ #include "Luau/Transpiler.h" #include "Fixture.h" +#include "ScopedFlags.h" #include "doctest.h" @@ -678,4 +679,22 @@ TEST_CASE_FIXTURE(Fixture, "transpile_for_in_multiple_types") CHECK_EQ(code, transpile(code, {}, true).code); } +TEST_CASE_FIXTURE(Fixture, "transpile_string_interp") +{ + ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true}; + + std::string code = R"( local _ = `hello {name}` )"; + + CHECK_EQ(code, transpile(code, {}, true).code); +} + +TEST_CASE_FIXTURE(Fixture, "transpile_string_literal_escape") +{ + ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true}; + + std::string code = R"( local _ = ` bracket = \{, backtick = \` = {'ok'} ` )"; + + CHECK_EQ(code, transpile(code, {}, true).code); +} + TEST_SUITE_END(); diff --git a/tests/TypeInfer.test.cpp b/tests/TypeInfer.test.cpp index 
8088936..e1dc502 100644 --- a/tests/TypeInfer.test.cpp +++ b/tests/TypeInfer.test.cpp @@ -8,6 +8,7 @@ #include "Luau/VisitTypeVar.h" #include "Fixture.h" +#include "ScopedFlags.h" #include "doctest.h" @@ -828,6 +829,41 @@ end LUAU_REQUIRE_NO_ERRORS(result); } +TEST_CASE_FIXTURE(Fixture, "tc_interpolated_string_basic") +{ + ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true}; + + CheckResult result = check(R"( + local foo: string = `hello {"world"}` + )"); + + LUAU_REQUIRE_NO_ERRORS(result); +} + +TEST_CASE_FIXTURE(Fixture, "tc_interpolated_string_with_invalid_expression") +{ + ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true}; + + CheckResult result = check(R"( + local function f(x: number) end + + local foo: string = `hello {f("uh oh")}` + )"); + + LUAU_REQUIRE_ERROR_COUNT(1, result); +} + +TEST_CASE_FIXTURE(Fixture, "tc_interpolated_string_constant_type") +{ + ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true}; + + CheckResult result = check(R"( + local foo: "hello" = `hello` + )"); + + LUAU_REQUIRE_NO_ERRORS(result); +} + /* * If it wasn't instantly obvious, we have the fuzzer to thank for this gem of a test. * diff --git a/tests/conformance/stringinterp.lua b/tests/conformance/stringinterp.lua new file mode 100644 index 0000000..efb25ba --- /dev/null +++ b/tests/conformance/stringinterp.lua @@ -0,0 +1,59 @@ +local function assertEq(left, right) + assert(typeof(left) == "string", "left is a " .. typeof(left)) + assert(typeof(right) == "string", "right is a " .. 
typeof(right)) + + if left ~= right then + error(string.format("%q ~= %q", left, right)) + end +end + +assertEq(`hello {"world"}`, "hello world") +assertEq(`Welcome {"to"} {"Luau"}!`, "Welcome to Luau!") + +assertEq(`2 + 2 = {2 + 2}`, "2 + 2 = 4") + +assertEq(`{1} {2} {3} {4} {5} {6} {7}`, "1 2 3 4 5 6 7") + +local combo = {5, 2, 8, 9} +assertEq(`The lock combinations are: {table.concat(combo, ", ")}`, "The lock combinations are: 5, 2, 8, 9") + +assertEq(`true = {true}`, "true = true") + +local name = "Luau" +assertEq(`Welcome to { + name +}!`, "Welcome to Luau!") + +local nameNotConstantEvaluated = (function() return "Luau" end)() +assertEq(`Welcome to {nameNotConstantEvaluated}!`, "Welcome to Luau!") + +assertEq(`This {localName} does not exist`, "This nil does not exist") + +assertEq(`Welcome to \ +{name}!`, "Welcome to \nLuau!") + +assertEq(`empty`, "empty") + +assertEq(`Escaped brace: \{}`, "Escaped brace: {}") +assertEq(`Escaped brace \{} with {"expression"}`, "Escaped brace {} with expression") +assertEq(`Backslash \ that escapes the space is not a part of the string...`, "Backslash that escapes the space is not a part of the string...") +assertEq(`Escaped backslash \\`, "Escaped backslash \\") +assertEq(`Escaped backtick: \``, "Escaped backtick: `") + +assertEq(`Hello {`from inside {"a nested string"}`}`, "Hello from inside a nested string") + +assertEq(`1 {`2 {`3 {4}`}`}`, "1 2 3 4") + +local health = 50 +assert(`You have {health}% health` == "You have 50% health") + +local function shadowsString(string) + return `Value is {string}` +end + +assertEq(shadowsString("hello"), "Value is hello") +assertEq(shadowsString(1), "Value is 1") + +assertEq(`\u{0041}\t`, "A\t") + +return "OK"