Skip to content

Commit 4dc49a4

Browse files
authored
Make unfinished strings not go past the end of the line anymore (#146)
* Ensure strings do not lex past the end of the line. Fixes #141.
* Remove unused diagnostic.
* Update changelog.
1 parent d713e43 commit 4dc49a4

File tree

6 files changed

+58
-69
lines changed

6 files changed

+58
-69
lines changed

CHANGES.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2929
- **[Breaking]** Fixed the way invalid statement diagnostics are generated by changing them to be generated on the skipped token instead of on the full missing identifier node by @GGG-KILLER in https://github.com/LorettaDevs/Loretta/pull/135.
3030
- **[Breaking]** `LuaSyntaxOptions.All` now accepts interpolated strings instead of hash strings by @GGG-KILLER in https://github.com/LorettaDevs/Loretta/pull/138.
3131
- **[Breaking]** `FunctionTypeSyntax.Parameters` (and associated method parameters) have been switched from a `SeparatedSyntaxList<TypeSyntax>` into a `SeparatedSyntaxList<FunctionTypeParameterSyntax>` by @GGG-KILLER in https://github.com/LorettaDevs/Loretta/pull/145.
32-
- **[Breaking]** `SyntaxFactory.FunctionType` now accepts a `SeparatedSyntaxList<FunctionTypeParameterSyntax>` instead of a `SeparatedSyntaxList<TypeSyntax>` by @GGG-KILLER in https://github.com/LorettaDevs/Loretta/pull/145.
32+
- **[Breaking]** `SyntaxFactory.FunctionType` now accepts a `SeparatedSyntaxList<FunctionTypeParameterSyntax>` instead of a `SeparatedSyntaxList<TypeSyntax>` by @GGG-KILLER in https://github.com/LorettaDevs/Loretta/pull/145.
33+
- **[Breaking]** Strings now end at the first unescaped line break instead of reporting an "Unescaped line break in string" error (LUA0002) and continuing until the next quote or EOF by @GGG-KILLER in https://github.com/LorettaDevs/Loretta/pull/146.
3334

3435
### Deprecated
3536

@@ -39,6 +40,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
3940

4041
- `acceptHashStrings` has been removed from `LuaSyntaxOptions` constructor and `.With` in favor of `backtickStringType` by @GGG-KILLER in https://github.com/LorettaDevs/Loretta/pull/138.
4142
- Diagnostic LUA0029 (Hash strings are not supported in this lua version) has been removed (and replaced with LUA0036) by @GGG-KILLER in https://github.com/LorettaDevs/Loretta/pull/138.
43+
- Diagnostic LUA0002 (Unescaped line break in string) has been removed by @GGG-KILLER in https://github.com/LorettaDevs/Loretta/pull/146.
4244

4345
### Fixed
4446

src/Compilers/Lua/Portable/Errors/ErrorCode.cs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
1-
namespace Loretta.CodeAnalysis.Lua
1+
using System.Diagnostics.CodeAnalysis;
2+
3+
namespace Loretta.CodeAnalysis.Lua
24
{
5+
[SuppressMessage("ReSharper", "InconsistentNaming")]
36
internal enum ErrorCode
47
{
58
Void = InternalErrorCode.Void,
69
Unknown = InternalErrorCode.Unknown,
710

811
// Lexer Errors
912
ERR_InvalidStringEscape = 1,
10-
ERR_UnescapedLineBreakInString = 2,
1113
ERR_UnfinishedString = 3,
1214
ERR_InvalidNumber = 4,
1315
ERR_NumericLiteralTooLarge = 5,
@@ -55,7 +57,7 @@ internal enum ErrorCode
5557
ERR_IfExpressionConditionExpected = 1009,
5658
ERR_ExpressionExpected = 1010,
5759

58-
// Using part instead of term here because it's more user friendly.
60+
// Using part instead of term here because it's more user-friendly.
5961
ERR_InvalidExpressionPart = 1011,
6062
ERR_InvalidStatement = 1012,
6163
ERR_CompoundAssignmentNotSupportedInLuaVersion = 1013,

src/Compilers/Lua/Portable/LuaResources.Designer.cs

Lines changed: 0 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/Compilers/Lua/Portable/LuaResources.resx

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -266,9 +266,6 @@
266266
<data name="ERR_UnderscoreInNumericLiteralNotSupportedInVersion" xml:space="preserve">
267267
<value>Underscores in numeric literals are not supported in this lua version</value>
268268
</data>
269-
<data name="ERR_UnescapedLineBreakInString" xml:space="preserve">
270-
<value>Unescaped line break in string</value>
271-
</data>
272269
<data name="ERR_UnexpectedToken" xml:space="preserve">
273270
<value>Unexpected token '{0}'</value>
274271
<comment>{0} is the actual kind</comment>

src/Compilers/Lua/Portable/Parser/Lexer.ShortString.cs

Lines changed: 16 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -12,35 +12,18 @@ private void ScanStringLiteral(ref TokenInfo info)
1212
LorettaDebug.Assert(quote is '"' or '\'' or '`');
1313

1414
char ch;
15-
while (!IsAtEnd(ch = TextWindow.PeekChar()) && ch != quote)
15+
while (!IsAtEnd(ch = TextWindow.PeekChar()) && ch != quote && !CharUtils.IsNewLine(ch))
1616
{
17-
var charStart = TextWindow.Position;
18-
switch (ch)
17+
if (ch == '\\')
1918
{
20-
case '\\':
21-
{
22-
var high = ScanEscapeSequence(out var low);
23-
if (high != SlidingTextWindow.InvalidCharacter)
24-
{
25-
_builder.Append(high);
26-
if (low != SlidingTextWindow.InvalidCharacter) _builder.Append(low);
27-
}
28-
}
29-
break;
30-
31-
case '\n':
32-
case '\r':
33-
{
34-
_builder.Append(TextWindow.NextChar());
35-
char ch2;
36-
if (CharUtils.IsNewLine(ch2 = TextWindow.PeekChar()) && ch != ch2)
37-
_builder.Append(TextWindow.NextChar());
38-
39-
AddError(charStart, TextWindow.Position - charStart, ErrorCode.ERR_UnescapedLineBreakInString);
40-
}
41-
break;
42-
43-
default: _builder.Append(TextWindow.NextChar()); break;
19+
var high = ScanEscapeSequence(out var low);
20+
if (high == SlidingTextWindow.InvalidCharacter) continue;
21+
_builder.Append(high);
22+
if (low != SlidingTextWindow.InvalidCharacter) _builder.Append(low);
23+
}
24+
else
25+
{
26+
_builder.Append(TextWindow.NextChar());
4427
}
4528
}
4629

@@ -423,7 +406,7 @@ private void ScanInterpolatedStringLiteralContents(ArrayBuilder<Interpolation>?
423406
{
424407
while (true)
425408
{
426-
if (IsAtEnd(allowNewline: true))
409+
if (IsAtEnd(allowNewline: false))
427410
{
428411
// Error: reached end of line/file before the end of the string; pop out. The error will be reported in
429412
// ScanInterpolatedStringLiteralEnd
@@ -438,6 +421,11 @@ private void ScanInterpolatedStringLiteralContents(ArrayBuilder<Interpolation>?
438421
HandleOpenBraceInContent(interpolations);
439422
continue;
440423

424+
case '\\':
425+
// We need to handle escapes here, but we don't care about any issues they have
426+
lexer.ScanEscapeSequence(out _);
427+
continue;
428+
441429
default:
442430
// found some other character in the string portion. Just consume it as content and continue.
443431
lexer.TextWindow.AdvanceChar();

src/Compilers/Lua/Test/Portable/Lexical/LexicalErrorTests.cs

Lines changed: 34 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
namespace Loretta.CodeAnalysis.Lua.UnitTests.Lexical
77
{
8-
public class LexicalErrorTests : LuaTestBase
8+
public sealed class LexicalErrorTests : LuaTestBase
99
{
1010
private static void ParseAndValidate(string text, LuaSyntaxOptions? options = null, params DiagnosticDescription[] expectedErrors) =>
1111
ParsingTestsBase.ParseAndValidate(text, options, expectedErrors);
@@ -45,36 +45,42 @@ public void Lexer_EmitsDiagnosticsOn_InvalidEscapes()
4545

4646
[Fact]
4747
[Trait("Category", "Lexer/Diagnostics")]
48+
public void Lexer_EmitsDiagnosticsOn_StringWithLineBreakButLexesRestProperly()
49+
{
50+
const string source = "local str1 = \"some\nlocal str2 = 'some\r\nlocal str3 = \"some\rlocal str4 = 'some";
51+
ParseAndValidate(source, null,
52+
// (1,14): error LUA0003: Unfinished string
53+
// local str1 = "some
54+
Diagnostic(ErrorCode.ERR_UnfinishedString, @"""some").WithLocation(1, 14),
55+
// (2,14): error LUA0003: Unfinished string
56+
// local str2 = 'some
57+
Diagnostic(ErrorCode.ERR_UnfinishedString, "'some").WithLocation(2, 14),
58+
// (3,14): error LUA0003: Unfinished string
59+
// local str3 = "some
60+
Diagnostic(ErrorCode.ERR_UnfinishedString, @"""some").WithLocation(3, 14),
61+
// (4,14): error LUA0003: Unfinished string
62+
// local str4 = 'some
63+
Diagnostic(ErrorCode.ERR_UnfinishedString, "'some").WithLocation(4, 14));
64+
}
4865

49-
public void Lexer_EmitsDiagnosticsOn_StringWithLineBreak()
66+
[Fact]
67+
[Trait("Category", "Lexer/Diagnostics")]
68+
public void Lexer_EmitsDiagnosticsOn_InterpolatedStringWithLineBreakButLexesRestProperly()
5069
{
51-
const string source = @"
52-
local str1 = ""some" + "\n" + @"text""
53-
local str2 = 'some" + "\n" + @"text'
54-
local str3 = ""some" + "\r" + @"text""
55-
local str4 = 'some" + "\r" + @"text'
56-
local str5 = ""some" + "\r\n" + @"text""
57-
local str6 = 'some" + "\r\n" + @"text'
58-
";
70+
const string source = "local str1 = `some\nlocal str2 = `some\r\nlocal str3 = `some\rlocal str4 = `some";
5971
ParseAndValidate(source, null,
60-
// (2,19): error LUA0002: Unescaped line break in string
61-
// local str1 = "some\ntext"
62-
Diagnostic(ErrorCode.ERR_UnescapedLineBreakInString, "\n").WithLocation(2, 19),
63-
// (4,19): error LUA0002: Unescaped line break in string
64-
// local str2 = 'some\ntext'
65-
Diagnostic(ErrorCode.ERR_UnescapedLineBreakInString, "\n").WithLocation(4, 19),
66-
// (6,19): error LUA0002: Unescaped line break in string
67-
// local str3 = "some\rtext"
68-
Diagnostic(ErrorCode.ERR_UnescapedLineBreakInString, "\r").WithLocation(6, 19),
69-
// (8,19): error LUA0002: Unescaped line break in string
70-
// local str4 = 'some\rtext'
71-
Diagnostic(ErrorCode.ERR_UnescapedLineBreakInString, "\r").WithLocation(8, 19),
72-
// (10,19): error LUA0002: Unescaped line break in string
73-
// local str5 = "some\r\ntext"
74-
Diagnostic(ErrorCode.ERR_UnescapedLineBreakInString, "\r\n").WithLocation(10, 19),
75-
// (12,19): error LUA0002: Unescaped line break in string
76-
// local str6 = 'some\r\ntext'
77-
Diagnostic(ErrorCode.ERR_UnescapedLineBreakInString, "\r\n").WithLocation(12, 19));
72+
// (1,18): error LUA0003: Unfinished string
73+
// local str1 = `some
74+
Diagnostic(ErrorCode.ERR_UnfinishedString, "e").WithLocation(1, 18),
75+
// (2,18): error LUA0003: Unfinished string
76+
// local str2 = `some
77+
Diagnostic(ErrorCode.ERR_UnfinishedString, "e").WithLocation(2, 18),
78+
// (3,18): error LUA0003: Unfinished string
79+
// local str3 = `some
80+
Diagnostic(ErrorCode.ERR_UnfinishedString, "e").WithLocation(3, 18),
81+
// (4,18): error LUA0003: Unfinished string
82+
// local str4 = `some
83+
Diagnostic(ErrorCode.ERR_UnfinishedString, "e").WithLocation(4, 18));
7884
}
7985

8086
[Theory]

0 commit comments

Comments
 (0)