Skip to content

Commit 2d5c8d5

Browse files
authored
Feat/error recover predicate (#274)
* feat: optimize pgsql grammar * feat: add sql parser base * feat: apply SQLParserBase * feat: add getAllEntities method * test: test collect table when missing column
1 parent 09a7dc0 commit 2d5c8d5

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+17162
-19353
lines changed

src/grammar/flinksql/FlinkSqlParser.g4

+5
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ parser grammar FlinkSqlParser;
77
options {
88
tokenVocab=FlinkSqlLexer;
99
caseInsensitive= true;
10+
superClass=SQLParserBase;
11+
}
12+
13+
@header {
14+
import SQLParserBase from '../SQLParserBase';
1015
}
1116

1217
program

src/grammar/hive/HiveSqlParser.g4

+7
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@ options
2828
{
2929
tokenVocab=HiveSqlLexer;
3030
caseInsensitive= true;
31+
superClass=SQLParserBase;
32+
}
33+
34+
@header {
35+
import SQLParserBase from '../SQLParserBase';
3136
}
3237

3338
program
@@ -802,6 +807,7 @@ columnNameList
802807

803808
columnName
804809
: id_ (DOT id_)*
810+
| {this.shouldMatchEmpty()}?
805811
;
806812

807813
columnNameCreate
@@ -1859,6 +1865,7 @@ VALUES(1),(2) means 2 rows, 1 column each.
18591865
VALUES(1,2),(3,4) means 2 rows, 2 columns each.
18601866
VALUES(1,2,3) means 1 row, 3 columns
18611867
*/
1868+
18621869
valuesClause
18631870
: KW_VALUES valuesTableConstructor
18641871
;

src/grammar/impala/ImpalaSqlParser.g4

+7-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@ options
2222
{
2323
tokenVocab=ImpalaSqlLexer;
2424
caseInsensitive= true;
25+
superClass=SQLParserBase;
26+
}
27+
28+
@header {
29+
import SQLParserBase from '../SQLParserBase';
2530
}
2631

2732
program
@@ -75,7 +80,7 @@ createStatement
7580
createTableSelect
7681
: KW_CREATE KW_EXTERNAL? KW_TABLE ifNotExists? tableNameCreate (
7782
LPAREN columnDefinition (COMMA columnDefinition)* (COMMA constraintSpecification)? RPAREN
78-
)? (KW_PARTITIONED KW_BY (partitionedBy | columnAliases))? createCommonItem (
83+
)? (KW_PARTITIONED KW_BY (columnAliases | partitionedBy))? createCommonItem (
7984
KW_AS queryStatement
8085
)?
8186
;
@@ -555,6 +560,7 @@ functionNamePath
555560

556561
columnNamePath
557562
: qualifiedName
563+
| {this.shouldMatchEmpty()}?
558564
;
559565

560566
tableOrViewPath

src/grammar/mysql/MySqlParser.g4

+33-26
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ parser grammar MySqlParser;
3636
options {
3737
tokenVocab= MySqlLexer;
3838
caseInsensitive= true;
39+
superClass=SQLParserBase;
40+
}
41+
42+
@header {
43+
import SQLParserBase from '../SQLParserBase';
3944
}
4045

4146
// Top Level Description
@@ -212,8 +217,8 @@ administrationStatement
212217
;
213218

214219
utilityStatement
215-
: simpleDescribeStatement
216-
| fullDescribeStatement
220+
: fullDescribeStatement
221+
| simpleDescribeStatement
217222
| analyzeDescribeStatement
218223
| helpStatement
219224
| useStatement
@@ -2396,6 +2401,7 @@ columnNames
23962401
columnName
23972402
: uid (dottedId dottedId?)?
23982403
| .? dottedId dottedId?
2404+
| {this.shouldMatchEmpty()}?
23992405
;
24002406

24012407
tablespaceNameCreate
@@ -2751,12 +2757,12 @@ orReplace
27512757
// Functions
27522758

27532759
functionCall
2754-
: specificFunction # specificFunctionCall
2755-
| aggregateWindowedFunction # aggregateFunctionCall
2756-
| nonAggregateWindowedFunction # nonAggregateFunctionCall
2757-
| scalarFunctionName '(' functionArgs? ')' # scalarFunctionCall
2758-
| functionName '(' functionArgs? ')' # udfFunctionCall
2759-
| passwordFunctionClause # passwordFunctionCall
2760+
: specificFunction # specificFunctionCall
2761+
| aggregateWindowedFunction # aggregateFunctionCall
2762+
| nonAggregateWindowedFunction # nonAggregateFunctionCall
2763+
| scalarFunctionName ('(' ')' | '(' functionArgs ')') # scalarFunctionCall
2764+
| functionName ('(' ')' | '(' functionArgs ')') # udfFunctionCall
2765+
| passwordFunctionClause # passwordFunctionCall
27602766
;
27612767

27622768
specificFunction
@@ -2925,7 +2931,6 @@ functionArgs
29252931

29262932
functionArg
29272933
: constant
2928-
| columnName
29292934
| functionCall
29302935
| expression
29312936
;
@@ -2941,22 +2946,23 @@ expression
29412946
;
29422947

29432948
predicate
2944-
: predicate KW_NOT? KW_IN '(' (selectStatement | expressions) ')' # inPredicate
2945-
| predicate KW_IS nullNotnull # isNullPredicate
2946-
| left=predicate comparisonOperator right=predicate # binaryComparisonPredicate
2947-
| predicate comparisonOperator quantifier=(KW_ALL | KW_ANY | KW_SOME) '(' selectStatement ')' # subqueryComparisonPredicate
2948-
| predicate KW_NOT? KW_BETWEEN predicate KW_AND predicate # betweenPredicate
2949-
| predicate KW_SOUNDS KW_LIKE predicate # soundsLikePredicate
2950-
| predicate KW_NOT? KW_LIKE predicate (KW_ESCAPE STRING_LITERAL)? # likePredicate
2951-
| predicate KW_NOT? regex=(KW_REGEXP | KW_RLIKE) predicate # regexpPredicate
2952-
| predicate KW_MEMBER KW_OF '(' predicate ')' # jsonMemberOfPredicate
2953-
| expressionAtom # expressionAtomPredicate
2949+
: predicate KW_NOT? KW_IN '(' (selectStatement | expressions) ')' # inPredicate
2950+
| predicate KW_IS nullNotnull # isNullPredicate
2951+
| predicate comparisonOperator (
2952+
quantifier=(KW_ALL | KW_ANY | KW_SOME) '(' subQuery=selectStatement ')'
2953+
| right=predicate
2954+
) # binaryComparisonPredicate
2955+
| predicate KW_NOT? KW_BETWEEN predicate KW_AND predicate # betweenPredicate
2956+
| predicate KW_SOUNDS KW_LIKE predicate # soundsLikePredicate
2957+
| predicate KW_NOT? KW_LIKE predicate (KW_ESCAPE STRING_LITERAL)? # likePredicate
2958+
| predicate KW_NOT? regex=(KW_REGEXP | KW_RLIKE) predicate # regexpPredicate
2959+
| predicate KW_MEMBER KW_OF '(' predicate ')' # jsonMemberOfPredicate
2960+
| expressionAtom # expressionAtomPredicate
29542961
;
29552962

29562963
// Add in ASTVisitor nullNotnull in constant
29572964
expressionAtom
29582965
: constant # constantExpressionAtom
2959-
| columnName # columnNameExpressionAtom
29602966
| functionCall # functionCallExpressionAtom
29612967
| expressionAtom KW_COLLATE collationName # collateExpressionAtom
29622968
| mysqlVariable # mysqlVariableExpressionAtom
@@ -2968,9 +2974,10 @@ expressionAtom
29682974
| KW_EXISTS '(' selectStatement ')' # existsExpressionAtom
29692975
| '(' selectStatement ')' # subqueryExpressionAtom
29702976
| KW_INTERVAL expression intervalType # intervalExpressionAtom
2977+
| left=expressionAtom jsonOperator right=expressionAtom # jsonExpressionAtom
29712978
| left=expressionAtom bitOperator right=expressionAtom # bitExpressionAtom
29722979
| left=expressionAtom mathOperator right=expressionAtom # mathExpressionAtom
2973-
| left=expressionAtom jsonOperator right=expressionAtom # jsonExpressionAtom
2980+
| columnName # columnNameExpressionAtom
29742981
;
29752982

29762983
unaryOperator
@@ -2982,18 +2989,18 @@ unaryOperator
29822989
;
29832990

29842991
comparisonOperator
2985-
: comparisonBase
2986-
| '<' '>'
2992+
: '<' '>'
29872993
| '!' '='
29882994
| '<' '=' '>'
2995+
| comparisonBase
29892996
;
29902997

29912998
comparisonBase
2992-
: '='
2999+
: '<' '='
3000+
| '>' '='
3001+
| '='
29933002
| '>'
29943003
| '<'
2995-
| '<' '='
2996-
| '>' '='
29973004
;
29983005

29993006
logicalOperator

src/grammar/pgsql/PostgreSQLParser.g4

+7-1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ parser grammar PostgreSQLParser;
4242
options {
4343
tokenVocab= PostgreSQLLexer;
4444
caseInsensitive= true;
45+
superClass=SQLParserBase;
46+
}
47+
48+
@header {
49+
import SQLParserBase from '../SQLParserBase';
4550
}
4651

4752
program
@@ -4172,7 +4177,8 @@ procedure_name_create
41724177
;
41734178

41744179
column_name
4175-
: colid indirection_el* # columnName
4180+
: colid indirection_el* # columnName
4181+
| {this.shouldMatchEmpty()}? # columnNameMatch
41764182
;
41774183

41784184
column_name_create

src/grammar/spark/SparkSqlParser.g4

+6
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@ parser grammar SparkSqlParser;
2727
options {
2828
tokenVocab=SparkSqlLexer;
2929
caseInsensitive= true;
30+
superClass=SQLParserBase;
31+
}
32+
33+
@header {
34+
import SQLParserBase from '../SQLParserBase';
3035
}
3136

3237
program
@@ -440,6 +445,7 @@ viewName
440445

441446
columnName
442447
: multipartIdentifier
448+
| {this.shouldMatchEmpty()}?
443449
;
444450

445451
columnNameSeq

src/grammar/trinosql/TrinoSql.g4

+8-2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ grammar TrinoSql;
2525

2626
options {
2727
caseInsensitive= true;
28+
superClass=SQLParserBase;
29+
}
30+
31+
@header {
32+
import SQLParserBase from '../SQLParserBase';
2833
}
2934

3035
tokens {
@@ -173,8 +178,8 @@ statement
173178
KW_WHERE where= booleanExpression
174179
)? # update
175180
| KW_MERGE KW_INTO tableName (KW_AS? identifier)? KW_USING relation KW_ON expression mergeCase+ # merge
176-
| KW_SHOW KW_COMMENT KW_ON KW_TABLE tableName # showTableComment
177-
| KW_SHOW KW_COMMENT KW_ON KW_COLUMN columnName # showColumnComment
181+
| KW_SHOW KW_COMMENT KW_ON KW_TABLE tableName # showTableComment // dtstack
182+
| KW_SHOW KW_COMMENT KW_ON KW_COLUMN columnName # showColumnComment // dtstack
178183
;
179184

180185
query
@@ -746,6 +751,7 @@ functionName
746751

747752
columnName
748753
: qualifiedName
754+
| {this.shouldMatchEmpty()}?
749755
;
750756

751757
columnNameCreate

src/lib/SQLParserBase.ts

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import * as antlr from "antlr4ng";
2+
/**
 * Shared superclass for the generated SQL parsers.
 *
 * The grammars name this class in `options { superClass=SQLParserBase; }` and
 * call {@link SQLParserBase.shouldMatchEmpty} from a semantic predicate that
 * guards an empty alternative of their column-name rules.
 *
 * @typeParam T - Context type returned by the grammar's `program` rule.
 */
export default abstract class SQLParserBase<T = antlr.ParserRuleContext> extends antlr.Parser {
    public constructor(input: antlr.TokenStream) {
        super(input);
    }

    /** The `program` rule that each generated parser implements. */
    public abstract program(): T;

    /**
     * Token index that {@link SQLParserBase.shouldMatchEmpty} compares the
     * neighbouring tokens against. Defaults to `-1`.
     */
    public caretTokenIndex = -1;

    /**
     * Gate for {@link SQLParserBase.shouldMatchEmpty}: while `false`, the
     * predicate never succeeds.
     */
    public entityCollecting = false;

    /**
     * Semantic predicate used by the grammars as `{this.shouldMatchEmpty()}?`.
     *
     * @returns `true` only when `entityCollecting` is set and
     * `caretTokenIndex` lies between the index of the previous token
     * (`LT(-1)`) and the index of the next token (`LT(1)`), both inclusive.
     */
    public shouldMatchEmpty(): boolean {
        if (!this.entityCollecting) {
            return false;
        }

        // LT(-1) has no token to return at the very start of the stream.
        // Without a previous token the caret cannot be bracketed, so decline
        // the empty match instead of throwing on a null dereference.
        const previousToken = this.tokenStream.LT(-1);
        if (previousToken == null || previousToken.tokenIndex > this.caretTokenIndex) {
            return false;
        }

        // Same guard for the lookahead token; it is only fetched once the
        // lower bound has held, matching the original short-circuit order.
        const nextToken = this.tokenStream.LT(1);
        if (nextToken == null) {
            return false;
        }

        return nextToken.tokenIndex >= this.caretTokenIndex;
    }
}

src/lib/flinksql/FlinkSqlParser.ts

+4-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@ import { FlinkSqlParserVisitor } from "./FlinkSqlParserVisitor.js";
1111
type int = number;
1212

1313

14-
export class FlinkSqlParser extends antlr.Parser {
14+
import SQLParserBase from '../SQLParserBase';
15+
16+
17+
export class FlinkSqlParser extends SQLParserBase {
1518
public static readonly SPACE = 1;
1619
public static readonly COMMENT_INPUT = 2;
1720
public static readonly LINE_COMMENT = 3;

src/lib/flinksql/FlinkSqlParserListener.ts

+3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
import { ErrorNode, ParseTreeListener, ParserRuleContext, TerminalNode } from "antlr4ng";
44

55

6+
import SQLParserBase from '../SQLParserBase';
7+
8+
69
import { ProgramContext } from "./FlinkSqlParser.js";
710
import { SingleStatementContext } from "./FlinkSqlParser.js";
811
import { SqlStatementContext } from "./FlinkSqlParser.js";

src/lib/flinksql/FlinkSqlParserVisitor.ts

+3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
import { AbstractParseTreeVisitor } from "antlr4ng";
44

55

6+
import SQLParserBase from '../SQLParserBase';
7+
8+
69
import { ProgramContext } from "./FlinkSqlParser.js";
710
import { SingleStatementContext } from "./FlinkSqlParser.js";
811
import { SqlStatementContext } from "./FlinkSqlParser.js";

src/lib/hive/HiveSqlParser.interp

+1-1
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)