Skip to content

Commit 236181f

Browse files
mvzinkMohamedAbdeen-rs
authored andcommitted
readyset-sql-parsing: Wrap expression subparser
This allows tests which directly use the expression subparser to parse with both sqlparser-rs and nom-sql to verify parity. Change-Id: I91402127c9e2aa80f0037180ab2514f543214fd2 Reviewed-on: https://gerrit.readyset.name/c/readyset/+/9102 Tested-by: Buildkite CI Reviewed-by: Michael Zink <[email protected]> Reviewed-by: Mohamed Yasser Abdelhamed Abdeen <[email protected]>
1 parent 2d6215c commit 236181f

File tree

17 files changed

+185
-63
lines changed

17 files changed

+185
-63
lines changed

Cargo.lock

+3-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ slotmap = "1.0.7"
223223
smallvec = { version = "1.13.2", default-features = false }
224224
socket2 = "0.5"
225225
sqlformat = "0.3.1"
226-
sqlparser = { git = "https://github.com/apache/datafusion-sqlparser-rs", rev = "53aba68e2dc758e591292115b1dd5faf71374346" }
226+
sqlparser = { git = "https://github.com/apache/datafusion-sqlparser-rs", rev = "ac0367ce1d6f71d80108d9e19be4b6574b832080" }
227227
strawpoll = "0.2.3"
228228
streaming-iterator = "0.1"
229229
stringprep = "0.1.4"

dataflow-expression/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ postgres = { workspace = true, features = [
3838
"with-serde_json-1",
3939
"with-bit-vec-0_6",
4040
] }
41+
readyset-sql-parsing = { path = "../readyset-sql-parsing" }
4142
regex = { workspace = true }
4243
serial_test = { workspace = true }
4344
test-utils = { path = "../test-utils" }

dataflow-expression/src/eval.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -373,10 +373,10 @@ impl Expr {
373373
#[cfg(test)]
374374
mod tests {
375375
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
376-
use nom_sql::parse_expr;
377376
use readyset_data::{ArrayD, Collation, DfType, Dialect, IxDyn, PgEnumMetadata};
378377
use readyset_errors::internal;
379378
use readyset_sql::Dialect::*;
379+
use readyset_sql_parsing::parse_expr;
380380
use serde_json::json;
381381
use Expr::*;
382382

dataflow-expression/src/eval/builtins.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -1436,9 +1436,9 @@ mod tests {
14361436
use chrono::NaiveTime;
14371437
use chrono_tz::{Asia, Atlantic};
14381438
use lazy_static::lazy_static;
1439-
use nom_sql::parse_expr;
14401439
use readyset_errors::{internal, internal_err};
14411440
use readyset_sql::Dialect::*;
1441+
use readyset_sql_parsing::parse_expr;
14421442
use readyset_util::arbitrary::{
14431443
arbitrary_timestamp_naive_date_time, arbitrary_timestamp_naive_date_time_for_timezone,
14441444
};
@@ -2099,7 +2099,7 @@ mod tests {
20992099

21002100
#[test]
21012101
fn substring_with_from_and_for() {
2102-
let expr = parse_and_lower("SUBSTRING('abcdef', c0, c1)", MySQL);
2102+
let expr = parse_and_lower("substring('abcdef', c0, c1)", MySQL);
21032103
let call_with =
21042104
|from: i64, len: i64| expr.eval::<DfValue>(&[from.into(), len.into()]).unwrap();
21052105

@@ -2116,21 +2116,21 @@ mod tests {
21162116

21172117
#[test]
21182118
fn substring_multibyte() {
2119-
let expr = parse_and_lower("SUBSTRING('é' from 1 for 1)", PostgreSQL);
2119+
let expr = parse_and_lower("substring('é' from 1 for 1)", PostgreSQL);
21202120
let res = expr.eval::<DfValue>(&[]).unwrap();
21212121
assert_eq!(res, "é".into());
21222122
}
21232123

21242124
#[test]
21252125
fn substring_with_from() {
2126-
let expr = parse_and_lower("SUBSTRING('abcdef' from c0)", PostgreSQL);
2126+
let expr = parse_and_lower("substring('abcdef' from c0)", PostgreSQL);
21272127
let res = expr.eval::<DfValue>(&[2.into()]).unwrap();
21282128
assert_eq!(res, "bcdef".into());
21292129
}
21302130

21312131
#[test]
21322132
fn substring_with_for() {
2133-
let expr = parse_and_lower("SUBSTRING('abcdef' for c0)", PostgreSQL);
2133+
let expr = parse_and_lower("substring('abcdef' for c0)", PostgreSQL);
21342134
let res = expr.eval::<DfValue>(&[3.into()]).unwrap();
21352135
assert_eq!(res, "abc".into());
21362136
}

dataflow-expression/src/lower.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -1495,10 +1495,10 @@ impl Expr {
14951495

14961496
#[cfg(test)]
14971497
pub(crate) mod tests {
1498-
use nom_sql::parse_expr;
14991498
use readyset_data::{Collation, PgEnumMetadata};
15001499
use readyset_sql::ast::{BinaryOperator as AstBinaryOperator, Float, Literal};
15011500
use readyset_sql::Dialect as ParserDialect;
1501+
use readyset_sql_parsing::parse_expr;
15021502

15031503
use super::*;
15041504

@@ -1729,7 +1729,7 @@ pub(crate) mod tests {
17291729

17301730
#[test]
17311731
fn substr_regular() {
1732-
let input = parse_expr(ParserDialect::MySQL, "SUBSTR('abcdefghi', 1, 7)").unwrap();
1732+
let input = parse_expr(ParserDialect::MySQL, "substr('abcdefghi', 1, 7)").unwrap();
17331733
let res = Expr::lower(input, Dialect::DEFAULT_MYSQL, &no_op_lower_context()).unwrap();
17341734
assert_eq!(
17351735
res,
@@ -1755,7 +1755,7 @@ pub(crate) mod tests {
17551755

17561756
#[test]
17571757
fn substring_regular() {
1758-
let input = parse_expr(ParserDialect::MySQL, "SUBSTRING('abcdefghi', 1, 7)").unwrap();
1758+
let input = parse_expr(ParserDialect::MySQL, "substring('abcdefghi', 1, 7)").unwrap();
17591759
let res = Expr::lower(input, Dialect::DEFAULT_MYSQL, &no_op_lower_context()).unwrap();
17601760
assert_eq!(
17611761
res,
@@ -1781,7 +1781,7 @@ pub(crate) mod tests {
17811781

17821782
#[test]
17831783
fn substring_without_string_arg() {
1784-
let input = parse_expr(ParserDialect::MySQL, "SUBSTRING(123 from 2)").unwrap();
1784+
let input = parse_expr(ParserDialect::MySQL, "substring(123 from 2)").unwrap();
17851785
let res = Expr::lower(input, Dialect::DEFAULT_MYSQL, &no_op_lower_context()).unwrap();
17861786
assert_eq!(res.ty(), &DfType::DEFAULT_TEXT);
17871787
}

dataflow-expression/tests/common/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
use dataflow_expression::{Expr, LowerContext};
2-
use nom_sql::parse_expr;
32
use readyset_data::{DfType, DfValue};
43
use readyset_errors::{internal, ReadySetResult};
54
use readyset_sql::ast::{Column, Relation};
5+
use readyset_sql_parsing::parse_expr;
66

77
#[derive(Debug, Clone, Copy)]
88
struct TestLowerContext;

dataflow-expression/tests/mysql_oracle.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -145,10 +145,10 @@ async fn example_exprs_eval_same_as_mysql() {
145145
"json_overlaps('[]', '[]')",
146146
"json_overlaps('true', 'true')",
147147
"json_overlaps('[42]', '[0, 42, 0]')",
148-
"SUBSTRING('abcdef', 3)",
149-
"SUBSTRING('abcdef', 3, 2)",
150-
"SUBSTRING('abcdef', 1, 3)",
151-
"SUBSTRING('abcdef', '1', '3')",
148+
"substring('abcdef', 3)",
149+
"substring('abcdef', 3, 2)",
150+
"substring('abcdef', 1, 3)",
151+
"substring('abcdef', '1', '3')",
152152
"concat(1,2)",
153153
"concat('one',2)",
154154
"concat('one',2)",

readyset-mir/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ readyset-errors = { path = "../readyset-errors" }
2525
readyset-sql = { path = "../readyset-sql" }
2626

2727
[dev-dependencies]
28-
nom-sql = { path = "../nom-sql" }
28+
readyset-sql-parsing = { path = "../readyset-sql-parsing" }
2929
readyset-tracing = { path = "../readyset-tracing" }
3030

3131
[lints]

readyset-mir/src/rewrite/filters_to_join_keys.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -379,10 +379,10 @@ pub(crate) fn convert_filters_to_join_keys(query: &mut MirQuery<'_>) -> ReadySet
379379
#[cfg(test)]
380380
mod tests {
381381
use common::IndexType;
382-
use nom_sql::parse_expr;
383382
use readyset_client::ViewPlaceholder;
384383
use readyset_sql::ast::{self, ColumnSpecification, Relation, SqlType};
385384
use readyset_sql::Dialect;
385+
use readyset_sql_parsing::parse_expr;
386386

387387
use super::*;
388388
use crate::graph::MirGraph;

readyset-sql-parsing/src/lib.rs

+53-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
use readyset_sql::{ast::SqlQuery, Dialect};
1+
use readyset_sql::{
2+
ast::{Expr, SqlQuery},
3+
Dialect,
4+
};
25

36
#[cfg(feature = "sqlparser")]
47
use readyset_sql::{
@@ -471,13 +474,33 @@ fn parse_readyset_query(
471474
}
472475

473476
#[cfg(feature = "sqlparser")]
474-
pub fn parse_query(dialect: Dialect, input: impl AsRef<str>) -> Result<SqlQuery, String> {
475-
let nom_result = nom_sql::parse_query(dialect, input.as_ref());
477+
fn parse_readyset_expr(
478+
parser: &mut Parser,
479+
dialect: Dialect,
480+
_input: impl AsRef<str>,
481+
) -> Result<Expr, ReadysetParsingError> {
482+
Ok(parser.parse_expr()?.try_into_dialect(dialect)?)
483+
}
484+
485+
#[cfg(feature = "sqlparser")]
486+
fn parse_both_inner<S, T, NP, SP>(
487+
dialect: Dialect,
488+
input: S,
489+
nom_parser: NP,
490+
sqlparser_parser: SP,
491+
) -> Result<T, String>
492+
where
493+
T: PartialEq + std::fmt::Debug,
494+
S: AsRef<str>,
495+
for<'a> NP: FnOnce(Dialect, &'a str) -> Result<T, String>,
496+
for<'a> SP: FnOnce(&mut Parser, Dialect, &'a str) -> Result<T, ReadysetParsingError>,
497+
{
498+
let nom_result = nom_parser(dialect, input.as_ref());
476499
let sqlparser_dialect = sqlparser_dialect_from_readyset_dialect(dialect);
477500
let sqlparser_result = Parser::new(sqlparser_dialect.as_ref())
478501
.try_with_sql(input.as_ref())
479502
.map_err(Into::into)
480-
.and_then(|mut p| parse_readyset_query(&mut p, dialect, input.as_ref()));
503+
.and_then(|mut p| sqlparser_parser(&mut p, dialect, input.as_ref()));
481504

482505
match (&nom_result, sqlparser_result) {
483506
(Ok(nom_ast), Ok(sqlparser_ast)) => {
@@ -515,7 +538,33 @@ pub fn parse_query(dialect: Dialect, input: impl AsRef<str>) -> Result<SqlQuery,
515538
nom_result
516539
}
517540

541+
#[cfg(feature = "sqlparser")]
542+
pub fn parse_query(dialect: Dialect, input: impl AsRef<str>) -> Result<SqlQuery, String> {
543+
parse_both_inner(
544+
dialect,
545+
input,
546+
|d, s| nom_sql::parse_query(d, s),
547+
|p, d, s| parse_readyset_query(p, d, s),
548+
)
549+
}
550+
518551
#[cfg(not(feature = "sqlparser"))]
519552
pub fn parse_query(dialect: Dialect, input: impl AsRef<str>) -> Result<SqlQuery, String> {
520553
nom_sql::parse_query(dialect, input.as_ref())
521554
}
555+
556+
/// Parses a single expression; only intended for use in tests.
557+
#[cfg(feature = "sqlparser")]
558+
pub fn parse_expr(dialect: Dialect, input: impl AsRef<str>) -> Result<Expr, String> {
559+
parse_both_inner(
560+
dialect,
561+
input,
562+
|d, s| nom_sql::parse_expr(d, s),
563+
|p, d, s| parse_readyset_expr(p, d, s),
564+
)
565+
}
566+
567+
#[cfg(not(feature = "sqlparser"))]
568+
pub fn parse_expr(dialect: Dialect, input: impl AsRef<str>) -> Result<Expr, String> {
569+
nom_sql::parse_expr(dialect, input.as_ref())
570+
}

readyset-sql-parsing/tests/parity.rs

+21-13
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,23 @@ fn test_convert_with_using() {
328328
);
329329
}
330330

331+
#[test]
332+
fn test_empty_insert() {
333+
// MySQL parses `insert into t () VALUES ...` into `insert into t VALUES ...`
334+
check_parse_mysql!("INSERT INTO t VALUES ()");
335+
}
336+
337+
#[test]
338+
#[cfg(feature = "sqlparser")]
339+
#[should_panic(expected = "sqlparser error")]
340+
fn test_empty_insert_fails_in_postgres() {
341+
// Invalid postgres syntax, parsed by nom but not by sqlparser
342+
// Invalid because of empty values list
343+
check_parse_postgres!("INSERT INTO t VALUES ()");
344+
// Invalid because of empty cols list, accepted by mysql thought
345+
check_parse_postgres!("INSERT INTO t () VALUES ()");
346+
}
347+
331348
#[test]
332349
fn test_column_default_without_parens() {
333350
check_parse_both!(
@@ -354,10 +371,10 @@ fn test_op_any_all() {
354371

355372
#[test]
356373
fn test_substring() {
357-
check_parse_both!("SELECT SUBSTRING('foo', 1, 2) FROM t");
358-
check_parse_both!("SELECT SUBSTR('foo', 1, 2) FROM t");
359-
check_parse_both!("SELECT SUBSTRING('foo' FROM 1 FOR 2) FROM t");
360-
check_parse_both!("SELECT SUBSTR('foo' FROM 1 FOR 2) FROM t");
374+
check_parse_both!("SELECT substring('foo', 1, 2) FROM t");
375+
check_parse_both!("SELECT substr('foo', 1, 2) FROM t");
376+
check_parse_both!("SELECT substring('foo' FROM 1 FOR 2) FROM t");
377+
check_parse_both!("SELECT substr('foo' FROM 1 FOR 2) FROM t");
361378
}
362379

363380
#[test]
@@ -390,15 +407,6 @@ fn test_unsupported_op() {
390407
check_parse_postgres!("create cache from selecT * from t where a !~* 'f.*' and a ~ 'g.*'");
391408
}
392409

393-
#[test]
394-
#[cfg(feature = "sqlparser")]
395-
#[should_panic(expected = "sqlparser error")]
396-
/// Invalid postgres syntax, parsed by nom but not by sqlparser
397-
fn test_empty_insert_fails_in_postgres() {
398-
// Invalid because of empty values list
399-
check_parse_postgres!("INSERT INTO t VALUES ()");
400-
}
401-
402410
#[test]
403411
#[cfg(feature = "sqlparser")]
404412
#[should_panic(expected = "sqlparser error")]

readyset-sql-passes/src/expr/constant_fold.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,8 @@ pub fn constant_fold_expr(expr: &mut Expr, dialect: Dialect) {
7474

7575
#[cfg(test)]
7676
mod tests {
77-
use nom_sql::parse_expr;
7877
use readyset_sql::DialectDisplay;
78+
use readyset_sql_parsing::parse_expr;
7979

8080
use super::*;
8181

readyset-sql-passes/src/expr/normalize_negation.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,8 @@ pub fn normalize_negation(expr: &mut Expr) {
101101

102102
#[cfg(test)]
103103
mod tests {
104-
use nom_sql::parse_expr;
105104
use readyset_sql::{Dialect, DialectDisplay};
105+
use readyset_sql_parsing::parse_expr;
106106

107107
use super::*;
108108

@@ -175,8 +175,9 @@ mod tests {
175175

176176
#[test]
177177
fn normalize_question_mark() {
178-
let mut expr = parse_expr(Dialect::MySQL, "NOT (j ? 'key' AND NOT b)").unwrap();
179-
let expected = parse_expr(Dialect::MySQL, "NOT j ? 'key' OR b").unwrap();
178+
// The ? Operator is Postgres-specific
179+
let mut expr = parse_expr(Dialect::PostgreSQL, "NOT (j ? 'key' AND NOT b)").unwrap();
180+
let expected = parse_expr(Dialect::PostgreSQL, "NOT j ? 'key' OR b").unwrap();
180181
normalize_negation(&mut expr);
181182
assert_eq!(expr, expected);
182183
}

0 commit comments

Comments
 (0)