-
Notifications
You must be signed in to change notification settings - Fork 166
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add antlr grammar for test file format (#728)
Add parser changes for testcase files Add function test coverage changes
- Loading branch information
Showing
21 changed files
with
19,458 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
lexer grammar FuncTestCaseLexer; | ||
|
||
import SubstraitLexer; | ||
|
||
// Lexer-wide option: literals in this grammar match regardless of letter case. | ||
options { | ||
caseInsensitive = true; | ||
} | ||
|
||
// Spaces, tabs and line breaks go to the HIDDEN channel, so parser rules never see them. | ||
Whitespace : [ \t\n\r]+ -> channel(HIDDEN) ; | ||
|
||
// Keywords of the two '###' header lines (used by the parser's `version` and `include` rules). | ||
TripleHash: '###'; | ||
SubstraitScalarTest: 'SUBSTRAIT_SCALAR_TEST'; | ||
SubstraitInclude: 'SUBSTRAIT_INCLUDE'; | ||
|
||
// Format version: 'v', one or more digits, and an optional '.digits' part (e.g. v1.0). | ||
// DIGIT is not defined among the rules shown here; presumably it comes from the imported SubstraitLexer. | ||
FormatVersion | ||
: 'v' DIGIT+ ('.' DIGIT+)? | ||
; | ||
|
||
// A test-group description: '# ' followed by the rest of the line. | ||
// The terminating newline (optionally preceded by '\r') is part of the token and is required, | ||
// so a description that ends the input without a newline does not match this rule. | ||
DescriptionLine | ||
: '# ' ~[\r\n]* '\r'? '\n' | ||
; | ||
|
||
// Markers that stand in for a result value (parser rule `substraitError`). | ||
ErrorResult: '<!ERROR>'; | ||
UndefineResult: '<!UNDEFINED>'; | ||
// Keywords for option names (Overflow, Rounding) and option values (Error .. NaN); | ||
// see the parser rules `option_name` and `option_value`. NaN is also accepted by `floatLiteral`. | ||
Overflow: 'OVERFLOW'; | ||
Rounding: 'ROUNDING'; | ||
Error: 'ERROR'; | ||
Saturate: 'SATURATE'; | ||
Silent: 'SILENT'; | ||
TieToEven: 'TIE_TO_EVEN'; | ||
NaN: 'NAN'; | ||
|
||
// Optionally signed integer. | ||
// Int is not defined among the rules shown here; presumably it comes from the imported SubstraitLexer. | ||
IntegerLiteral | ||
: [+-]? Int | ||
; | ||
|
||
// Optionally signed digits with an optional fractional part of at least one digit. | ||
DecimalLiteral | ||
: [+-]? [0-9]+ ('.' [0-9]+)? | ||
; | ||
|
||
// Optionally signed digits, optional '.' with zero or more digits, optional exponent; | ||
// or an optionally signed 'inf'; or 'snan'. | ||
// Text matched by DecimalLiteral is also matched by the first alternative here. For matches of | ||
// equal length ANTLR picks the rule listed first, so such text is emitted under the earlier rule. | ||
FloatLiteral | ||
: [+-]? [0-9]+ ('.' [0-9]*)? ( 'E' [+-]? [0-9]+ )? | ||
| [+-]? 'inf' | ||
| 'snan' | ||
; | ||
|
||
// Boolean keyword: 'true' or 'false'. | ||
BooleanLiteral | ||
: 'true' | 'false' | ||
; | ||
|
||
// Fixed-width digit groups shared by the temporal literals below. | ||
fragment FourDigits: [0-9][0-9][0-9][0-9]; | ||
fragment TwoDigits: [0-9][0-9]; | ||
|
||
// The temporal literals below are single-quoted and listed before StringLiteral, so a quoted | ||
// string with exactly one of these shapes is tokenized as the temporal literal instead. | ||
// | ||
// Quoted timestamp with zone offset: yyyy-mm-ddThh:mm:ss, optional '.digits', then +hh:mm or -hh:mm. | ||
TimestampTzLiteral | ||
: '\'' FourDigits '-' TwoDigits '-' TwoDigits 'T' TwoDigits ':' TwoDigits ':' TwoDigits ( '.' [0-9]+ )? | ||
[+-] TwoDigits ':' TwoDigits '\'' | ||
; | ||
|
||
// Quoted timestamp without offset: yyyy-mm-ddThh:mm:ss with optional '.digits'. | ||
TimestampLiteral | ||
: '\'' FourDigits '-' TwoDigits '-' TwoDigits 'T' TwoDigits ':' TwoDigits ':' TwoDigits ( '.' [0-9]+ )? '\'' | ||
; | ||
|
||
// Quoted time of day: hh:mm:ss with optional '.digits'. | ||
TimeLiteral | ||
: '\'' TwoDigits ':' TwoDigits ':' TwoDigits ( '.' [0-9]+ )? '\'' | ||
; | ||
|
||
// Quoted calendar date: yyyy-mm-dd. | ||
DateLiteral | ||
: '\'' FourDigits '-' TwoDigits '-' TwoDigits '\'' | ||
; | ||
|
||
// Single-letter designators used inside interval literals. | ||
// They are regular (non-fragment) tokens: the lexer rules below reuse them, and the parser rules | ||
// intervalYearLiteral / intervalDayLiteral / timeInterval reference them as well. | ||
// NOTE(review): being non-fragment and case-insensitive, a lone letter such as 'p' or 't' in the | ||
// input can be tokenized by these rules - confirm this cannot clash with identifiers. | ||
PeriodPrefix: 'P'; | ||
TimePrefix: 'T'; | ||
YearPrefix: 'Y'; | ||
MSuffix: 'M'; // used for both months and minutes | ||
DaySuffix: 'D'; | ||
HourSuffix: 'H'; | ||
SecondSuffix: 'S'; | ||
FractionalSecondSuffix: 'F'; | ||
// Angle-bracket token names defined in terms of Lt / Gt, which are not defined among the rules | ||
// shown here (presumably from the imported SubstraitLexer). | ||
OAngleBracket: Lt; | ||
CAngleBracket: Gt; | ||
|
||
// Quoted year-month interval: 'P<n>Y' with an optional '<n>M' months part, or 'P<n>M' alone. | ||
IntervalYearLiteral | ||
: '\'' PeriodPrefix IntegerLiteral YearPrefix (IntegerLiteral MSuffix)? '\'' | ||
| '\'' PeriodPrefix IntegerLiteral MSuffix '\'' | ||
; | ||
|
||
// Quoted day-time interval: 'P<n>D' with an optional 'T<time>' part, or 'PT<time>' alone. | ||
IntervalDayLiteral | ||
: '\'' PeriodPrefix IntegerLiteral DaySuffix (TimePrefix TimeInterval)? '\'' | ||
| '\'' PeriodPrefix TimePrefix TimeInterval '\'' | ||
; | ||
|
||
// Time part of a day-time interval: components in the order H, M, S, F. | ||
// Each alternative starts at a different leading component; every later component is optional. | ||
fragment TimeInterval | ||
: IntegerLiteral HourSuffix (IntegerLiteral MSuffix)? (IntegerLiteral SecondSuffix)? | ||
(IntegerLiteral FractionalSecondSuffix)? | ||
| IntegerLiteral MSuffix (IntegerLiteral SecondSuffix)? (IntegerLiteral FractionalSecondSuffix)? | ||
| IntegerLiteral SecondSuffix (IntegerLiteral FractionalSecondSuffix)? | ||
| IntegerLiteral FractionalSecondSuffix | ||
; | ||
|
||
// The null keyword; the parser requires a '::' type annotation after it (rule `nullArg`). | ||
NullLiteral: 'null'; | ||
|
||
// Single-quoted string. Inside the quotes: a backslash followed by any character, | ||
// a doubled quote (''), or any character other than a quote or a backslash. | ||
StringLiteral | ||
: '\'' ('\\' . | '\'\'' | ~['\\])* '\'' | ||
; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,215 @@ | ||
parser grammar FuncTestCaseParser; | ||
|
||
// Parser options. | ||
// tokenVocab names the one token vocabulary this parser is generated against. It was previously | ||
// assigned twice (SubstraitLexer, then FuncTestCaseLexer); only a single value can be in effect. | ||
// FuncTestCaseLexer imports SubstraitLexer, so its vocabulary already covers both grammars. | ||
options { | ||
caseInsensitive = true; | ||
tokenVocab=FuncTestCaseLexer; | ||
} | ||
|
||
// Punctuation and identifier tokens used below (Colon, Comma, Identifier, OParen, CParen, OBracket, | ||
// CBracket, Eq) are not defined in FuncTestCaseLexer as shown; presumably they come from SubstraitLexer. | ||
// | ||
// Entry rule: a header, one or more test groups, then end of input. | ||
doc | ||
: header testGroup+ EOF | ||
; | ||
|
||
// The header is the version line followed by the include line, in that order. | ||
header | ||
: version include | ||
; | ||
|
||
// Version line, e.g. `### SUBSTRAIT_SCALAR_TEST: v1.0`. | ||
version | ||
: TripleHash SubstraitScalarTest Colon FormatVersion | ||
; | ||
|
||
// Include line: one or more comma-separated quoted paths, | ||
// e.g. `### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'`. | ||
include | ||
: TripleHash SubstraitInclude Colon StringLiteral (Comma StringLiteral)* | ||
; | ||
|
||
// The '# ...' line that opens a test group. | ||
testGroupDescription | ||
: DescriptionLine | ||
; | ||
|
||
// One test: name(arguments), an optional [options] list, '=', and the expected result, | ||
// e.g. `add(120::i8, 10::i8) [overflow:ERROR] = <!ERROR>`. | ||
testCase | ||
: functionName=Identifier OParen arguments CParen ( OBracket func_options CBracket )? Eq result | ||
; | ||
|
||
// A description line followed by one or more test cases. | ||
testGroup | ||
: testGroupDescription (testCase)+ | ||
; | ||
|
||
// Comma-separated argument list; at least one argument is required. | ||
arguments | ||
: argument (Comma argument)* | ||
; | ||
|
||
// Expected outcome of a test case: a typed value or an error/undefined marker. | ||
result | ||
: argument | ||
| substraitError | ||
; | ||
|
||
// A typed literal; one alternative per supported literal kind. | ||
argument | ||
: nullArg | ||
| intArg | ||
| floatArg | ||
| booleanArg | ||
| stringArg | ||
| decimalArg | ||
| dateArg | ||
| timeArg | ||
| timestampArg | ||
| timestampTzArg | ||
| intervalYearArg | ||
| intervalDayArg | ||
; | ||
|
||
// Any numeric token kind; used where the '::' type annotation decides the value's type. | ||
numericLiteral | ||
: DecimalLiteral | IntegerLiteral | floatLiteral | ||
; | ||
|
||
// A FloatLiteral token or the NaN keyword. | ||
floatLiteral | ||
: FloatLiteral | NaN | ||
; | ||
|
||
// Typed-argument rules: each is `<literal> :: <type>`. | ||
// DoubleColon and the type keyword tokens (I8, FP32, Bool, Str, Date, ...) are not defined in | ||
// FuncTestCaseLexer as shown; presumably they come from SubstraitLexer. | ||
// | ||
// null annotated with any datatype. | ||
nullArg: NullLiteral DoubleColon datatype; | ||
|
||
// Integer literal annotated with one of the four integer types, e.g. `120::i8`. | ||
intArg: IntegerLiteral DoubleColon (I8 | I16 | I32 | I64); | ||
|
||
// Any numeric literal (integer, decimal, float or NaN) annotated with FP32 or FP64. | ||
floatArg: numericLiteral DoubleColon (FP32 | FP64); | ||
|
||
// Any numeric literal annotated with a decimal type. | ||
decimalArg | ||
: numericLiteral DoubleColon decimalType | ||
; | ||
|
||
// Boolean literal annotated with Bool. | ||
booleanArg | ||
: BooleanLiteral DoubleColon Bool | ||
; | ||
|
||
// Quoted string annotated with Str. | ||
stringArg | ||
: StringLiteral DoubleColon Str | ||
; | ||
|
||
// Quoted date annotated with Date. | ||
dateArg | ||
: DateLiteral DoubleColon Date | ||
; | ||
|
||
// Quoted time annotated with Time. | ||
timeArg | ||
: TimeLiteral DoubleColon Time | ||
; | ||
|
||
// Quoted timestamp (no offset) annotated with Ts. | ||
timestampArg | ||
: TimestampLiteral DoubleColon Ts | ||
; | ||
|
||
// Quoted timestamp with offset annotated with TsTZ. | ||
timestampTzArg | ||
: TimestampTzLiteral DoubleColon TsTZ | ||
; | ||
|
||
// Quoted year-month interval annotated with IYear. | ||
intervalYearArg | ||
: IntervalYearLiteral DoubleColon IYear | ||
; | ||
|
||
// Quoted day-time interval annotated with IDay. | ||
intervalDayArg | ||
: IntervalDayLiteral DoubleColon IDay | ||
; | ||
|
||
// Parser-level decomposition of interval literals, with labels on each numeric component. | ||
// NOTE(review): among the rules shown in this grammar, nothing references intervalYearLiteral or | ||
// intervalDayLiteral; the *Arg rules use the IntervalYearLiteral / IntervalDayLiteral tokens | ||
// instead. Confirm whether these rules are reachable. | ||
// | ||
// Years with optional months, or months alone. | ||
intervalYearLiteral | ||
: PeriodPrefix (years=IntegerLiteral YearPrefix) (months=IntegerLiteral MSuffix)? | ||
| PeriodPrefix (months=IntegerLiteral MSuffix) | ||
; | ||
|
||
// Days with an optional time part, or a time part alone. | ||
intervalDayLiteral | ||
: PeriodPrefix (days=IntegerLiteral DaySuffix) (TimePrefix timeInterval)? | ||
| PeriodPrefix TimePrefix timeInterval | ||
; | ||
|
||
// Time components in the order hours, minutes, seconds, fractional seconds. | ||
// Each alternative starts at a different leading component; every later component is optional. | ||
timeInterval | ||
: hours=IntegerLiteral HourSuffix (minutes=IntegerLiteral MSuffix)? (seconds=IntegerLiteral SecondSuffix)? | ||
(fractionalSeconds=IntegerLiteral FractionalSecondSuffix)? | ||
| minutes=IntegerLiteral MSuffix (seconds=IntegerLiteral SecondSuffix)? (fractionalSeconds=IntegerLiteral FractionalSecondSuffix)? | ||
| seconds=IntegerLiteral SecondSuffix (fractionalSeconds=IntegerLiteral FractionalSecondSuffix)? | ||
| fractionalSeconds=IntegerLiteral FractionalSecondSuffix | ||
; | ||
|
||
// A type annotation: either a simple scalar type or a parameterized type. | ||
datatype | ||
: scalarType | ||
| parameterizedType | ||
; | ||
|
||
// Types without parameters; each alternative carries its own label (#...). | ||
// A user-defined type is the UserDefined token followed by an Identifier. | ||
scalarType | ||
: Bool #Boolean | ||
| I8 #i8 | ||
| I16 #i16 | ||
| I32 #i32 | ||
| I64 #i64 | ||
| FP32 #fp32 | ||
| FP64 #fp64 | ||
| Str #string | ||
| Binary #binary | ||
| Ts #timestamp | ||
| TsTZ #timestampTz | ||
| Date #date | ||
| Time #time | ||
| IDay #intervalDay | ||
| IYear #intervalYear | ||
| UUID #uuid | ||
| UserDefined Identifier #userDefined | ||
; | ||
|
||
// In the parameterized types below, an optional QMark after the type keyword is captured as | ||
// `isnull`, and the parameters are enclosed in angle brackets. | ||
// | ||
// Fixed-length character type with a required length parameter. | ||
fixedCharType | ||
: FChar isnull=QMark? OAngleBracket len=numericParameter CAngleBracket #fixedChar | ||
; | ||
|
||
// Variable-length character type with a required length parameter. | ||
varCharType | ||
: VChar isnull=QMark? OAngleBracket len=numericParameter CAngleBracket #varChar | ||
; | ||
|
||
// Fixed-length binary type with a required length parameter. | ||
fixedBinaryType | ||
: FBin isnull=QMark? OAngleBracket len=numericParameter CAngleBracket #fixedBinary | ||
; | ||
|
||
// Decimal type; the <precision, scale> parameter list as a whole is optional. | ||
decimalType | ||
: Dec isnull=QMark? (OAngleBracket precision=numericParameter Comma scale=numericParameter CAngleBracket)? #decimal | ||
; | ||
|
||
// Timestamp type with a required precision parameter. | ||
precisionTimestampType | ||
: PTs isnull=QMark? OAngleBracket precision=numericParameter CAngleBracket #precisionTimestamp | ||
; | ||
|
||
// Timestamp-with-zone type with a required precision parameter. | ||
precisionTimestampTZType | ||
: PTsTZ isnull=QMark? OAngleBracket precision=numericParameter CAngleBracket #precisionTimestampTZ | ||
; | ||
|
||
// All parameterized types currently supported by this grammar. | ||
parameterizedType | ||
: fixedCharType | ||
| varCharType | ||
| fixedBinaryType | ||
| decimalType | ||
| precisionTimestampType | ||
| precisionTimestampTZType | ||
// TODO implement the rest of the parameterized types | ||
// | Struct isnull='?'? Lt expr (Comma expr)* Gt #struct | ||
// | NStruct isnull='?'? Lt Identifier expr (Comma Identifier expr)* Gt #nStruct | ||
// | List isnull='?'? Lt expr Gt #list | ||
// | Map isnull='?'? Lt key=expr Comma value=expr Gt #map | ||
; | ||
|
||
// A type parameter value. IntegerLiteral admits an optional sign, so a signed number is | ||
// syntactically accepted here. | ||
numericParameter | ||
: IntegerLiteral #integerLiteral | ||
; | ||
|
||
// Result marker for a test that expects an error (<!ERROR>) or an undefined result (<!UNDEFINED>). | ||
substraitError | ||
: ErrorResult | UndefineResult | ||
; | ||
|
||
// A single `name:value` option, e.g. `overflow:ERROR`. | ||
func_option | ||
: option_name Colon option_value | ||
; | ||
|
||
// Option name: one of the dedicated keywords or any Identifier. | ||
option_name | ||
: Overflow | Rounding | ||
| Identifier | ||
; | ||
|
||
// Option value: restricted to these keywords (unlike option_name, no Identifier alternative). | ||
option_value | ||
: Error | Saturate | Silent | TieToEven | NaN | ||
; | ||
|
||
// Comma-separated list of one or more options (appears between brackets in `testCase`). | ||
func_options | ||
: func_option (Comma func_option)* | ||
; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# ANTLR tool jar plus the grammar inputs and output directories for the generated Python parsers. | ||
ANTLR_JAR=antlr-4.13.2-complete.jar | ||
TYPE_GRAMMAR=SubstraitLexer.g4 SubstraitType.g4 | ||
TYPE_OUTPUT_DIR=../tests/type/antlr_parser | ||
TESTCASE_GRAMMAR=FuncTestCaseLexer.g4 FuncTestCaseParser.g4 | ||
TESTCASE_OUTPUT_DIR=../tests/coverage/antlr_parser | ||
|
||
# None of these targets creates a file named after itself; declaring them phony keeps them | ||
# runnable even if a file called e.g. `clean` appears in this directory. | ||
.PHONY: generate_testcase_parser generate_type_parser clean | ||
|
||
# Generate the Python3 parser (with visitor) for the function test-case grammar. | ||
generate_testcase_parser: | ||
	java -jar $(ANTLR_JAR) -visitor -Dlanguage=Python3 -o $(TESTCASE_OUTPUT_DIR) $(TESTCASE_GRAMMAR) | ||
|
||
# Generate the Python3 parser (with visitor) for the type grammar. | ||
generate_type_parser: | ||
	java -jar $(ANTLR_JAR) -visitor -Dlanguage=Python3 -o $(TYPE_OUTPUT_DIR) $(TYPE_GRAMMAR) | ||
|
||
# Remove generated .py, .tokens and .interp files from both output directories. | ||
clean: | ||
	rm -rf $(TYPE_OUTPUT_DIR)/*.py $(TYPE_OUTPUT_DIR)/*.tokens $(TYPE_OUTPUT_DIR)/*.interp | ||
	rm -rf $(TESTCASE_OUTPUT_DIR)/*.py $(TESTCASE_OUTPUT_DIR)/*.tokens $(TESTCASE_OUTPUT_DIR)/*.interp |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
### SUBSTRAIT_SCALAR_TEST: v1.0 | ||
### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' | ||
|
||
# basic: Basic examples without any special cases | ||
add(120::i8, 5::i8) = 125::i8 | ||
add(100::i16, 100::i16) = 200::i16 | ||
add(30000::i32, 30000::i32) = 60000::i32 | ||
add(2000000000::i64, 2000000000::i64) = 4000000000::i64 | ||
|
||
# overflow: Examples demonstrating overflow behavior | ||
add(120::i8, 10::i8) [overflow:ERROR] = <!ERROR> | ||
add(30000::i16, 30000::i16) [overflow:ERROR] = <!ERROR> | ||
add(2000000000::i32, 2000000000::i32) [overflow:ERROR] = <!ERROR> | ||
add(9223372036854775807::i64, 1::i64) [overflow:ERROR] = <!ERROR> | ||
|
||
# overflow: Examples demonstrating overflow behavior tests: overflow with SATURATE | ||
add(120::i8, 10::i8) [overflow:SATURATE] = 127::i8 | ||
add(-120::i8, -10::i8) [overflow:SATURATE] = -128::i8 | ||
|
||
# overflow: Examples demonstrating overflow behavior tests: overflow with SILENT | ||
add(120::i8, 10::i8) [overflow:SILENT] = <!UNDEFINED> | ||
|
||
# floating_exception: Examples demonstrating exceptional floating point cases | ||
add(1.5e+308::fp64, 1.5e+308::fp64) = inf::fp64 | ||
add(-1.5e+308::fp64, -1.5e+308::fp64) = -inf::fp64 | ||
|
||
# rounding: Examples demonstrating floating point rounding behavior | ||
add(4.5::fp32, 2.500001::fp32) [rounding:TIE_TO_EVEN] = 7.000001::fp32 | ||
|
||
# types: Examples demonstrating behavior of different data types | ||
add(4.5::fp64, 2.5000007152557373::fp64) = 7.0000007152557373::fp64 |
Oops, something went wrong.