Skip to content

Commit

Permalink
feat: add antlr grammar for test file format (#728)
Browse files Browse the repository at this point in the history
Add parser changes for testcase files
Add function test coverage changes
  • Loading branch information
scgkiran authored Nov 7, 2024
1 parent 2e13d0b commit 752aa63
Show file tree
Hide file tree
Showing 21 changed files with 19,458 additions and 0 deletions.
1 change: 1 addition & 0 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
ignore = E203, E266, E501, W503, F403, F401
max-line-length = 88
select = B,C,E,F,W,T4,B9
exclude = tests/coverage/antlr_parser/*.py
8 changes: 8 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,11 @@ repos:
rev: 6.1.0
hooks:
- id: flake8
# Hook defined inside this repository ("local") rather than fetched from a remote hook repo.
- repo: local
hooks:
# Runs the single pytest test named in `entry` whenever the hook is triggered.
- id: check-substrait-extensions
name: Check Substrait extensions
entry: pytest tests/test_extensions.py::test_read_substrait_extensions
# NOTE(review): with `language: python` pre-commit runs the entry from its own
# environment; confirm that pytest is resolvable there.
language: python
# Do not append the staged file names to the pytest command line.
pass_filenames: false

104 changes: 104 additions & 0 deletions grammar/FuncTestCaseLexer.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Lexer for the function test-case file format.
// The rules of SubstraitLexer are imported; the rules declared in this file are
// combined with them to form the full token vocabulary.
lexer grammar FuncTestCaseLexer;

import SubstraitLexer;

options {
// Literal text in the rules below is matched without regard to letter case.
caseInsensitive = true;
}

// Spaces, tabs and line breaks go to the hidden channel, so the parser never sees them.
Whitespace : [ \t\n\r]+ -> channel(HIDDEN) ;

// Marker and keywords of the two header lines, e.g.
//   ### SUBSTRAIT_SCALAR_TEST: v1.0
//   ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
TripleHash: '###';
SubstraitScalarTest: 'SUBSTRAIT_SCALAR_TEST';
SubstraitInclude: 'SUBSTRAIT_INCLUDE';

// Format version: 'v', one or more digits, optionally '.' and more digits (v1, v1.0).
// DIGIT is not declared in this file; presumably a fragment of the imported SubstraitLexer.
FormatVersion
: 'v' DIGIT+ ('.' DIGIT+)?
;

// A line that starts with "# " (hash followed by a space), up to and including its
// line terminator. The trailing '\n' is mandatory, so such a line at the very end of
// a file that lacks a final newline does not match this rule.
DescriptionLine
: '# ' ~[\r\n]* '\r'? '\n'
;

// Special markers that may stand in place of a result value (parser rule `substraitError`).
ErrorResult: '<!ERROR>';
UndefineResult: '<!UNDEFINED>';
// Keywords for function options: Overflow and Rounding are accepted as option names,
// Error/Saturate/Silent/TieToEven/NaN as option values (parser rules `option_name`
// and `option_value`).
Overflow: 'OVERFLOW';
Rounding: 'ROUNDING';
Error: 'ERROR';
Saturate: 'SATURATE';
Silent: 'SILENT';
TieToEven: 'TIE_TO_EVEN';
// NaN is also accepted as a floating point value by the parser rule `floatLiteral`.
NaN: 'NAN';

// Numeric literals. The three numeric rules overlap; when several lexer rules match
// the same longest text, ANTLR picks the rule declared first, so the order
// IntegerLiteral, DecimalLiteral, FloatLiteral is significant. The parser rule
// `numericLiteral` accepts any of the three token types.

// Optional sign followed by Int. Int is not declared in this file; presumably it
// comes from the imported SubstraitLexer.
IntegerLiteral
: [+-]? Int
;

// Optional sign, digits, and an optional fraction that needs at least one digit.
DecimalLiteral
: [+-]? [0-9]+ ('.' [0-9]+)?
;

// Optional sign, digits, optional fraction (digits after '.' may be omitted) and an
// optional exponent; or 'inf' with an optional sign; or 'snan'.
FloatLiteral
: [+-]? [0-9]+ ('.' [0-9]*)? ( 'E' [+-]? [0-9]+ )?
| [+-]? 'inf'
| 'snan'
;

BooleanLiteral
: 'true' | 'false'
;

// Fixed-width digit groups shared by the date/time literals below.
fragment FourDigits: [0-9][0-9][0-9][0-9];
fragment TwoDigits: [0-9][0-9];

// Single-quoted date/time literals. They are declared before StringLiteral, which
// matches the same quoted text with the same length; the earlier rule wins, so text
// of these exact shapes is tokenized as a date/time token rather than as a string.

// 'YYYY-MM-DDTHH:MM:SS[.fraction]+HH:MM' (offset sign may be + or -).
TimestampTzLiteral
: '\'' FourDigits '-' TwoDigits '-' TwoDigits 'T' TwoDigits ':' TwoDigits ':' TwoDigits ( '.' [0-9]+ )?
[+-] TwoDigits ':' TwoDigits '\''
;

// 'YYYY-MM-DDTHH:MM:SS[.fraction]'
TimestampLiteral
: '\'' FourDigits '-' TwoDigits '-' TwoDigits 'T' TwoDigits ':' TwoDigits ':' TwoDigits ( '.' [0-9]+ )? '\''
;

// 'HH:MM:SS[.fraction]'
TimeLiteral
: '\'' TwoDigits ':' TwoDigits ':' TwoDigits ( '.' [0-9]+ )? '\''
;

// 'YYYY-MM-DD'
DateLiteral
: '\'' FourDigits '-' TwoDigits '-' TwoDigits '\''
;

// Single-letter designators. They are standalone tokens and are also referenced
// from the interval literal rules below.
PeriodPrefix: 'P';
TimePrefix: 'T';
YearPrefix: 'Y';
MSuffix: 'M'; // used for both months and minutes
DaySuffix: 'D';
HourSuffix: 'H';
SecondSuffix: 'S';
FractionalSecondSuffix: 'F';
// Aliases for Lt and Gt, which are not declared in this file (presumably they come
// from the imported SubstraitLexer).
OAngleBracket: Lt;
CAngleBracket: Gt;

// Quoted year/month interval: 'P<int>Y' with an optional '<int>M', or 'P<int>M'.
IntervalYearLiteral
: '\'' PeriodPrefix IntegerLiteral YearPrefix (IntegerLiteral MSuffix)? '\''
| '\'' PeriodPrefix IntegerLiteral MSuffix '\''
;

// Quoted day/time interval: 'P<int>D' with an optional 'T<time part>', or
// 'PT<time part>' when there is no day component.
IntervalDayLiteral
: '\'' PeriodPrefix IntegerLiteral DaySuffix (TimePrefix TimeInterval)? '\''
| '\'' PeriodPrefix TimePrefix TimeInterval '\''
;

// Time part of a day interval. Components appear in the order hours (H), minutes (M),
// seconds (S), fractional seconds (F); the part may start at any of them and every
// component after the first is optional.
fragment TimeInterval
: IntegerLiteral HourSuffix (IntegerLiteral MSuffix)? (IntegerLiteral SecondSuffix)?
(IntegerLiteral FractionalSecondSuffix)?
| IntegerLiteral MSuffix (IntegerLiteral SecondSuffix)? (IntegerLiteral FractionalSecondSuffix)?
| IntegerLiteral SecondSuffix (IntegerLiteral FractionalSecondSuffix)?
| IntegerLiteral FractionalSecondSuffix
;

NullLiteral: 'null';

// Single-quoted string. Inside the quotes it accepts a backslash followed by any
// character, a doubled single quote (''), or any character other than a single
// quote or a backslash.
StringLiteral
: '\'' ('\\' . | '\'\'' | ~['\\])* '\''
;
215 changes: 215 additions & 0 deletions grammar/FuncTestCaseParser.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
// Parser for function test-case files.
parser grammar FuncTestCaseParser;

options {
    caseInsensitive = true;
    // A grammar has a single tokenVocab option; when it is assigned twice only the
    // last assignment takes effect. FuncTestCaseLexer imports SubstraitLexer, so its
    // vocabulary already contains the Substrait tokens used by the rules below.
    tokenVocab=FuncTestCaseLexer;
}

// Entry rule: a header followed by one or more test groups, then end of input.
doc
: header testGroup+ EOF
;

// The header consists of the version line followed by the include line; both are required.
header
: version include
;

// e.g. `### SUBSTRAIT_SCALAR_TEST: v1.0`
version
: TripleHash SubstraitScalarTest Colon FormatVersion
;

// e.g. `### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'`;
// further quoted strings may follow, separated by commas.
include
: TripleHash SubstraitInclude Colon StringLiteral (Comma StringLiteral)*
;

// The "# ..." line that introduces a group of test cases.
testGroupDescription
: DescriptionLine
;

// One test case: name(arguments) [options] = result, where the bracketed option
// list is optional, e.g. `add(120::i8, 10::i8) [overflow:ERROR] = <!ERROR>`.
// Identifier, OParen, CParen, OBracket, CBracket, Eq, Colon and Comma are not
// declared in FuncTestCaseLexer.g4; presumably they come from SubstraitLexer.
testCase
: functionName=Identifier OParen arguments CParen ( OBracket func_options CBracket )? Eq result
;

// A description line followed by at least one test case.
testGroup
: testGroupDescription (testCase)+
;

// One or more comma-separated arguments; an empty argument list is not accepted.
arguments
: argument (Comma argument)*
;

// Expected outcome: either a typed literal or one of the error markers.
result
: argument
| substraitError
;

// A typed literal written as `<literal>::<type>`. Every alternative below pairs a
// literal token with the type keyword(s) it may be annotated with.
// DoubleColon and the type keyword tokens (I8, FP32, Bool, Str, Date, ...) are not
// declared in FuncTestCaseLexer.g4; presumably they come from SubstraitLexer.
argument
: nullArg
| intArg
| floatArg
| booleanArg
| stringArg
| decimalArg
| dateArg
| timeArg
| timestampArg
| timestampTzArg
| intervalYearArg
| intervalDayArg
;

// Any numeric token. The lexer decides between IntegerLiteral, DecimalLiteral and
// FloatLiteral on its own, so rules that take a number accept all three token types.
numericLiteral
: DecimalLiteral | IntegerLiteral | floatLiteral
;

// A FloatLiteral token or the NaN keyword.
floatLiteral
: FloatLiteral | NaN
;

// `null` may be annotated with any datatype.
nullArg: NullLiteral DoubleColon datatype;

intArg: IntegerLiteral DoubleColon (I8 | I16 | I32 | I64);

floatArg: numericLiteral DoubleColon (FP32 | FP64);

decimalArg
: numericLiteral DoubleColon decimalType
;

booleanArg
: BooleanLiteral DoubleColon Bool
;

stringArg
: StringLiteral DoubleColon Str
;

dateArg
: DateLiteral DoubleColon Date
;

timeArg
: TimeLiteral DoubleColon Time
;

timestampArg
: TimestampLiteral DoubleColon Ts
;

timestampTzArg
: TimestampTzLiteral DoubleColon TsTZ
;

// The interval arguments consume the quoted lexer tokens IntervalYearLiteral and
// IntervalDayLiteral.
intervalYearArg
: IntervalYearLiteral DoubleColon IYear
;

intervalDayArg
: IntervalDayLiteral DoubleColon IDay
;

// Parser-level descriptions of the interval syntax, built from the individual
// designator tokens and without surrounding quotes.
// NOTE(review): no other rule in this grammar references intervalYearLiteral or
// intervalDayLiteral (intervalYearArg/intervalDayArg use the lexer tokens instead);
// confirm whether these rules are still needed.

// P<years>Y with optional <months>M, or P<months>M.
intervalYearLiteral
: PeriodPrefix (years=IntegerLiteral YearPrefix) (months=IntegerLiteral MSuffix)?
| PeriodPrefix (months=IntegerLiteral MSuffix)
;

// P<days>D with an optional T<time part>, or PT<time part>.
intervalDayLiteral
: PeriodPrefix (days=IntegerLiteral DaySuffix) (TimePrefix timeInterval)?
| PeriodPrefix TimePrefix timeInterval
;

// Hours, minutes, seconds and fractional seconds in that order; the sequence may
// start at any component and every later component is optional.
timeInterval
: hours=IntegerLiteral HourSuffix (minutes=IntegerLiteral MSuffix)? (seconds=IntegerLiteral SecondSuffix)?
(fractionalSeconds=IntegerLiteral FractionalSecondSuffix)?
| minutes=IntegerLiteral MSuffix (seconds=IntegerLiteral SecondSuffix)? (fractionalSeconds=IntegerLiteral FractionalSecondSuffix)?
| seconds=IntegerLiteral SecondSuffix (fractionalSeconds=IntegerLiteral FractionalSecondSuffix)?
| fractionalSeconds=IntegerLiteral FractionalSecondSuffix
;

// A type annotation: either a plain scalar type or a parameterized type.
datatype
: scalarType
| parameterizedType
;

// Types without parameters. Each alternative has its own `#label`.
// The type keyword tokens and UserDefined/Identifier are not declared in
// FuncTestCaseLexer.g4; presumably they come from SubstraitLexer.
scalarType
: Bool #Boolean
| I8 #i8
| I16 #i16
| I32 #i32
| I64 #i64
| FP32 #fp32
| FP64 #fp64
| Str #string
| Binary #binary
| Ts #timestamp
| TsTZ #timestampTz
| Date #date
| Time #time
| IDay #intervalDay
| IYear #intervalYear
| UUID #uuid
| UserDefined Identifier #userDefined
;

// The parameterized types below share one shape: type keyword, an optional QMark
// captured as `isnull`, then parameters between OAngleBracket and CAngleBracket.
fixedCharType
: FChar isnull=QMark? OAngleBracket len=numericParameter CAngleBracket #fixedChar
;

varCharType
: VChar isnull=QMark? OAngleBracket len=numericParameter CAngleBracket #varChar
;

fixedBinaryType
: FBin isnull=QMark? OAngleBracket len=numericParameter CAngleBracket #fixedBinary
;

// Unlike the other parameterized types, the bracketed precision/scale pair is
// optional here, so the bare keyword is accepted as well.
decimalType
: Dec isnull=QMark? (OAngleBracket precision=numericParameter Comma scale=numericParameter CAngleBracket)? #decimal
;

precisionTimestampType
: PTs isnull=QMark? OAngleBracket precision=numericParameter CAngleBracket #precisionTimestamp
;

precisionTimestampTZType
: PTsTZ isnull=QMark? OAngleBracket precision=numericParameter CAngleBracket #precisionTimestampTZ
;

// Parameterized types currently supported in test files.
parameterizedType
: fixedCharType
| varCharType
| fixedBinaryType
| decimalType
| precisionTimestampType
| precisionTimestampTZType
// TODO implement the rest of the parameterized types
// | Struct isnull='?'? Lt expr (Comma expr)* Gt #struct
// | NStruct isnull='?'? Lt Identifier expr (Comma Identifier expr)* Gt #nStruct
// | List isnull='?'? Lt expr Gt #list
// | Map isnull='?'? Lt key=expr Comma value=expr Gt #map
;

// A type parameter; only an IntegerLiteral token is accepted.
numericParameter
: IntegerLiteral #integerLiteral
;

// Result markers `<!ERROR>` and `<!UNDEFINED>`.
substraitError
: ErrorResult | UndefineResult
;

// A single `name:value` pair, e.g. `overflow:SATURATE`.
func_option
: option_name Colon option_value
;

// Either one of the dedicated keywords or any Identifier.
option_name
: Overflow | Rounding
| Identifier
;

// Option values are restricted to this fixed set of keywords.
option_value
: Error | Saturate | Silent | TieToEven | NaN
;

// One or more options separated by commas (the content between the brackets of a test case).
func_options
: func_option (Comma func_option)*
;
15 changes: 15 additions & 0 deletions grammar/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Regenerates the Python ANTLR parsers under ../tests from the grammars in this
# directory. Plain `make` runs the first target, generate_testcase_parser.
# Simple (:=) assignment is used because none of the values need late expansion.
ANTLR_JAR := antlr-4.13.2-complete.jar
TYPE_GRAMMAR := SubstraitLexer.g4 SubstraitType.g4
TYPE_OUTPUT_DIR := ../tests/type/antlr_parser
TESTCASE_GRAMMAR := FuncTestCaseLexer.g4 FuncTestCaseParser.g4
TESTCASE_OUTPUT_DIR := ../tests/coverage/antlr_parser

# These targets are commands, not files. Declaring them phony keeps them working
# even if a file named e.g. `clean` appears in this directory.
.PHONY: generate_testcase_parser generate_type_parser clean

# Generate the test-case lexer/parser (with visitor) as Python 3 sources.
generate_testcase_parser:
	java -jar $(ANTLR_JAR) -visitor -Dlanguage=Python3 -o $(TESTCASE_OUTPUT_DIR) $(TESTCASE_GRAMMAR)

# Generate the type lexer/parser (with visitor) as Python 3 sources.
generate_type_parser:
	java -jar $(ANTLR_JAR) -visitor -Dlanguage=Python3 -o $(TYPE_OUTPUT_DIR) $(TYPE_GRAMMAR)

# Remove generated artifacts. Note that the globs match every .py, .tokens and
# .interp file in the output directories, not only files written by ANTLR.
clean:
	rm -f $(TYPE_OUTPUT_DIR)/*.py $(TYPE_OUTPUT_DIR)/*.tokens $(TYPE_OUTPUT_DIR)/*.interp
	rm -f $(TESTCASE_OUTPUT_DIR)/*.py $(TESTCASE_OUTPUT_DIR)/*.tokens $(TESTCASE_OUTPUT_DIR)/*.interp
Empty file added tests/__init__.py
Empty file.
31 changes: 31 additions & 0 deletions tests/cases/arithmetic/add.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
### SUBSTRAIT_SCALAR_TEST: v1.0
### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'

# basic: Basic examples without any special cases
add(120::i8, 5::i8) = 125::i8
add(100::i16, 100::i16) = 200::i16
add(30000::i32, 30000::i32) = 60000::i32
add(2000000000::i64, 2000000000::i64) = 4000000000::i64

# overflow: Examples demonstrating overflow behavior
add(120::i8, 10::i8) [overflow:ERROR] = <!ERROR>
add(30000::i16, 30000::i16) [overflow:ERROR] = <!ERROR>
add(2000000000::i32, 2000000000::i32) [overflow:ERROR] = <!ERROR>
add(9223372036854775807::i64, 1::i64) [overflow:ERROR] = <!ERROR>

# overflow: Examples demonstrating overflow behavior tests: overflow with SATURATE
add(120::i8, 10::i8) [overflow:SATURATE] = 127::i8
add(-120::i8, -10::i8) [overflow:SATURATE] = -128::i8

# overflow: Examples demonstrating overflow behavior tests: overflow with SILENT
add(120::i8, 10::i8) [overflow:SILENT] = <!UNDEFINED>

# floating_exception: Examples demonstrating exceptional floating point cases
add(1.5e+308::fp64, 1.5e+308::fp64) = inf::fp64
add(-1.5e+308::fp64, -1.5e+308::fp64) = -inf::fp64

# rounding: Examples demonstrating floating point rounding behavior
add(4.5::fp32, 2.500001::fp32) [rounding:TIE_TO_EVEN] = 7.000001::fp32

# types: Examples demonstrating behavior of different data types
add(4.5::fp64, 2.5000007152557373::fp64) = 7.00000071525573::fp64
Loading

0 comments on commit 752aa63

Please sign in to comment.