-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(fixer): metavariable expression parser (#112)
# Description Initial feature for auto-fixer v2. Implements a lexer and parser for processing Comby-style metavariable expressions (e.g., `:[var]` and `:[[function]]`). This package serves as the first parsing phase before main syntax parsing. Key features: - Lexer that tokenizes metavariable patterns and surrounding text - Parser that generates AST with pattern, hole, text, and block nodes - Support for both short (`:[name]`) and long (`:[[name]]`) metavariable hole-expression forms - Proper handling of nested block structures and whitespace This feature will replace the existing AST-based auto fix functionality. ## Related Issue #111
- Loading branch information
Showing
8 changed files
with
1,095 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
package fixerv2 | ||
|
||
import ( | ||
"fmt" | ||
"regexp" | ||
"strings" | ||
|
||
parser "github.com/gnolang/tlin/fixer_v2/query" | ||
) | ||
|
||
// Pattern is a match/rewrite pair describing one code transformation.
type Pattern struct {
	// Match is the pattern text searched for; it is the input that gets
	// converted into a regular expression.
	Match string
	// Rewrite is the replacement template whose holes are filled with the
	// values captured by Match.
	Rewrite string
}
|
||
var (
	// whitespaceRegex matches a run of one or more whitespace characters.
	whitespaceRegex = regexp.MustCompile(`\s+`)
	// openBraceRegex matches an opening curly brace together with any
	// whitespace directly around it.
	openBraceRegex = regexp.MustCompile(`\s*{\s*`)
	// closeBraceRegex matches a closing curly brace together with any
	// whitespace directly around it.
	closeBraceRegex = regexp.MustCompile(`\s*}\s*`)
)

// normalizePattern rewrites pattern into a canonical single-line form:
// every curly brace is surrounded by exactly one space, every run of
// whitespace is collapsed into a single space, and leading/trailing
// whitespace is removed.
// This helps unify the style of the pattern for regex generation.
//
// Note: this function is only used for testing
func normalizePattern(pattern string) string {
	pattern = openBraceRegex.ReplaceAllString(pattern, " { ")
	pattern = closeBraceRegex.ReplaceAllString(pattern, " } ")
	// Collapse whitespace last: padding two adjacent braces (e.g. "}}" or
	// "} }") yields back-to-back spaces, which must be folded into one to
	// honour the single-space guarantee.
	pattern = whitespaceRegex.ReplaceAllString(pattern, " ")
	return strings.TrimSpace(pattern)
}
|
||
// buildRegexFromAST builds a regex pattern from the parsed AST | ||
func buildRegexFromAST(node parser.Node) Option[Result] { | ||
var sb strings.Builder | ||
captures := make(map[string]int) | ||
groupCount := 1 | ||
|
||
var processNode func(parser.Node) | ||
processNode = func(n parser.Node) { | ||
switch v := n.(type) { | ||
case *parser.TextNode: | ||
// treat text nodes as literals and convert whitespace to \s+ | ||
escaped := regexp.QuoteMeta(v.Content) | ||
processed := regexp.MustCompile(`\s+`).ReplaceAllString(escaped, `\s+`) | ||
sb.WriteString(processed) | ||
|
||
case *parser.HoleNode: | ||
// convert hole name to capture group name | ||
captures[v.Name] = groupCount | ||
groupCount++ | ||
sb.WriteString(`([^{}]+?)`) | ||
|
||
case *parser.BlockNode: | ||
// block nodes contain curly braces and handle internal nodes | ||
sb.WriteString(`\s*{\s*`) | ||
for _, child := range v.Content { | ||
processNode(child) | ||
} | ||
sb.WriteString(`\s*}\s*`) | ||
|
||
case *parser.PatternNode: | ||
// pattern nodes traverse all child nodes | ||
for _, child := range v.Children { | ||
processNode(child) | ||
} | ||
} | ||
} | ||
|
||
processNode(node) | ||
|
||
regex, err := regexp.Compile(sb.String()) | ||
return createOption(Result{regex: regex, captures: captures}, err) | ||
} | ||
|
||
// patternToRegex converts the pattern string to a compiled *regexp.Regexp | ||
// and returns a Result containing the regex and a map that correlates each | ||
// placeholder name with its capture group index. | ||
func patternToRegex(pattern string) Option[Result] { | ||
lexer := parser.NewLexer(pattern) | ||
tokens := lexer.Tokenize() | ||
|
||
parser := parser.NewParser(tokens) | ||
ast := parser.Parse() | ||
|
||
return buildRegexFromAST(ast) | ||
} | ||
|
||
// rewrite replaces placeholders in the rewrite pattern with the captured values in 'env'. | ||
// | ||
// For each placeholder name, we look for :[[name]] or :[name] in rewritePattern | ||
// and substitute with the corresponding 'env[name]' value. | ||
func rewrite(rewritePattern string, env map[string]string) string { | ||
lexer := parser.NewLexer(rewritePattern) | ||
tokens := lexer.Tokenize() | ||
|
||
prsr := parser.NewParser(tokens) | ||
ast := prsr.Parse() | ||
|
||
var result strings.Builder | ||
|
||
var processNode func(parser.Node) | ||
processNode = func(n parser.Node) { | ||
switch v := n.(type) { | ||
case *parser.TextNode: | ||
result.WriteString(v.Content) | ||
|
||
case *parser.HoleNode: | ||
// replace hole name with the corresponding value in 'env' | ||
if value, ok := env[v.Name]; ok { | ||
result.WriteString(value) | ||
} else { | ||
// if value is not found, keep the original hole expression | ||
result.WriteString(fmt.Sprintf(":[%s]", v.Name)) | ||
} | ||
|
||
case *parser.BlockNode: | ||
result.WriteString("{") | ||
for _, child := range v.Content { | ||
processNode(child) | ||
} | ||
result.WriteString("}") | ||
|
||
case *parser.PatternNode: | ||
for _, child := range v.Children { | ||
processNode(child) | ||
} | ||
} | ||
} | ||
|
||
processNode(ast) | ||
return result.String() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
package fixerv2 | ||
|
||
import ( | ||
"strings" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
// TestResult is the expected outcome of a pattern that matched.
type TestResult struct {
	// vars is the expected hole-name to captured-text mapping.
	vars map[string]string
	// rewrite is the expected output of applying the rewrite template.
	rewrite string
}
|
||
type TestCase struct { | ||
name string | ||
pattern Pattern | ||
input string | ||
wantMatch bool | ||
wantResult TestResult | ||
} | ||
|
||
func TestPatternMatching(t *testing.T) { | ||
tests := []TestCase{ | ||
{ | ||
name: "basic if-else to return", | ||
pattern: Pattern{ | ||
Match: `if :[[cond]] { | ||
return true | ||
} else { | ||
return false | ||
}`, | ||
Rewrite: "return :[[cond]]", | ||
}, | ||
input: ` | ||
func example() bool { | ||
if x > 0 { | ||
return true | ||
} else { | ||
return false | ||
} | ||
}`, | ||
wantMatch: true, | ||
wantResult: TestResult{ | ||
vars: map[string]string{ | ||
"cond": "x > 0", | ||
}, | ||
rewrite: "return x > 0", | ||
}, | ||
}, | ||
{ | ||
name: "no match for different pattern", | ||
pattern: Pattern{ | ||
Match: `if :[[cond]] { | ||
return true | ||
} else { | ||
return false | ||
}`, | ||
Rewrite: "return :[[cond]]", | ||
}, | ||
input: ` | ||
func example() bool { | ||
if x > 0 { | ||
return true | ||
} | ||
return false | ||
}`, | ||
wantMatch: false, | ||
wantResult: TestResult{ | ||
vars: nil, | ||
rewrite: "", | ||
}, | ||
}, | ||
{ | ||
name: "match with nested conditions", | ||
pattern: Pattern{ | ||
Match: "if :[[outer]] { if :[[inner]] { :[[body]] } }", | ||
Rewrite: "if :[[outer]] && :[[inner]] { :[[body]] }", | ||
}, | ||
input: ` | ||
func example() { | ||
if x > 0 { if y < 10 { doSomething() } } | ||
}`, | ||
wantMatch: true, | ||
wantResult: TestResult{ | ||
vars: map[string]string{ | ||
"outer": "x > 0", | ||
"inner": "y < 10", | ||
"body": "doSomething()", | ||
}, | ||
rewrite: "if x > 0 && y < 10 { doSomething() }", | ||
}, | ||
}, | ||
{ | ||
name: "match with short syntax", | ||
pattern: Pattern{ | ||
Match: "func :[name]() :[ret] { :[body] }", | ||
Rewrite: "func :[name]() :[ret] {\n // Added comment\n :[body]\n}", | ||
}, | ||
input: ` | ||
func example() bool { return true }`, | ||
wantMatch: true, | ||
wantResult: TestResult{ | ||
vars: map[string]string{ | ||
"name": "example", | ||
"ret": "bool", | ||
"body": "return true", | ||
}, | ||
rewrite: "func example() bool {\n // Added comment\n return true\n}", | ||
}, | ||
}, | ||
} | ||
|
||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
resultOpt := patternToRegex(tt.pattern.Match) | ||
assert.NoError(t, resultOpt.err, "patternToRegex should not return error") | ||
|
||
if resultOpt.err != nil { | ||
return | ||
} | ||
|
||
result := resultOpt.value | ||
normalizedInput := normalizePattern(tt.input) | ||
matches := result.regex.FindAllStringSubmatch(normalizedInput, -1) | ||
|
||
if tt.wantMatch { | ||
assert.NotEmpty(t, matches, "expected to find matches") | ||
if len(matches) > 0 { | ||
env := extractEnvironment(t, matches[0], result.captures) | ||
assert.Equal(t, tt.wantResult.vars, env, "captured variables should match") | ||
|
||
rewritten := rewrite(tt.pattern.Rewrite, env) | ||
assert.Equal(t, tt.wantResult.rewrite, rewritten, "rewritten code should match") | ||
} | ||
} else { | ||
assert.Empty(t, matches, "expected no matches") | ||
} | ||
}) | ||
} | ||
} | ||
|
||
// extractEnvironment turns one regex submatch slice into a name-to-value map.
//
// For every entry in captures, the submatch at that index is trimmed of
// surrounding whitespace and stored under the entry's name. Entries whose
// index lies past the end of match are left out. The returned map is never
// nil.
func extractEnvironment(t *testing.T, match []string, captures map[string]int) map[string]string {
	t.Helper()

	bound := make(map[string]string)
	for holeName, groupIdx := range captures {
		if groupIdx >= len(match) {
			// The regex produced fewer groups than this index; skip it.
			continue
		}
		bound[holeName] = strings.TrimSpace(match[groupIdx])
	}
	return bound
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package fixerv2 | ||
|
||
import "regexp" | ||
|
||
// Option is a generic container that carries a value of type T together
// with an error that may have occurred while producing it.
type Option[T any] struct {
	// value is the wrapped value.
	value T
	// err is the error associated with the value; nil means no error.
	err error
}
|
||
// Result bundles a compiled regex with the capture-group index recorded
// for each name.
type Result struct {
	// regex is the compiled regular expression.
	regex *regexp.Regexp
	// captures maps a name to its capture-group index in regex.
	captures map[string]int
}
|
||
// createOption creates a new Option | ||
func createOption[T any](value T, err error) Option[T] { | ||
return Option[T]{value: value, err: err} | ||
} | ||
|
||
// Map applies a function to the Option value | ||
func (o Option[T]) Map(f func(T) T) Option[T] { | ||
if o.err != nil { | ||
return o | ||
} | ||
return createOption(f(o.value), nil) | ||
} | ||
|
||
// Bind chains Option operations while handling potential errors | ||
func (o Option[T]) Bind(f func(T) Option[T]) Option[T] { | ||
if o.err != nil { | ||
return o | ||
} | ||
return f(o.value) | ||
} |
Oops, something went wrong.