Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AVM: Teal macros #4737

Merged
merged 16 commits into from
Feb 8, 2023
164 changes: 150 additions & 14 deletions data/transactions/logic/assembler.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"sort"
"strconv"
"strings"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I suppose so. It's not really a spec change (of the protocol), but since that's where we document some assembler-level details, it would be the right place to explain these as well. I'll add it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can merge without a spec mod, as they don't actually change the protocol.

"unicode"

"github.com/algorand/avm-abi/abi"
"github.com/algorand/go-algorand/data/basics"
Expand Down Expand Up @@ -256,6 +257,8 @@ type OpStream struct {

// Need new copy for each opstream
versionedPseudoOps map[string]map[int]OpSpec

macros map[string][]string
}

// newOpStream constructs OpStream instances ready to invoke assemble. A new
Expand All @@ -266,6 +269,7 @@ func newOpStream(version uint64) OpStream {
OffsetToLine: make(map[int]int),
typeTracking: true,
Version: version,
macros: make(map[string][]string),
bbroder-algo marked this conversation as resolved.
Show resolved Hide resolved
known: ProgramKnowledge{fp: -1},
}

Expand Down Expand Up @@ -1845,16 +1849,28 @@ func (ops *OpStream) trackStack(args StackTypes, returns StackTypes, instruction
}
}

// splitTokens breaks tokens into two slices at the first semicolon.
func splitTokens(tokens []string) (current, rest []string) {
for i, token := range tokens {
// nextStatement breaks tokens into two slices at the first semicolon and expands macros along the way.
func nextStatement(ops *OpStream, tokens []string) (current, rest []string) {
for i := 0; i < len(tokens); i++ {
token := tokens[i]
replacement, ok := ops.macros[token]
if ok {
tokens = append(tokens[0:i], append(replacement, tokens[i+1:]...)...)
bbroder-algo marked this conversation as resolved.
Show resolved Hide resolved
// backup to handle potential re-expansion of the first token in the expansion
i--
continue
}
if token == ";" {
return tokens[:i], tokens[i+1:]
}
}
return tokens, nil
}

// directiveFunc is the signature of a handler for a '#'-prefixed directive
// line. It receives the OpStream being assembled and all tokens of the line,
// including the directive token itself.
type directiveFunc func(*OpStream, []string) error

// directives maps a directive name (without the leading '#') to its handler.
var directives = map[string]directiveFunc{"pragma": pragma, "define": define}

// assemble reads text from an input and accumulates the program
func (ops *OpStream) assemble(text string) error {
fin := strings.NewReader(text)
Expand All @@ -1869,30 +1885,35 @@ func (ops *OpStream) assemble(text string) error {
if len(tokens) > 0 {
if first := tokens[0]; first[0] == '#' {
directive := first[1:]
switch directive {
case "pragma":
ops.pragma(tokens) //nolint:errcheck // report bad pragma line error, but continue assembling
ops.trace("%3d: #pragma line\n", ops.sourceLine)
default:
if dFunc, ok := directives[directive]; ok {
_ = dFunc(ops, tokens)
ops.trace("%3d: %s line\n", ops.sourceLine, first)
} else {
ops.errorf("Unknown directive: %s", directive)
}
continue
}
}
for current, next := splitTokens(tokens); len(current) > 0 || len(next) > 0; current, next = splitTokens(next) {
for current, next := nextStatement(ops, tokens); len(current) > 0 || len(next) > 0; current, next = nextStatement(ops, next) {
if len(current) == 0 {
continue
}
// we're about to begin processing opcodes, so settle the Version
if ops.Version == assemblerNoVersion {
ops.Version = AssemblerDefaultVersion
_ = ops.recheckMacroNames()
}
if ops.versionedPseudoOps == nil {
ops.versionedPseudoOps = prepareVersionedPseudoTable(ops.Version)
}
opstring := current[0]
if opstring[len(opstring)-1] == ':' {
ops.createLabel(opstring[:len(opstring)-1])
labelName := opstring[:len(opstring)-1]
if _, ok := ops.macros[labelName]; ok {
ops.errorf("Cannot create label with same name as macro: %s", labelName)
} else {
ops.createLabel(opstring[:len(opstring)-1])
}
current = current[1:]
if len(current) == 0 {
ops.trace("%3d: label only\n", ops.sourceLine)
Expand Down Expand Up @@ -1970,7 +1991,121 @@ func (ops *OpStream) assemble(text string) error {
return nil
}

func (ops *OpStream) pragma(tokens []string) error {
func (ops *OpStream) cycle(macro string, previous ...string) bool {
replacement, ok := ops.macros[macro]
if !ok {
return false
}
if len(previous) > 0 && macro == previous[0] {
ops.errorf("Macro cycle discovered: %s", strings.Join(append(previous, macro), " -> "))
return true
}
for _, token := range replacement {
if ops.cycle(token, append(previous, macro)...) {
bbroder-algo marked this conversation as resolved.
Show resolved Hide resolved
return true
}
}
return false
}

// recheckMacroNames validates every macro defined so far against the current
// ops.Version, so that names which turn out to be opcodes or fields of a
// newly obtained version are rejected. For simplicity it reruns the complete
// name check, including parts whose outcome cannot have changed.
//
// Each offending macro is removed from ops.macros and its error is reported
// through ops.error. A single summary error is returned if any macro was
// rejected, nil otherwise.
func (ops *OpStream) recheckMacroNames() error {
	rejected := 0
	for name := range ops.macros {
		if err := checkMacroName(name, ops.Version, ops.labels); err != nil {
			// Deleting the current key while ranging over a map is permitted.
			delete(ops.macros, name)
			ops.error(err)
			rejected++
		}
	}
	if rejected == 0 {
		return nil
	}
	return errors.New("version is incompatible with defined macros")
}

// otherAllowedChars marks, by character value, the non-alphanumeric characters
// that may appear in a macro name. It only has entries for values 0 through
// 255, so a rune must be known to be in that range before it is used as an
// index.
var otherAllowedChars = [256]bool{'+': true, '-': true, '*': true, '/': true, '^': true, '%': true, '&': true, '|': true, '~': true, '!': true, '>': true, '<': true, '=': true, '?': true, '_': true}

func checkMacroName(macroName string, version uint64, labels map[string]int) error {
var firstRune rune
var secondRune rune
count := 0
for _, r := range macroName {
if count == 0 {
firstRune = r
} else if count == 1 {
secondRune = r
}
if !unicode.IsLetter(r) && !unicode.IsDigit(r) && !otherAllowedChars[r] {
return fmt.Errorf("%s character not allowed in macro name", string(r))
}
count++
}
if unicode.IsDigit(firstRune) {
return fmt.Errorf("Cannot begin macro name with number: %s", macroName)
}
if len(macroName) > 1 && (firstRune == '-' || firstRune == '+') {
if unicode.IsDigit(secondRune) {
return fmt.Errorf("Cannot begin macro name with number: %s", macroName)
}
}
// Note parentheses are not allowed characters, so we don't have to check for b64(AAA) syntax
if macroName == "b64" || macroName == "base64" {
return fmt.Errorf("Cannot use %s as macro name", macroName)
}
if macroName == "b32" || macroName == "base32" {
return fmt.Errorf("Cannot use %s as macro name", macroName)
}
_, isTxnType := txnTypeMap[macroName]
_, isOnCompletion := onCompletionMap[macroName]
bbroder-algo marked this conversation as resolved.
Show resolved Hide resolved
if isTxnType || isOnCompletion {
return fmt.Errorf("Named constants cannot be used as macro names: %s", macroName)
}
if _, ok := pseudoOps[macroName]; ok {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that if we introduce a new pseudo op that happens to have the same name as an existing program's macro, their code will fail to assemble, regardless of whether they've increased their pragma version or not (since pseudo ops aren't versioned). This also applies to txn and on completion types, but I expect new values for these to be much rarer.

I don't have a suggestion to improve the situation, I just wanted to point out this relationship.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. I think we would probably introduce versioning to those names if/when there were newly introduced values.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Though I suppose we have not versioned pseudo-ops in the past, since they are compiled down to normal opcodes, and thus don't require versioning. We'll just have to keep this in mind and decide what to do then.

return fmt.Errorf("Macro names cannot be pseudo-ops: %s", macroName)
}
if version != assemblerNoVersion {
if _, ok := OpsByName[version][macroName]; ok {
return fmt.Errorf("Macro names cannot be opcodes: %s", macroName)
}
if fieldNames[version][macroName] {
return fmt.Errorf("Macro names cannot be field names: %s", macroName)
}
}
if _, ok := labels[macroName]; ok {
return fmt.Errorf("Labels cannot be used as macro names: %s", macroName)
}
return nil
}

// define handles a "#define NAME BODY..." directive line by recording BODY as
// the expansion of the macro NAME in ops.macros.
//
// tokens holds all tokens of the line and must be non-empty. An error is
// reported through ops and returned when the first token is not "#define",
// when the name or body is missing, or when checkMacroName rejects the name.
//
// A definition for which ops.cycle reports a cycle is not kept: the earlier
// definition of NAME is restored, or NAME is removed if it had none. In that
// case define still returns nil.
func define(ops *OpStream, tokens []string) error {
	if directive := tokens[0]; directive != "#define" {
		return ops.errorf("invalid syntax: %s", directive)
	}
	if len(tokens) < 3 {
		return ops.errorf("define directive requires a name and body")
	}
	name := tokens[1]
	if err := checkMacroName(name, ops.Version, ops.labels); err != nil {
		return ops.error(err)
	}
	earlier, redefining := ops.macros[name]
	// The full slice expression caps the body's capacity at its length, so a
	// later append to the stored body cannot write into this line's tokens.
	ops.macros[name] = tokens[2:len(tokens):len(tokens)]
	if !ops.cycle(name) {
		return nil
	}
	// Roll back the definition that introduced the cycle.
	if redefining {
		ops.macros[name] = earlier
	} else {
		delete(ops.macros, name)
	}
	return nil
}

func pragma(ops *OpStream, tokens []string) error {
if tokens[0] != "#pragma" {
return ops.errorf("invalid syntax: %s", tokens[0])
}
Expand Down Expand Up @@ -2001,11 +2136,12 @@ func (ops *OpStream) pragma(tokens []string) error {
// version for v1.
if ops.Version == assemblerNoVersion {
ops.Version = ver
} else if ops.Version != ver {
return ops.recheckMacroNames()
}
if ops.Version != ver {
return ops.errorf("version mismatch: assembling v%d with v%d assembler", ver, ops.Version)
} else {
// ops.Version is already correct, or needed to be upped.
}
// ops.Version is already correct, or needed to be upped.
return nil
case "typetrack":
if len(tokens) < 3 {
Expand Down
Loading