Commit 286d0ea

Move semantic encoding into parser and parse on init
Generating the encoding once should speed up semantic token
requests, since the encoding can be cached with the parser, and
it will also make the encoding easier to regression test.

Parsing on init prevents having an "unparsed" parser, which
would complicate accessing things like evaluated tokens and
semantic encoding.
aazuspan committed Aug 20, 2024
1 parent 09104fb commit 286d0ea
Showing 3 changed files with 42 additions and 38 deletions.
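
In caller terms, the change collapses two steps into one: SPINAsmParser(source).parse() becomes SPINAsmParser(source), and the derived attributes are ready as soon as the constructor returns. A minimal sketch of the new usage (the one-line program string is an arbitrary stand-in, not from this commit):

    from spinasm_lsp.parser import SPINAsmParser

    source = "sof 0,0"  # any SPINAsm program text; this line is a placeholder

    # Before this commit, construction and parsing were separate, so a
    # freshly constructed parser was "unparsed" and its token tables empty:
    #     parser = SPINAsmParser(source).parse()

    # Now __init__ calls super().parse() itself, so every instance is fully
    # parsed and its cached attributes are safe to read immediately.
    parser = SPINAsmParser(source)
    tokens = parser.evaluated_tokens      # populated during __init__
    encoding = parser.semantic_encoding   # computed once, reused per request
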
59 changes: 39 additions & 20 deletions src/spinasm_lsp/parser.py
@@ -147,9 +147,14 @@ def __init__(self, source: str):
         # Store built-in constants that were defined at initialization.
         self._constants: list[str] = list(self.symtbl.keys())
 
-        self.evaluated_tokens: TokenLookup[LSPToken] = TokenLookup()
+        super().parse()
+
+        self.evaluated_tokens: TokenLookup[LSPToken] = self._evaluate_tokens()
         """Tokens with additional metadata after evaluation."""
 
+        self.semantic_encoding: list[int] = self._encode_semantics()
+        """Integer-encoded token semantics for semantic highlighting."""
+
     def __mkopcodes__(self):
         """
         No-op.
@@ -183,25 +188,39 @@ def __next__(self):
             ):
                 self._definitions[base_token.stxt] = base_token.range
 
-    def _evaluate_token(self, token: ParsedToken) -> LSPToken:
-        """Evaluate a parsed token to determine its value and metadata."""
-        value = self.jmptbl.get(token.stxt, self.symtbl.get(token.stxt, None))
-        defined_range = self._definitions.get(token.without_address_modifier().stxt)
-
-        return LSPToken.from_parsed_token(
-            token=token,
-            value=value,
-            defined=defined_range,
-            is_constant=token.stxt in self._constants,
-            is_label=token.stxt in self.jmptbl,
-        )
-
-    def parse(self) -> SPINAsmParser:
-        """Parse and evaluate all tokens."""
-        super().parse()
-
-        for token in self._parsed_tokens:
-            evaluated_token = self._evaluate_token(token)
-            self.evaluated_tokens.add_token(evaluated_token)
-
-        return self
+    def _evaluate_tokens(self) -> TokenLookup[LSPToken]:
+        """Evaluate all parsed tokens to determine their values and metadata."""
+        evaluated_tokens: TokenLookup[LSPToken] = TokenLookup()
+
+        for token in self._parsed_tokens:
+            value = self.jmptbl.get(token.stxt, self.symtbl.get(token.stxt, None))
+            defined_range = self._definitions.get(token.without_address_modifier().stxt)
+            evaluated_token = LSPToken.from_parsed_token(
+                token=token,
+                value=value,
+                defined=defined_range,
+                is_constant=token.stxt in self._constants,
+                is_label=token.stxt in self.jmptbl,
+            )
+
+            evaluated_tokens.add_token(evaluated_token)
+
+        return evaluated_tokens
+
+    def _encode_semantics(self) -> list[int]:
+        """Encode the semantics of the parsed tokens for semantic highlighting."""
+        encoding: list[int] = []
+        prev_token_position = lsp.Position(0, 0)
+        for token in self.evaluated_tokens:
+            token_encoding = token.semantic_encoding(prev_token_position)
+
+            # Tokens without semantic encoding (e.g. operators) should be ignored so
+            # that the next encoding is relative to the last encoded token. Otherwise,
+            # character offsets would be incorrect.
+            if not token_encoding:
+                continue
+
+            encoding += token_encoding
+            prev_token_position = token.range.start
+
+        return encoding
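
For reference, the five integers that _encode_semantics accumulates per token follow the LSP semantic-token wire format: a line delta and start-character delta relative to the previous encoded token, then length, token-type index, and a modifier bitmask. A standalone sketch of that delta encoding (illustrative only; the real logic lives in LSPToken.semantic_encoding, which this diff does not show):

    from lsprotocol import types as lsp

    def encode_one(
        rng: lsp.Range, token_type: int, modifiers: int, prev_start: lsp.Position
    ) -> list[int]:
        # Offsets are relative to the previously *encoded* token, which is
        # why _encode_semantics skips unencoded tokens without updating
        # prev_token_position.
        delta_line = rng.start.line - prev_start.line
        # The character offset is relative only within the same line; on a
        # new line it is absolute.
        delta_char = (
            rng.start.character - prev_start.character
            if delta_line == 0
            else rng.start.character
        )
        length = rng.end.character - rng.start.character
        return [delta_line, delta_char, length, token_type, modifiers]
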
19 changes: 2 additions & 17 deletions src/spinasm_lsp/server.py
@@ -21,7 +21,7 @@ def _parse_document(source: str) -> SPINAsmParser:
     Parser are cached based on the source code to speed up subsequent parsing.
     """
-    return SPINAsmParser(source).parse()
+    return SPINAsmParser(source)
 
 
 class SPINAsmLanguageServer(LanguageServer):
@@ -342,22 +342,7 @@ async def semantic_tokens(
     ls: SPINAsmLanguageServer, params: lsp.SemanticTokensParams
 ) -> lsp.SemanticTokens:
     parser = await ls.get_parser(params.text_document.uri)
-
-    encoding: list[int] = []
-    prev_token_position = lsp.Position(0, 0)
-    for token in parser.evaluated_tokens:
-        token_encoding = token.semantic_encoding(prev_token_position)
-
-        # Tokens without semantic encoding (e.g. operators) should be ignored so that
-        # the next encoding is relative to the last encoded token. Otherwise, character
-        # offsets would be incorrect.
-        if not token_encoding:
-            continue
-
-        encoding += token_encoding
-        prev_token_position = token.range.start
-
-    return lsp.SemanticTokens(data=encoding)
+    return lsp.SemanticTokens(data=parser.semantic_encoding)
 
 
 def start() -> None:
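
The _parse_document docstring says parsers are cached by source, but the decorator sits outside this hunk. A plausible reading, assuming a functools.lru_cache keyed on the source string (the maxsize is a guess, not from the diff):

    from functools import lru_cache

    from spinasm_lsp.parser import SPINAsmParser

    @lru_cache(maxsize=1)  # assumed decorator; not visible in this hunk
    def _parse_document(source: str) -> SPINAsmParser:
        """Parse a document and return the parser."""
        # Because parsing (and now semantic encoding) happens in __init__,
        # a cache hit here skips both, and the semantic_tokens handler
        # reduces to an attribute read on the cached parser.
        return SPINAsmParser(source)

Either way, the handler's per-request loop is gone: repeated semanticTokens requests for unchanged source now cost a cache lookup plus an attribute read.
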
2 changes: 1 addition & 1 deletion tests/test_tokens.py
@@ -142,7 +142,7 @@ def test_get_token_positions():
     with open(patch) as fp:
         source = fp.read()
 
-    parser = SPINAsmParser(source).parse()
+    parser = SPINAsmParser(source)
 
     all_matches = parser.evaluated_tokens.get(name="apout")
     assert len(all_matches) == 4
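
The commit message's regression-testing point falls out of the same change: semantic_encoding is now a plain attribute, so it can be snapshot-tested without driving the LSP server. A hypothetical sketch (the fixture and snapshot paths are invented for illustration and are not part of this commit):

    import json

    from spinasm_lsp.parser import SPINAsmParser

    def test_semantic_encoding_regression():
        # Hypothetical paths -- not part of this commit.
        with open("tests/patches/Basic.spn") as fp:
            source = fp.read()
        with open("tests/snapshots/basic_encoding.json") as fp:
            expected = json.load(fp)

        parser = SPINAsmParser(source)

        # Every encoded token contributes exactly five integers.
        assert len(parser.semantic_encoding) % 5 == 0
        assert parser.semantic_encoding == expected
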
