diff --git a/README.md b/README.md index e69de29..b4c4831 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,15 @@ +# SPINAsm LSP Server + +A Language Server Protocol (LSP) server to provide language support for the [SPINAsm assembly language](http://www.spinsemi.com/Products/datasheets/spn1001-dev/SPINAsmUserManual.pdf). The LSP is built on an extended version of the [asfv1](https://github.com/ndf-zz/asfv1) parser. + +## Features + +- **Diagnostics**: Reports the location of syntax errors and warnings. +- **Hover**: Shows opcode documentation and assigned values on hover. +- **Completion**: Provides suggestions for opcodes, labels, and variables. +- **Renaming**: Allows renaming of labels and variables. +- **Go to definition**: Jumps to the definition of a label, memory address, or variable. + +------ + +*This project is unaffiliated with Spin Semiconductor. Included documentation and examples are Copyright © 2018 Spin Semiconductor.* diff --git a/pyproject.toml b/pyproject.toml index f7b8467..5e6cce9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,11 +5,15 @@ build-backend = "hatchling.build" [project] name = "spinasm-lsp" dynamic = [ "version",] -description = "A Language Server Protocol implementation for SpinASM" +description = "A Language Server Protocol implementation for SPINAsm" readme = "README.md" requires-python = ">=3.9" keywords = [] -dependencies = [ "pygls", "lsprotocol", "lark" ] +dependencies = [ + "pygls", + "lsprotocol", + "asfv1==1.2.7", +] [[project.authors]] name = "Aaron Zuspan" @@ -19,6 +23,9 @@ file = "LICENSE" [project.urls] Homepage = "https://github.com/aazuspan/spinasm-lsp" +[project.scripts] +spinasm-lsp = "spinasm_lsp.server:start" + [tool.ruff] fix = true show-fixes = true @@ -33,7 +40,7 @@ select = [ "E", "I", "F", "B", "FA", "UP", "PT", "Q", "RET", "SIM", "PERF",] dependencies = [ "pre-commit",] [tool.hatch.envs.test] -dependencies = [ "pytest", "pytest-cov", "mistletoe" ] +dependencies = [ "pytest", "pytest-cov", 
"mistletoe", "pytest-lsp" ] [tool.hatch.envs.test_matrix] template = "test" diff --git a/src/spinasm_lsp/docs/assemblers/equ.md b/src/spinasm_lsp/docs/assemblers/equ.md new file mode 100644 index 0000000..8be6eba --- /dev/null +++ b/src/spinasm_lsp/docs/assemblers/equ.md @@ -0,0 +1,42 @@ +## `EQU` + +------------------ + +The `EQU` statement allows one to define symbolic operands in order to increase the readability of the source code. Technically an `EQU` statement such as: + +```assembly +Name EQU Value [;Comment] +``` + +will cause SPINAsm to replace any occurrence of the literal "Name" by the literal "Value" within each instruction line during the assembly process excluding the comment portion of an instruction line. + +With the exception of blanks, any printable character is allowed within the literal "Name". However there are restrictions: "Name" must be an unique string, is limited to 32 characters and the first character must be a letter excluding the "+" and "­" signs and the "!" character. + +The reason for not allowing these characters being the first character of "Name" is that any symbolic operand may be prefixed with a sign or the "!" negation operator within the instruction line. The assembler will then perform the required conversion of the operand while processing the individual instruction lines. + +There is another, not syntax related, restriction when using symbolic operands defined by an `EQU` statement: Predefined symbols. As given in the end of the manual there is a set of predefined symbolic operands which should be omitted as "Name" literals within an `EQU` statement. It is not that these predefined symbols are prohibited, it is just that using them within an `EQU` statement will overwrite their predefined value. + +With the literal "Value" things are slightly more complicated since its format has to comply with the syntactical rules defined for the operand type it is to represent. 
Although it is suggested to place `EQU` statements at the beginning of the source code file, this is not mandatory. However, the `EQU` statement has to be defined before the literal "Name" can be used as a symbolical operand within an instruction line. + +### Remark +SPINAsm has no way of performing range checking while processing the EQU statement. This is because the operand type of value is not known to SPINAsm at the time the EQU statement is processed. As a result, range checking is performed when assembling the instruction line in which "Name" is to be replaced by "Value". + +### Example +```assembly +Attn EQU 0.5 ; 0.5 = -6dB attenuation +Tmp_Reg EQU 63 ; Temporary register within register file +Tmp_Del EQU $2000 ; Temporary memory location within delay ram +; +;------------------------------ +sof 0,0 ; Clear ACC +rda Tmp_Del,Attn ; Load sample from delay ram $2000, + ; multiply it by 0.5 and add ACC content +wrax Tmp_Reg,1.0 ; Save result to Tmp_Reg but keep it in ACC +wrax DACL,0 ; Move ACC to DAC left (predefined symbol) + ; and then clear ACC +``` + +If `Tmp_Del` was accidentally replaced by `Tmp_Reg` within the `rda` instruction line, SPINAsm would not detect this semantic error – simply because using `Tmp_Reg` would be syntactically correct. + +------------------ +*Adapted from Spin Semiconductor SPINAsm & FV-1 Instruction Set reference manual. Copyright 2008 by Spin Semiconductor.* \ No newline at end of file diff --git a/src/spinasm_lsp/docs/assemblers/mem.md b/src/spinasm_lsp/docs/assemblers/mem.md new file mode 100644 index 0000000..8ca98af --- /dev/null +++ b/src/spinasm_lsp/docs/assemblers/mem.md @@ -0,0 +1,64 @@ +## `MEM` + +------------------ + +The `MEM` Statement allows the user to partition the delay ram memory into individual blocks. A memory block declared by the statement + +```assembly +Name MEM Size [;Comment] +``` + +can be referenced by `Name` from within an instruction line. 
`Name` has to comply with the same syntactical rules previously defined with the EQU statement, "Size" is an unsigned integer in the range of 1 to 32768 which might be entered either in decimal or in hexadecimal. + +Besides the explicit identifier `Name` the assembler defines two additional implicit identifiers, `Name#` and `Name^`. `Name` refers to the first memory location within the memory block, whereas `Name#` refers to the last memory location. The identifier `Name^` references the middle of the memory block, or in other words its center. If a memory block of size 1 is defined, all three identifiers will address the same memory location. In case the memory block is of size 2, `Name` and `Name^` will address the same memory location, if the size is an even number the memory block cannot exactly be halved – the midpoint `Name^` will be calculated as: `size MOD 2`. + +Optionally all three identifiers can be offset by a positive or negative integer which is entered in decimal. Although range checking is performed when using offsets, there is no error generated if the result of the address calculation exceeds the address range of the memory block. This is also true for those cases in which the result will "wrap around" the physical 32k boundary of the delay memory. However, a warning will be issued in order to alert the user regarding the out of range condition. + +Mapping the memory blocks to their physical delay ram addresses is solely handled by SPINAsm. The user has no possibility to explicitly force SPINAsm to place a certain memory block to a specific physical address range. This of course does not mean that the user has no control over the layout of the delay ram at all: Knowing that SPINAsm will map memory blocks in the order they become defined within the source file, the user can implicitly control the memory map of the delay ram. 
+ +### Example +```assembly +DelR MEM 1024 ; Right channel delay line +DelL MEM 1024 ; Left channel delay line + ; +;------------------------------ +sof 0,0 ; Clear ACC +rdax ADCL,1.0 ; Read in left ADC +wra DelL,0.25 ; Save it to the start of the left delay + ; line and keep a -12dB replica in ACC +rdax DelL^+20,0.25; Add sample from "center of the left delay + ; line + 20 samples" times 0.25 to ACC +rdax DelL#,0.25 ; Add sample from "end of the left delay + ; line" times 0.25 to ACC +rdax DelL-512,0.25; Add sample from "start of the left delay + ; line - 512 samples" times 0.25 to ACC +``` + +### Remark +At this point the result of the address calculation will reference a sample from outside the `DelL` memory block. While being syntactically correct, the instruction might not result in what the user intended. In order to make the user aware of that potential semantic error, a warning will be issued. + +```assembly +wrax DACL,0 ; Result to DACL, clear ACC + ; +rdax ADCR,1.0 ; Read in right ADC +wra DelR,0.25 ; Save it to the start of the right delay + ; line and keep a -12dB replica in ACC +rdax DelR^-20,0.25; Add sample from center of the right delay + ; line - 20 samples times 0.25 to ACC +rdax DelR#,0.25 ; Add sample from end of the right delay line + ; times 0.25 to ACC +rdax DelR-512,0.25; Add sample from start of the right delay + ; line - 512 samples times 0.25 to ACC +``` + +### Remark +At this point the result of the address calculation will reference a sample from outside the `DelR` memory block. And even worse than the previous case: This time the sample will be fetched from delay ram address 32256 which will contain a sample that is apx. 1 second old! + +Again, syntactically correct but most likely a semantic error – warnings will be issued. + +```assembly +wrax DACR,0 ; Result to DACR, clear ACC +``` + +------------------ +*Adapted from Spin Semiconductor SPINAsm & FV-1 Instruction Set reference manual. 
Copyright 2008 by Spin Semiconductor.* \ No newline at end of file diff --git a/src/spinasm_lsp/documentation.py b/src/spinasm_lsp/documentation.py index eda40da..197768e 100644 --- a/src/spinasm_lsp/documentation.py +++ b/src/spinasm_lsp/documentation.py @@ -1,3 +1,5 @@ +"""Documentation utilities for the SPINAsm LSP.""" + from __future__ import annotations from collections import UserDict @@ -21,8 +23,8 @@ def read(self) -> str: class DocMap(UserDict): """A mapping of instructions to markdown documentation strings.""" - def __init__(self, folder: str): - self.dir = Path(str(DOC_DIR.joinpath(folder))) + def __init__(self, folders: list[str]): + self.folders = [Path(str(DOC_DIR.joinpath(folder))) for folder in folders] self.data = self.load_markdown() @staticmethod @@ -40,15 +42,13 @@ def __contains__(self, key): def load_markdown(self) -> dict[str, str]: data = {} - files = self.dir.glob("*.md") - for file in files: - md = MarkdownFile(file) - # Store with lowercase keys to allow case-insensitive searches - data[md.name.lower()] = md.read() + for folder in self.folders: + if not folder.exists(): + raise FileNotFoundError(f"Folder {folder} does not exist.") + files = folder.glob("*.md") + for file in files: + md = MarkdownFile(file) + # Store with lowercase keys to allow case-insensitive searches + data[md.name.lower()] = md.read() return data - - -if __name__ == "__main__": - instructions = DocMap(folder="instructions") - print(instructions["RDAX"]) diff --git a/src/spinasm_lsp/logging.py b/src/spinasm_lsp/logging.py deleted file mode 100644 index 72f1491..0000000 --- a/src/spinasm_lsp/logging.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Any - -from lsprotocol import types as lsp -from pygls import server - - -class ServerLogger: - def __init__(self, server: server.LanguageServer): - self.server = server - - def debug(self, msg: Any) -> None: - self.server.show_message_log(str(msg), lsp.MessageType.Debug) - - def info(self, msg: Any) -> None: - 
self.server.show_message_log(str(msg), lsp.MessageType.Info) - - def warning(self, msg: Any) -> None: - self.server.show_message_log(str(msg), lsp.MessageType.Warning) - - def error(self, msg: Any) -> None: - self.server.show_message_log(str(msg), lsp.MessageType.Error) diff --git a/src/spinasm_lsp/parser.py b/src/spinasm_lsp/parser.py index 2d78352..79fd5b7 100644 --- a/src/spinasm_lsp/parser.py +++ b/src/spinasm_lsp/parser.py @@ -1,213 +1,345 @@ +"""The SPINAsm language parser.""" + from __future__ import annotations -from dataclasses import dataclass -from typing import Literal +import bisect +import copy +from typing import Literal, TypedDict + +import lsprotocol.types as lsp +from asfv1 import fv1parse + + +class Symbol(TypedDict): + """ + The token specification used by asfv1. + + Note that we exclude EOF tokens, as they are ignored by the LSP. + """ + + type: Literal[ + "ASSEMBLER", + "INTEGER", + "LABEL", + "TARGET", + "MNEMONIC", + "OPERATOR", + "FLOAT", + "ARGSEP", + ] + txt: str + stxt: str + val: int | float | None + + +class Token: + """ + A parsed token. + + Parameters + ---------- + symbol : Symbol + The symbol parsed by asfv1 representing the token. + start : lsp.Position + The start position of the token in the source file. + end : lsp.Position, optional + The end position of the token in the source file. If not provided, the end + position is calculated based on the width of the symbol's stxt. + + Attributes + ---------- + symbol : Symbol + The symbol parsed by asfv1 representing the token. + range : lsp.Range + The location range of the token in the source file. + next_token : Token | None + The token that follows this token in the source file. + prev_token : Token | None + The token that precedes this token in the source file. 
+ """ -from lark import Lark, Transformer, v_args -from lark.exceptions import VisitError + def __init__( + self, symbol: Symbol, start: lsp.Position, end: lsp.Position | None = None + ): + if end is None: + width = len(symbol["stxt"]) + end = lsp.Position(line=start.line, character=start.character + width - 1) + + self.symbol: Symbol = symbol + self.range: lsp.Range = lsp.Range(start=start, end=end) + self.next_token: Token | None = None + self.prev_token: Token | None = None + + def __repr__(self) -> str: + return self.symbol["stxt"] + + def concatenate(self, other: Token) -> Token: + """ + Concatenate by merging with another token, in place. + + In practice, this is used for the multi-word opcodes that are parsed as separate + tokens: CHO RDA, CHO RDAL, and CHO SOF. + """ + if any( + symbol_type not in ("MNEMONIC", "LABEL") + for symbol_type in (self.symbol["type"], other.symbol["type"]) + ): + raise TypeError("Only MNEMONIC and LABEL symbols can be concatenated.") + self.symbol["txt"] += f" {other.symbol['txt']}" + self.symbol["stxt"] += f" {other.symbol['stxt']}" + self.range.end = other.range.end + return self -class ParsingError(Exception): ... + def _clone(self) -> Token: + """Return a clone of the token to avoid mutating the original.""" + return copy.deepcopy(self) + + def without_address_modifier(self) -> Token: + """ + Create a clone of the token with the address modifier removed. 
+ """ + if not str(self).endswith("#") and not str(self).endswith("^"): + return self + + token = self._clone() + token.symbol["stxt"] = token.symbol["stxt"][:-1] + token.range.end.character -= 1 + + return token + + +class TokenRegistry: + """A registry of tokens and their positions in a source file.""" + + def __init__(self, tokens: list[Token] | None = None) -> None: + self._prev_token: Token | None = None + + """A dictionary mapping program lines to all Tokens on that line.""" + self._tokens_by_line: dict[int, list[Token]] = {} + + """A dictionary mapping token names to all matching Tokens in the program.""" + self._tokens_by_name: dict[str, list[Token]] = {} + + for token in tokens or []: + self.register_token(token) + + def register_token(self, token: Token) -> None: + """Add a token to the registry.""" + # Handle multi-word CHO instructions by merging the second token with the first + # and skipping the second token. + if str(self._prev_token) == "CHO" and str(token) in ("RDA", "RDAL", "SOF"): + self._prev_token.concatenate(token) # type: ignore + return + + if token.range.start.line not in self._tokens_by_line: + self._tokens_by_line[token.range.start.line] = [] + + # Record the previous and next token for each token to allow traversing + if self._prev_token: + token.prev_token = self._prev_token + self._prev_token.next_token = token + + # Store the token on its line + self._tokens_by_line[token.range.start.line].append(token) + self._prev_token = token + + # Store user-defined tokens together by name. Other token types could be stored, + # but currently there's no use case for retrieving their positions. + if token.symbol["type"] in ("LABEL", "TARGET"): + # Tokens are stored by name without address modifiers, so that e.g. Delay# + # and Delay can be retrieved with the same query. This allows for renaming + # all instances of a memory token. 
+ token = token.without_address_modifier() + + if str(token) not in self._tokens_by_name: + self._tokens_by_name[str(token)] = [] + + self._tokens_by_name[str(token)].append(token) + + def get_matching_tokens(self, token_name: str) -> list[Token]: + """Retrieve all tokens with a given name in the program.""" + return self._tokens_by_name.get(token_name.upper(), []) + + def get_token_at_position(self, position: lsp.Position) -> Token | None: + """Retrieve the token at the given position.""" + if position.line not in self._tokens_by_line: + return None + + line_tokens = self._tokens_by_line[position.line] + token_starts = [t.range.start.character for t in line_tokens] + token_ends = [t.range.end.character for t in line_tokens] + + idx = bisect.bisect_left(token_starts, position.character) + + # The index returned by bisect_left points to the start value >= character. This + # will either be the first character of the token or the start of the next + # token. First check if we're out of bounds, then shift left unless we're at the + # first character of the token. + if idx == len(line_tokens) or token_starts[idx] != position.character: + idx -= 1 + + # If the col falls after the end of the token, we're not inside a token. 
+ if position.character > token_ends[idx]: + return None + return line_tokens[idx] -@dataclass -class Expression: - expression: list[int | float | str] - def __eq__(self, other): - # If the expression has a single value, match against that - if len(self.expression) == 1: - return self.expression[0] == other +class SPINAsmParser(fv1parse): + """A modified version of fv1parse optimized for use with LSP.""" - # Otherwise, match the expression as a concated string of values and operators - return " ".join(map(str, self.expression)) == other + sym: Symbol | None + def __init__(self, source: str): + self.diagnostics: list[lsp.Diagnostic] = [] + self.definitions: dict[str, lsp.Range] = {} + self.current_character: int = 0 + self.previous_character: int = 0 + self.token_registry = TokenRegistry() -@dataclass -class Instruction: - opcode: str - args: list[Expression] + super().__init__( + source=source, + clamp=True, + spinreals=False, + # Ignore the callbacks in favor of overriding their callers + wfunc=lambda *args, **kwargs: None, + efunc=lambda *args, **kwargs: None, + ) + # Keep an unchanged copy of the original source + self._source: list[str] = self.source.copy() + + def __mkopcodes__(self): + """ + No-op. + + Generating opcodes isn't needed for LSP functionality, so we'll skip it. 
+ """ + + def _record_diagnostic( + self, msg: str, line: int, character: int, severity: lsp.DiagnosticSeverity + ): + """Record a diagnostic message for the LSP.""" + self.diagnostics.append( + lsp.Diagnostic( + range=lsp.Range( + start=lsp.Position(line, character=character), + end=lsp.Position(line, character=character), + ), + message=msg, + severity=severity, + source="SPINAsm", + ) + ) -@dataclass -class Assignment: - type: Literal["equ", "mem"] - name: str - value: Expression + def parseerror(self, msg: str, line: int | None = None): + """Override to record parsing errors as LSP diagnostics.""" + if line is None: + line = self.prevline + + # Offset the line from the parser's 1-indexed line to the 0-indexed line + self._record_diagnostic( + msg, + line=line - 1, + character=self.current_character, + severity=lsp.DiagnosticSeverity.Error, + ) + def scanerror(self, msg: str): + """Override to record scanning errors as LSP diagnostics.""" + self._record_diagnostic( + msg, + line=self.current_line, + character=self.current_character, + severity=lsp.DiagnosticSeverity.Error, + ) -@dataclass -class Label: - name: str + def parsewarn(self, msg: str, line: int | None = None): + """Override to record parsing warnings as LSP diagnostics.""" + if line is None: + line = self.prevline + + # Offset the line from the parser's 1-indexed line to the 0-indexed line + self._record_diagnostic( + msg, + line=line - 1, + character=self.current_character, + severity=lsp.DiagnosticSeverity.Warning, + ) + @property + def sline(self): + return self._sline -@v_args(inline=True) -class FV1ProgramTransformer(Transformer): - local_vars: dict[str, float] - memory: dict[str, float] + @sline.setter + def sline(self, value): + """Update the current line and reset the column.""" + self._sline = value - def __init__( - self, - local_vars: dict[str, float], - memory: dict[str, float], - visit_tokens: bool = True, - ) -> None: - self.local_vars = local_vars - self.memory = memory - 
super().__init__(visit_tokens=visit_tokens) - - def instruction(self, opcode: str, args: list | None, _) -> Instruction: - return Instruction(opcode, args or []) - - def assignment(self, mapping, _): - return mapping - - def label(self, name) -> Label: - return Label(name) - - def equ(self, name: str, value) -> Assignment: - self.local_vars[name] = value - return Assignment(type="equ", name=name, value=value) - - def mem(self, name: str, value) -> Assignment: - self.memory[name] = value - return Assignment(type="mem", name=name, value=value) - - def value(self, negative: str | None, value: float) -> float: - """A negated value.""" - if negative: - value *= -1 - - return value - - def DEC_NUM(self, token) -> int | float: - if "." in token: - return float(token) - return int(token) - - def HEX_NUM(self, token) -> int: - # Hex numbers can be written with either $ or 0x prefix - token = token.replace("$", "0x") - return int(token, base=16) - - def BIT_VECTOR(self, token) -> int: - # Remove the % prefix and optional underscores - return int(token[1:].replace("_", ""), base=2) - - def IDENT(self, token) -> str: - # Identifiers are case-insensitive and are stored in uppercase for consistency - # with the FV-1 assembler. 
- return token.upper() - - @v_args(inline=False) - def args(self, tokens): - return tokens - - @v_args(inline=False) - def expr(self, tokens): - return Expression(tokens) - - @v_args(inline=False) - def program(self, tokens): - return list(tokens) - - -class FV1Program: - constants = { - "SIN0_RATE": 0x00, - "SIN0_RANGE": 0x01, - "SIN1_RATE": 0x02, - "SIN1_RANGE": 0x03, - "RMP0_RATE": 0x04, - "RMP0_RANGE": 0x05, - "RMP1_RATE": 0x06, - "RMP1_RANGE": 0x07, - "POT0": 0x10, - "POT1": 0x11, - "POT2": 0x12, - "ADCL": 0x14, - "ADCR": 0x15, - "DACL": 0x16, - "DACR": 0x17, - "ADDR_PTR": 0x18, - "REG0": 0x20, - "REG1": 0x21, - "REG2": 0x22, - "REG3": 0x23, - "REG4": 0x24, - "REG5": 0x25, - "REG6": 0x26, - "REG7": 0x27, - "REG8": 0x28, - "REG9": 0x29, - "REG10": 0x2A, - "REG11": 0x2B, - "REG12": 0x2C, - "REG13": 0x2D, - "REG14": 0x2E, - "REG15": 0x2F, - "REG16": 0x30, - "REG17": 0x31, - "REG18": 0x32, - "REG19": 0x33, - "REG20": 0x34, - "REG21": 0x35, - "REG22": 0x36, - "REG23": 0x37, - "REG24": 0x38, - "REG25": 0x39, - "REG26": 0x3A, - "REG27": 0x3B, - "REG28": 0x3C, - "REG29": 0x3D, - "REG30": 0x3E, - "REG31": 0x3F, - "SIN0": 0x00, - "SIN1": 0x01, - "RMP0": 0x02, - "RMP1": 0x03, - "RDA": 0x00, - "SOF": 0x02, - "RDAL": 0x03, - "SIN": 0x00, - "COS": 0x01, - "REG": 0x02, - "COMPC": 0x04, - "COMPA": 0x08, - "RPTR2": 0x10, - "NA": 0x20, - "RUN": 0x10, - "ZRC": 0x08, - "ZRO": 0x04, - "GEZ": 0x02, - "NEG": 0x01, - } - local_vars: dict[str, float] = {**constants} - memory: dict[str, float] = {} - - def __init__(self, code: str): - self.transformer = FV1ProgramTransformer( - local_vars=self.local_vars, memory=self.memory - ) + # Reset the column to 0 when we move to a new line + self.previous_character = self.current_character + self.current_character = 0 - self.parser = Lark.open_from_package( - package="spinasm_lsp", - grammar_path="spinasm.lark", - start="program", - parser="lalr", - strict=False, - transformer=self.transformer, + @property + def current_line(self): + """Get the 
zero-indexed current line.""" + return self.sline - 1 + + @property + def previous_line(self): + """Get the zero-indexed previous line.""" + return self.prevline - 1 + + def __next__(self): + """Parse the next symbol and update the column and definitions.""" + super().__next__() + if self.sym["type"] == "EOF": + return + + self._update_column() + + token_start = lsp.Position( + line=self.current_line, character=self.current_character ) - # Make sure the code ends with a newline to properly parse the last line - if not code.endswith("\n"): - code += "\n" + token = Token(self.sym, start=token_start) + self.token_registry.register_token(token) - try: - self.statements = self.parser.parse(code) - except VisitError as e: - # Unwrap errors thrown by FV1ProgramTransformer - if wrapped_err := e.__context__: - raise wrapped_err from None + base_token = token.without_address_modifier() + is_user_definable = base_token.symbol["type"] in ("LABEL", "TARGET") + is_defined = str(base_token) in self.jmptbl or str(base_token) in self.symtbl + + if ( + is_user_definable + and not is_defined + # Labels appear before their target definition, so override when the target + # is defined. 
+ or base_token.symbol["type"] == "TARGET" + ): + self.definitions[str(base_token)] = base_token.range - raise e + def _update_column(self): + """Set the current column based on the last parsed symbol.""" + current_line_txt = self._source[self.current_line] + current_symbol = self.sym.get("txt", None) or "" + + self.previous_character = self.current_character + try: + # Start at the current column to skip previous duplicates of the symbol + self.current_character = current_line_txt.index( + current_symbol, self.current_character + ) + except ValueError: + self.current_character = 0 + + def parse(self) -> SPINAsmParser: + """Parse and return the parser.""" + super().parse() + return self + + +if __name__ == "__main__": + code = r"""cho rda,sin0,sin|reg|compc,0""" + parsed = SPINAsmParser(code).parse() + print(parsed.token_registry._tokens_by_line[0]) diff --git a/src/spinasm_lsp/server.py b/src/spinasm_lsp/server.py index daf5015..b145b72 100644 --- a/src/spinasm_lsp/server.py +++ b/src/spinasm_lsp/server.py @@ -1,47 +1,242 @@ +"""The SPINAsm Language Server Protocol implementation.""" + +from __future__ import annotations + +from functools import lru_cache +from typing import Any + from lsprotocol import types as lsp from pygls.server import LanguageServer from . import __version__ from .documentation import DocMap -from .logging import ServerLogger +from .parser import SPINAsmParser -LSP_SERVER = LanguageServer( - name="spinasm-lsp", - version=__version__, - max_workers=5, -) -LOGGER = ServerLogger(LSP_SERVER) +@lru_cache(maxsize=1) +def _parse_document(source: str) -> SPINAsmParser: + """ + Parse a document and return the parser. + + Parser are cached based on the source code to speed up subsequent parsing. 
+ """ + return SPINAsmParser(source).parse() + + +class SPINAsmLanguageServer(LanguageServer): + def __init__(self, *args, **kwargs) -> None: + self._prev_parser: SPINAsmParser | None = None + self.documentation = DocMap(folders=["instructions", "assemblers"]) + + super().__init__(*args, name="spinasm-lsp", version=__version__, **kwargs) + + def debug(self, msg: Any) -> None: + """Log a debug message.""" + # MessageType.Debug is a proposed feature of 3.18.0, and isn't fully supported + # yet. + self.show_message_log(str(msg), lsp.MessageType.Log) + + def info(self, msg: Any) -> None: + """Log an info message.""" + self.show_message_log(str(msg), lsp.MessageType.Info) + + def warning(self, msg: Any) -> None: + """Log a warning message.""" + self.show_message_log(str(msg), lsp.MessageType.Warning) + + def error(self, msg: Any) -> None: + """Log an error message.""" + self.show_message_log(str(msg), lsp.MessageType.Error) + + async def get_parser(self, uri: str) -> SPINAsmParser: + """Return a parser for the document, caching if possible.""" + document = self.workspace.get_text_document(uri) + parser = _parse_document(document.source) + + # Skip publishing diagnostics if the parser is unchanged + if parser is not self._prev_parser: + self.publish_diagnostics(document.uri, parser.diagnostics) + self._prev_parser = parser + + return parser + -# TODO: Probably load async as part of a custom language server subclass -INSTRUCTIONS = DocMap(folder="instructions") +server = SPINAsmLanguageServer(max_workers=5) -@LSP_SERVER.feature(lsp.TEXT_DOCUMENT_HOVER) -def hover(ls: LanguageServer, params: lsp.HoverParams) -> lsp.Hover: +@server.feature(lsp.TEXT_DOCUMENT_DID_CHANGE) +async def did_change( + ls: SPINAsmLanguageServer, params: lsp.DidChangeTextDocumentParams +): + """Run diagnostics on changed document.""" + await ls.get_parser(params.text_document.uri) + + +@server.feature(lsp.TEXT_DOCUMENT_DID_SAVE) +async def did_save(ls: SPINAsmLanguageServer, params: 
lsp.DidSaveTextDocumentParams): + """Run diagnostics on saved document.""" + await ls.get_parser(params.text_document.uri) + + +@server.feature(lsp.TEXT_DOCUMENT_DID_OPEN) +async def did_open(ls: SPINAsmLanguageServer, params: lsp.DidOpenTextDocumentParams): + """Run diagnostics on open document.""" + await ls.get_parser(params.text_document.uri) + + +@server.feature(lsp.TEXT_DOCUMENT_DID_CLOSE) +def did_close( + ls: SPINAsmLanguageServer, params: lsp.DidCloseTextDocumentParams +) -> None: + """Clear the diagnostics on close.""" + ls.publish_diagnostics(params.text_document.uri, []) + + +def _get_defined_hover(stxt: str, parser: SPINAsmParser) -> str: + """Get a hover message with the value of a defined variable or label.""" + # Check jmptbl first since labels are also defined in symtbl + if stxt in parser.jmptbl: + hover_definition = parser.jmptbl[stxt] + return f"(label) {stxt}: Offset[{hover_definition}]" + if stxt in parser.symtbl: + hover_definition = parser.symtbl[stxt] + return f"(constant) {stxt}: Literal[{hover_definition}]" + + return "" + + +@server.feature(lsp.TEXT_DOCUMENT_HOVER) +async def hover(ls: SPINAsmLanguageServer, params: lsp.HoverParams) -> lsp.Hover | None: """Retrieve documentation from symbols on hover.""" - document = ls.workspace.get_text_document(params.text_document.uri) - pos = params.position + parser = await ls.get_parser(params.text_document.uri) - # TODO: Handle multi-word instructions like CHO RDA, CHO SOF, CHO RDAL - try: - word = document.word_at_position(pos) - except IndexError: + if (token := parser.token_registry.get_token_at_position(params.position)) is None: return None - word_docs = INSTRUCTIONS.get(word, None) - if word_docs: - return lsp.Hover( - contents=lsp.MarkupContent(kind=lsp.MarkupKind.Markdown, value=word_docs), + hover_msg = None + if token.symbol["type"] in ("LABEL", "TARGET"): + hover_msg = _get_defined_hover(str(token), parser=parser) + + elif token.symbol["type"] in ("ASSEMBLER", "MNEMONIC"): + hover_msg 
= ls.documentation.get(str(token), "") + + return ( + None + if not hover_msg + else lsp.Hover( + contents=lsp.MarkupContent(kind=lsp.MarkupKind.Markdown, value=hover_msg), ) + ) + + +@server.feature(lsp.TEXT_DOCUMENT_COMPLETION) +async def completions( + ls: SPINAsmLanguageServer, params: lsp.CompletionParams +) -> lsp.CompletionList: + """Returns completion items.""" + parser = await ls.get_parser(params.text_document.uri) + + symbol_completions = [ + lsp.CompletionItem( + label=symbol, + kind=lsp.CompletionItemKind.Constant, + detail=_get_defined_hover(symbol, parser=parser), + ) + for symbol in parser.symtbl + ] + + label_completions = [ + lsp.CompletionItem( + label=label, + kind=lsp.CompletionItemKind.Reference, + detail=_get_defined_hover(label, parser=parser), + ) + for label in parser.jmptbl + ] + + opcode_completions = [ + lsp.CompletionItem( + label=opcode, + kind=lsp.CompletionItemKind.Function, + detail="(opcode)", + documentation=lsp.MarkupContent( + kind=lsp.MarkupKind.Markdown, value=ls.documentation[opcode] + ), + ) + for opcode in [k.upper() for k in ls.documentation] + ] + + return lsp.CompletionList( + is_incomplete=False, + items=symbol_completions + label_completions + opcode_completions, + ) + + +@server.feature(lsp.TEXT_DOCUMENT_DEFINITION) +async def definition( + ls: SPINAsmLanguageServer, params: lsp.DefinitionParams +) -> lsp.Location | None: + """Returns the definition location of a symbol.""" + parser = await ls.get_parser(params.text_document.uri) + + document = ls.workspace.get_text_document(params.text_document.uri) + + if (token := parser.token_registry.get_token_at_position(params.position)) is None: + return None + + # Definitions should be checked against the base token name, ignoring address + # modifiers. 
+ base_token = token.without_address_modifier() + + if str(base_token) not in parser.definitions: + return None + + return lsp.Location( + uri=document.uri, + range=parser.definitions[str(base_token)], + ) + + +@server.feature(lsp.TEXT_DOCUMENT_PREPARE_RENAME) +async def prepare_rename(ls: SPINAsmLanguageServer, params: lsp.PrepareRenameParams): + """Called by the client to determine if renaming the symbol at the given location + is a valid operation.""" + parser = await ls.get_parser(params.text_document.uri) + + if (token := parser.token_registry.get_token_at_position(params.position)) is None: + return None + + # Renaming is checked against the base token name, ignoring address modifiers. + base_token = token.without_address_modifier() + + # Only user-defined labels should support renaming + if str(base_token) not in parser.definitions: + ls.info(f"Can't rename non-user defined token {base_token}.") + return None + + return lsp.PrepareRenameResult_Type2(default_behavior=True) + + +@server.feature( + lsp.TEXT_DOCUMENT_RENAME, options=lsp.RenameOptions(prepare_provider=True) +) +async def rename(ls: SPINAsmLanguageServer, params: lsp.RenameParams): + parser = await ls.get_parser(params.text_document.uri) + + if (token := parser.token_registry.get_token_at_position(params.position)) is None: + return None + + # Ignore address modifiers so that e.g. 
we can rename `Delay` by renaming `Delay#` + base_token = token.without_address_modifier() + matching_tokens = parser.token_registry.get_matching_tokens(str(base_token)) - return None + edits = [lsp.TextEdit(t.range, new_text=params.new_name) for t in matching_tokens] + return lsp.WorkspaceEdit(changes={params.text_document.uri: edits}) def start() -> None: - LSP_SERVER.start_io() + server.start_io() if __name__ == "__main__": - instructions = DocMap(folder="instructions") start() diff --git a/src/spinasm_lsp/spinasm.lark b/src/spinasm_lsp/spinasm.lark deleted file mode 100644 index 95b0953..0000000 --- a/src/spinasm_lsp/spinasm.lark +++ /dev/null @@ -1,60 +0,0 @@ -program: (instruction | assignment | label)* - -instruction: OPCODE [args] NEWLINE -args: expr (","+ expr)* -expr: value (OPERATOR value)* -value: [NEGATIVE] (NAME | HEX_NUM | DEC_NUM | BIT_VECTOR) - -assignment: (equ | mem) NEWLINE -equ: IDENT "EQU"i expr | "EQU"i IDENT expr -mem: IDENT "MEM"i expr | "MEM"i IDENT expr - -label: IDENT ":" - - -OPCODE.1: "ABSA"i - | "AND"i - | "CHO"i - | "CLR"i - | "EXP"i - | "JAM"i - | "LDAX"i - | "LOG"i - | "MAXX"i - | "MULX"i - | "NOT"i - | "OR"i - | "RDA"i - | "RDAX"i - | "RDFX"i - | "RMPA"i - | "SKP"i - | "SOF"i - | "WLDR"i - | "WLDS"i - | "WRA"i - | "WRAP"i - | "WRAX"i - | "WRHX"i - | "WRLX"i - | "XOR"i - -// NAME can be suffixed with ^ or # to modify memory addressing -NAME: IDENT [ADDR_MODIFIER] -ADDR_MODIFIER: ["^" | "#"] -NEGATIVE: "-" -COMMENT: ";" /[^\n]/* -OPERATOR: "+" | "-" | "*" | "/" | "|" | "&" -HEX_NUM.1: "0x"i HEXDIGIT+ | "$" HEXDIGIT+ -BIT_VECTOR: "%" /[01](_?[01])*/ - -%import common.WS_INLINE -%import common.WS -%import common.NEWLINE -%import common.HEXDIGIT -%import common.CNAME -> IDENT -%import common.NUMBER -> DEC_NUM - -%ignore WS -%ignore WS_INLINE -%ignore COMMENT diff --git a/tests/conftest.py b/tests/conftest.py index b32b615..d9743a2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,233 @@ +from __future__ import 
annotations + from pathlib import Path +from typing import TypedDict + +import lsprotocol.types as lsp PATCH_DIR = Path(__file__).parent / "patches" TEST_PATCHES = list(PATCH_DIR.glob("*.spn")) assert TEST_PATCHES, "No test patches found in the patches directory." + + +class AssignmentDict(TypedDict): + """A dictionary track where a symbol is referenced and defined.""" + + symbol: str + referenced: lsp.Position + defined: lsp.Location + + +class HoverDict(TypedDict): + """A dictionary to record hover information for a symbol.""" + + symbol: str + position: lsp.Position + contains: str | None + + +class PrepareRenameDict(TypedDict): + """A dictionary to record prepare rename results for a symbol.""" + + symbol: str + position: lsp.Position + result: bool + message: str | None + + +class RenameDict(TypedDict): + """A dictionary to record rename results for a symbol.""" + + symbol: str + rename_to: str + position: lsp.Position + changes: list[lsp.TextEdit] + + +# Example assignments from the "Basic.spn" patch, for testing definition locations +ASSIGNMENTS: list[AssignmentDict] = [ + { + # Variable + "symbol": "apout", + "referenced": lsp.Position(line=57, character=7), + "defined": lsp.Location( + uri=f"file:///{PATCH_DIR / 'Basic.spn'}", + range=lsp.Range( + start=lsp.Position(line=23, character=4), + end=lsp.Position(line=23, character=8), + ), + ), + }, + { + # Memory + "symbol": "lap2a", + "referenced": lsp.Position(line=72, character=7), + "defined": lsp.Location( + uri=f"file:///{PATCH_DIR / 'Basic.spn'}", + range=lsp.Range( + start=lsp.Position(line=16, character=4), + end=lsp.Position(line=16, character=8), + ), + ), + }, + { + # Memory. Note that this has an address modifier, but still points to the + # original definition. 
+ "symbol": "lap2a#", + "referenced": lsp.Position(line=71, character=7), + "defined": lsp.Location( + uri=f"file:///{PATCH_DIR / 'Basic.spn'}", + range=lsp.Range( + start=lsp.Position(line=16, character=4), + end=lsp.Position(line=16, character=8), + ), + ), + }, + { + # Label + "symbol": "endclr", + "referenced": lsp.Position(line=37, character=9), + "defined": lsp.Location( + uri=f"file:///{PATCH_DIR / 'Basic.spn'}", + range=lsp.Range( + start=lsp.Position(line=41, character=0), + end=lsp.Position(line=41, character=5), + ), + ), + }, +] + + +# Example hovers from the "Basic.spn" patch, for testing hover info +HOVERS: list[HoverDict] = [ + { + "symbol": "mem", + "position": lsp.Position(line=8, character=0), + "contains": "## `MEM`", + }, + { + "symbol": "skp", + "position": lsp.Position(line=37, character=2), + "contains": "## `SKP CMASK,N`", + }, + { + "symbol": "endclr", + "position": lsp.Position(line=37, character=13), + "contains": "(label) ENDCLR: Offset[4]", + }, + { + "symbol": "mono", + "position": lsp.Position(line=47, character=5), + "contains": "(constant) MONO: Literal[32]", + }, + { + "symbol": "lap2b#", + "position": lsp.Position(line=73, character=4), + "contains": "(constant) LAP2B#: Literal[9802]", + }, + { + # CHO RDA, hovering over CHO + "symbol": "CHO_rda", + "position": lsp.Position(line=85, character=0), + "contains": "## `CHO RDA N, C, D`", + }, + { + # CHO RDA, hovering over RDA + "symbol": "cho_RDA", + "position": lsp.Position(line=85, character=4), + "contains": "## `CHO RDA N, C, D`", + }, + { + # Hovering over an int, which should return no hover info + "symbol": "None", + "position": lsp.Position(line=8, character=8), + "contains": None, + }, +] + + +PREPARE_RENAMES: list[PrepareRenameDict] = [ + { + "symbol": "mem", + "position": lsp.Position(line=8, character=0), + "result": None, + "message": "Can't rename non-user defined token MEM.", + }, + { + "symbol": "reg0", + "position": lsp.Position(line=22, character=10), + "result": 
None, + "message": "Can't rename non-user defined token REG0.", + }, + { + "symbol": "ap1", + "position": lsp.Position(line=8, character=4), + "result": lsp.PrepareRenameResult_Type2(default_behavior=True), + "message": None, + }, + { + "symbol": "endclr", + "position": lsp.Position(line=37, character=10), + "result": lsp.PrepareRenameResult_Type2(default_behavior=True), + "message": None, + }, +] + + +RENAMES: list[RenameDict] = [ + { + "symbol": "ap1", + "rename_to": "FOO", + "position": lsp.Position(line=8, character=4), + "changes": [ + lsp.TextEdit( + range=lsp.Range(start=lsp.Position(8, 4), end=lsp.Position(8, 6)), + new_text="FOO", + ), + # This symbol is `ap1#``, and should be matched when renaming `ap1` + lsp.TextEdit( + range=lsp.Range(start=lsp.Position(51, 4), end=lsp.Position(51, 6)), + new_text="FOO", + ), + lsp.TextEdit( + range=lsp.Range(start=lsp.Position(52, 5), end=lsp.Position(52, 7)), + new_text="FOO", + ), + ], + }, + { + "symbol": "endclr", + "rename_to": "END", + "position": lsp.Position(line=41, character=0), + "changes": [ + lsp.TextEdit( + range=lsp.Range(start=lsp.Position(37, 8), end=lsp.Position(37, 13)), + new_text="END", + ), + lsp.TextEdit( + range=lsp.Range(start=lsp.Position(41, 0), end=lsp.Position(41, 5)), + new_text="END", + ), + ], + }, + { + "symbol": "lap1a#", + "rename_to": "FOO", + "position": lsp.Position(line=61, character=4), + "changes": [ + # Renaming `lap1a#` should also rename `lap1a` + lsp.TextEdit( + range=lsp.Range(start=lsp.Position(12, 4), end=lsp.Position(12, 8)), + new_text="FOO", + ), + lsp.TextEdit( + range=lsp.Range(start=lsp.Position(61, 4), end=lsp.Position(61, 8)), + new_text="FOO", + ), + lsp.TextEdit( + range=lsp.Range(start=lsp.Position(62, 5), end=lsp.Position(62, 9)), + new_text="FOO", + ), + ], + }, +] diff --git a/tests/test_documentation.py b/tests/test_documentation.py index 3dec1e2..d244c9a 100644 --- a/tests/test_documentation.py +++ b/tests/test_documentation.py @@ -1,5 +1,7 @@ """Test the 
formatting of the documentation files.""" +from __future__ import annotations + import json import mistletoe @@ -8,7 +10,8 @@ from spinasm_lsp.documentation import DocMap -INSTRUCTIONS = DocMap("instructions") +INSTRUCTIONS = DocMap(folders=["instructions"]) +ASSEMBLERS = DocMap(folders=["assemblers"]) VALID_ENTRY_FORMATS = ( "Decimal (0 - 63)", "Decimal (1 - 63)", @@ -42,6 +45,47 @@ ) +def find_content(d: dict): + """Eagerly grab the first content from the dictionary or its children.""" + if "content" in d: + return d["content"] + + if "children" in d: + return find_content(d["children"][0]) + + raise ValueError("No content found.") + + +def validate_copyright(footnote: dict) -> None: + """Validate a Markdown footnote contains a correctly formatted copyright.""" + copyright = ( + "Adapted from Spin Semiconductor SPINAsm & FV-1 Instruction Set reference " + "manual. Copyright 2008 by Spin Semiconductor." + ) + assert footnote["type"] == "Emphasis", "Copyright is missing or incorrect." + assert find_content(footnote) == copyright, "Copyright does not match." + + +def validate_title(title: dict, expected_name: str | None = None) -> None: + """ + Validate a Markdown title is correctly formatted and optionally matches an expected + name. 
+ """ + if expected_name is not None: + assert find_content(title) == expected_name, "Title should match name" + + assert title["level"] == 2, "Title heading should be level 2" + assert title["children"][0]["type"] == "InlineCode" + assert title["children"][0]["children"][0]["type"] == "RawText" + + +def validate_example(example: dict) -> None: + """Validate a Markdown example contains an assembly code block.""" + assert example["type"] == "CodeFence" + assert example["language"] == "assembly", "Language should be 'assembly'" + assert len(example["children"]) == 1 + + def test_instructions_are_unique(): """Test that no unique fields are duplicated between instructions.""" operations = {} @@ -94,15 +138,36 @@ def value_duplicated(value, d: dict) -> bool: assert not duplicate_keys, f"Example duplicated between {duplicate_keys}" -def find_content(d: dict): - """Eagerly grab the first content from the dictionary or it's children.""" - if "content" in d: - return d["content"] +@pytest.mark.parametrize("assembler", ASSEMBLERS.items(), ids=lambda x: x[0]) +def test_assembler_formatting(assembler): + """Test that all assembler markdown files follow the correct format.""" + assembler_name, content = assembler - if "children" in d: - return find_content(d["children"][0]) + ast = json.loads( + mistletoe.markdown(content, renderer=mistletoe.ast_renderer.ASTRenderer) + ) + children = ast["children"] + headings = [child for child in children if child["type"] == "Heading"] + title = headings[0] - raise ValueError("No content found.") + # Check title heading + validate_title(title, expected_name=assembler_name.upper()) + assert children[1]["type"] == "ThematicBreak", "Missing break after title" + + # Check all headings are the correct level + for heading in headings[1:]: + name = find_content(heading) + assert heading["level"] == 3, f"Subheading {name} should be level 3" + + # Check the Example heading exists and contains a code block + example = [h for h in headings if 
find_content(h) == "Example"] + assert len(example) == 1 + example_content = children[children.index(example[0]) + 1] + validate_example(example_content) + + # Check copyright footnote + footnote = children[-1]["children"][0] + validate_copyright(footnote) @pytest.mark.parametrize("instruction", INSTRUCTIONS.items(), ids=lambda x: x[0]) @@ -117,10 +182,9 @@ def test_instruction_formatting(instruction): headings = [child for child in children if child["type"] == "Heading"] title = headings[0] - # Check title heading - assert title["level"] == 2, "Title heading should be level 2" - assert title["children"][0]["type"] == "InlineCode" - assert title["children"][0]["children"][0]["type"] == "RawText" + # Check title heading. The heading title won't match the instruction name because + # it also includes args. + validate_title(title, expected_name=None) assert children[1]["type"] == "ThematicBreak", "Missing break after title" # Parse the parameters @@ -193,15 +257,8 @@ def test_instruction_formatting(instruction): # Check example code block example_content = children[children.index(example) + 1] - assert example_content["type"] == "CodeFence" - assert example_content["language"] == "assembly", "Language should be 'assembly'" - assert len(example_content["children"]) == 1 + validate_example(example_content) # Check copyright footnote footnote = children[-1]["children"][0] - copyright = ( - "Adapted from Spin Semiconductor SPINAsm & FV-1 Instruction Set reference " - "manual. Copyright 2008 by Spin Semiconductor." - ) - assert footnote["type"] == "Emphasis", "Copyright is missing or incorrect." - assert find_content(footnote) == copyright, "Copyright does not match." 
+ validate_copyright(footnote) diff --git a/tests/test_parser.py b/tests/test_parser.py index 23001d3..8d05d08 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,90 +1,110 @@ -"""Test the parsing of the SpinASM grammar.""" +"""Test the parsing of SPINAsm programs.""" from __future__ import annotations -import random - +import lsprotocol.types as lsp import pytest -from spinasm_lsp.parser import ( - Assignment, - Expression, - FV1Program, - Instruction, - Label, -) - -from .conftest import TEST_PATCHES - - -@pytest.mark.parametrize( - "expr", - ["42", "42.0", "0x2A", "$2A", "%00101010", "%001_01_010"], - ids=["i", "f", "0x", "$", "%", "%_"], -) -def test_number_representations(expr): - """Test supported number representations.""" - assert FV1Program(f"MULX {expr}").statements[0].args[0] == 42 - - -def test_combined_statements(): - """Test a program with multiple statements.""" - code = r""" - ; This is a comment - start: Tmp EQU 4 - EQU Tmp2 5 - MULX 0+1 - SOF -1,TMP - end: - """ - - assert FV1Program(code).statements == [ - Label("START"), - Assignment("equ", "TMP", 4), - Assignment("equ", "TMP2", 5), - Instruction("MULX", args=[Expression([0, "+", 1])]), - Instruction("SOF", args=[Expression([-1]), Expression(["TMP"])]), - Label("END"), - ] - - -@pytest.mark.parametrize("stmt", ["", "SOF 0", "x EQU 4"], ids=["none", "op", "equ"]) -def test_parse_label(stmt: str | None): - """Test that labels are parsed, with and without following statements.""" - prog = FV1Program(f"myLabel:{stmt}") - assert len(prog.statements) == 2 if stmt else 1 - assert prog.statements[0] == Label("MYLABEL") - - -@pytest.mark.parametrize("n_args", [0, 1, 3], ids=lambda x: f"{x} args") -def test_parse_instruction(n_args): - """Test that instructions with varying number of arguments are parsed correctly.""" - args = [random.randint(0, 100) for _ in range(n_args)] - code = f"MULX {','.join(map(str, args))}" - assert FV1Program(code).statements[0] == Instruction("MULX", args=args) 
- - -@pytest.mark.parametrize("type", ["equ", "mem"]) -@pytest.mark.parametrize("order", ["{name} {type} {val}", "{type} {name} {val}"]) -def test_assign(type: str, order: str): - """Test that assignment with EQU and MEM work with either keyword order.""" - code = order.format(name="A", type=type, val=5) - prog = FV1Program(code) - assert prog.statements[0] == Assignment(f"{type}", "A", 5) - - -def test_parse_instruction_with_multiple_commas(): - """Test that redundant commas are ignored.""" - assert FV1Program("SOF 0,,42").statements[0] == Instruction("SOF", args=[0, 42]) - +from spinasm_lsp.parser import SPINAsmParser, Token, TokenRegistry -def test_whitespace_ignored(): - """Test that whitespace around instructions and assignments are ignored.""" - assert FV1Program(" MULX 0 \n B EQU A*2 \n ") +from .conftest import PATCH_DIR, TEST_PATCHES @pytest.mark.parametrize("patch", TEST_PATCHES, ids=lambda x: x.stem) def test_example_patches(patch): - """Test that the example patches from SpinASM parse correctly.""" - with open(patch, encoding="utf-8-sig") as f: - FV1Program(f.read()) + """Test that the example patches from SPINAsm are parsable.""" + with open(patch, encoding="utf-8") as f: + assert SPINAsmParser(f.read()) + + +@pytest.fixture() +def sentence_token_registry() -> tuple[str, TokenRegistry]: + """A sentence with a token registry for each word.""" + sentence = "This is a line with words." + + # Build a list of word tokens, ignoring whitespace. We'll build the tokens + # consistently with asfv1 parsed tokens. 
+ words = list(filter(lambda x: x, sentence.split(" "))) + token_vals = [{"type": "LABEL", "txt": w, "stxt": w, "val": None} for w in words] + tokens = [] + col = 0 + + for t in token_vals: + start = sentence.index(t["txt"], col) + token = Token(t, start=lsp.Position(line=0, character=start)) + col = token.range.end.character + 1 + + tokens.append(token) + + return sentence, TokenRegistry(tokens) + + +def test_get_token_from_registry(sentence_token_registry): + """Test that tokens are correctly retrieved by position from a registry.""" + sentence, reg = sentence_token_registry + + # Manually build a mapping of column indexes to expected token words + token_positions = {i: None for i in range(len(sentence))} + for i in range(0, 4): + token_positions[i] = "This" + for i in range(7, 9): + token_positions[i] = "is" + for i in range(10, 11): + token_positions[i] = "a" + for i in range(12, 16): + token_positions[i] = "line" + for i in range(20, 24): + token_positions[i] = "with" + for i in range(25, 31): + token_positions[i] = "words." 
+ + for i, word in token_positions.items(): + found_tok = reg.get_token_at_position(lsp.Position(line=0, character=i)) + found_val = found_tok.symbol["txt"] if found_tok is not None else found_tok + msg = f"Expected token `{word}` at col {i}, found `{found_val}`" + assert found_val == word, msg + + +def test_get_token_at_invalid_position_returns_none(sentence_token_registry): + """Test that retrieving tokens from out of bounds always returns None.""" + _, reg = sentence_token_registry + + assert reg.get_token_at_position(lsp.Position(line=99, character=99)) is None + + +def test_get_token_positions(): + """Test getting all positions of a token from a registry.""" + patch = PATCH_DIR / "Basic.spn" + with open(patch) as fp: + source = fp.read() + + parser = SPINAsmParser(source).parse() + + all_matches = parser.token_registry.get_matching_tokens("apout") + assert len(all_matches) == 4 + assert [t.range.start.line for t in all_matches] == [23, 57, 60, 70] + + +def test_concatenate_cho_rdal_tokens(): + """Test that CHO and RDAL tokens are concatenated correctly into CHO RDAL.""" + cho_rdal = Token( + symbol={"type": "MNEMONIC", "txt": "cho", "stxt": "CHO", "val": None}, + start=lsp.Position(line=0, character=0), + ).concatenate( + Token( + symbol={"type": "LABEL", "txt": "rdal", "stxt": "RDAL", "val": None}, + # Put whitespace between CHO and RDAL to test that range is calculated + start=lsp.Position(line=0, character=10), + ) + ) + + assert cho_rdal.symbol == { + "type": "MNEMONIC", + "txt": "cho rdal", + "stxt": "CHO RDAL", + "val": None, + } + + assert cho_rdal.range == lsp.Range( + start=lsp.Position(line=0, character=0), end=lsp.Position(line=0, character=13) + ) diff --git a/tests/test_server.py b/tests/test_server.py new file mode 100644 index 0000000..5029726 --- /dev/null +++ b/tests/test_server.py @@ -0,0 +1,219 @@ +import lsprotocol.types as lsp +import pytest +import pytest_lsp +from pytest_lsp import ClientServerConfig, LanguageClient + +from .conftest 
import ( + ASSIGNMENTS, + HOVERS, + PATCH_DIR, + PREPARE_RENAMES, + RENAMES, + PrepareRenameDict, + RenameDict, +) + + +@pytest_lsp.fixture( + # params=["neovim", "visual_studio_code"], + params=["visual_studio_code"], + config=ClientServerConfig(server_command=["spinasm-lsp"]), +) +async def client(request, lsp_client: LanguageClient): + # Setup the server + params = lsp.InitializeParams( + capabilities=pytest_lsp.client_capabilities(request.param) + ) + + await lsp_client.initialize_session(params) + yield + + # Shutdown the server after the test + await lsp_client.shutdown_session() + + +@pytest.mark.asyncio() +@pytest.mark.parametrize("assignment", ASSIGNMENTS, ids=lambda x: x["symbol"]) +async def test_definition(assignment: dict, client: LanguageClient): + """Test that the definition location of different assignments is correct.""" + uri = assignment["defined"].uri + result = await client.text_document_definition_async( + params=lsp.DefinitionParams( + position=assignment["referenced"], + text_document=lsp.TextDocumentIdentifier(uri=uri), + ) + ) + + assert result == assignment["defined"] + + +@pytest.mark.asyncio() +async def test_completions(client: LanguageClient): + """Test that expected completions are shown with details and documentation.""" + patch = PATCH_DIR / "Basic.spn" + + results = await client.text_document_completion_async( + params=lsp.CompletionParams( + position=lsp.Position(line=0, character=0), + text_document=lsp.TextDocumentIdentifier(uri=f"file:///{patch.absolute()}"), + ) + ) + assert results is not None, "Expected completions" + completions = [item.label for item in results.items] + + expected_completions = [ + # Memory locations + "AP1", + "LAP1A", + "D2", + # Variables + "MONO", + "APOUT", + "KRF", + # Constants + "REG0", + "SIN0", + # Opcodes + "SOF", + "MULX", + "WRAX", + ] + + for completion in expected_completions: + assert completion in completions, f"Expected completion {completion} not found" + + # Completions for defined 
values should show their literal value + apout_completion = [item for item in results.items if item.label == "APOUT"][0] + assert apout_completion.detail == "(constant) APOUT: Literal[33]" + assert apout_completion.documentation is None + + # Completions for opcodes should include their documentation + cho_rda_completion = [item for item in results.items if item.label == "CHO RDA"][0] + assert cho_rda_completion.detail == "(opcode)" + assert "## `CHO RDA N, C, D`" in str(cho_rda_completion.documentation) + + +@pytest.mark.asyncio() +async def test_diagnostic_parsing_errors(client: LanguageClient): + """Test that parsing errors and warnings are correctly reported by the server.""" + source_with_errors = """ +; Undefined symbol a +SOF 0,a + +; Label REG0 re-defined +REG0 EQU 4 + +; Register out of range +MULX 100 +""" + + # We need a URI to associate with the source, but it doesn't need to be a real file. + test_uri = "dummy_uri" + client.text_document_did_open( + lsp.DidOpenTextDocumentParams( + text_document=lsp.TextDocumentItem( + uri=test_uri, + language_id="spinasm", + version=1, + text=source_with_errors, + ) + ) + ) + + await client.wait_for_notification(lsp.TEXT_DOCUMENT_PUBLISH_DIAGNOSTICS) + + expected = [ + lsp.Diagnostic( + range=lsp.Range( + start=lsp.Position(line=2, character=6), + end=lsp.Position(line=2, character=6), + ), + message="Undefined label a", + severity=lsp.DiagnosticSeverity.Error, + source="SPINAsm", + ), + lsp.Diagnostic( + range=lsp.Range( + start=lsp.Position(line=5, character=9), + end=lsp.Position(line=5, character=9), + ), + message="Label REG0 re-defined", + severity=lsp.DiagnosticSeverity.Warning, + source="SPINAsm", + ), + lsp.Diagnostic( + range=lsp.Range( + start=lsp.Position(line=8, character=0), + end=lsp.Position(line=8, character=0), + ), + message="Register 0x64 out of range for MULX", + severity=lsp.DiagnosticSeverity.Error, + source="SPINAsm", + ), + ] + + returned = client.diagnostics[test_uri] + extra = len(returned) 
- len(expected) + assert extra == 0, f"Expected {len(expected)} diagnostics, got {len(returned)}." + + for i, diag in enumerate(expected): + assert diag == returned[i], f"Diagnostic {i} does not match expected" + + +@pytest.mark.parametrize("hover", HOVERS, ids=lambda x: x["symbol"]) +@pytest.mark.asyncio() +async def test_hover(hover: dict, client: LanguageClient): + patch = PATCH_DIR / "Basic.spn" + + result = await client.text_document_hover_async( + params=lsp.CompletionParams( + position=hover["position"], + text_document=lsp.TextDocumentIdentifier(uri=f"file:///{patch.absolute()}"), + ) + ) + + if hover["contains"] is None: + assert result is None, "Expected no hover result" + else: + msg = f"Hover does not contain `{hover['contains']}`" + assert hover["contains"] in result.contents.value, msg + + +@pytest.mark.parametrize("prepare", PREPARE_RENAMES, ids=lambda x: x["symbol"]) +@pytest.mark.asyncio() +async def test_prepare_rename(prepare: PrepareRenameDict, client: LanguageClient): + """Test that prepare rename prevents renaming non-user defined tokens.""" + patch = PATCH_DIR / "Basic.spn" + + result = await client.text_document_prepare_rename_async( + params=lsp.PrepareRenameParams( + position=prepare["position"], + text_document=lsp.TextDocumentIdentifier(uri=f"file:///{patch.absolute()}"), + ) + ) + + assert result == prepare["result"] + + if prepare["message"]: + assert prepare["message"] in client.log_messages[0].message + assert client.log_messages[0].type == lsp.MessageType.Info + else: + assert not client.log_messages + + +@pytest.mark.parametrize("rename", RENAMES, ids=lambda x: x["symbol"]) +@pytest.mark.asyncio() +async def test_rename(rename: RenameDict, client: LanguageClient): + """Test that renaming a symbol suggests the correct edits.""" + patch = PATCH_DIR / "Basic.spn" + + uri = f"file:///{patch.absolute()}" + result = await client.text_document_rename_async( + params=lsp.RenameParams( + position=rename["position"], + 
new_name=rename["rename_to"], + text_document=lsp.TextDocumentIdentifier(uri=uri), + ) + ) + + assert result.changes[uri] == rename["changes"]