From 409fc5ff5efa339db3d3518dd11316f2619f6778 Mon Sep 17 00:00:00 2001
From: Aaron Zuspan
Date: Fri, 9 Aug 2024 16:11:29 -0700
Subject: [PATCH 01/18] Switch to asfv1 parser

---
 pyproject.toml                         |   4 +-
 src/spinasm_lsp/docs/assemblers/equ.md |  42 +++
 src/spinasm_lsp/docs/assemblers/mem.md |  64 +++++
 src/spinasm_lsp/documentation.py       |  22 +-
 src/spinasm_lsp/parser.py              | 337 +++++++++++--------------
 src/spinasm_lsp/server.py              | 222 ++++++++++++++--
 src/spinasm_lsp/spinasm.lark           |  60 -----
 src/spinasm_lsp/utils.py               | 130 ++++++++++
 tests/test_documentation.py            |  99 ++++++--
 tests/test_parser.py                   |  84 +-----
 tests/test_utils.py                    |  92 +++++++
 11 files changed, 773 insertions(+), 383 deletions(-)
 create mode 100644 src/spinasm_lsp/docs/assemblers/equ.md
 create mode 100644 src/spinasm_lsp/docs/assemblers/mem.md
 delete mode 100644 src/spinasm_lsp/spinasm.lark
 create mode 100644 src/spinasm_lsp/utils.py
 create mode 100644 tests/test_utils.py

diff --git a/pyproject.toml b/pyproject.toml
index f7b8467..b340512 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,11 +5,11 @@ build-backend = "hatchling.build"
 [project]
 name = "spinasm-lsp"
 dynamic = [ "version",]
-description = "A Language Server Protocol implementation for SpinASM"
+description = "A Language Server Protocol implementation for SPINAsm"
 readme = "README.md"
 requires-python = ">=3.9"
 keywords = []
-dependencies = [ "pygls", "lsprotocol", "lark" ]
+dependencies = [ "pygls", "lsprotocol", "asfv1" ]
 
 [[project.authors]]
 name = "Aaron Zuspan"
diff --git a/src/spinasm_lsp/docs/assemblers/equ.md b/src/spinasm_lsp/docs/assemblers/equ.md
new file mode 100644
index 0000000..8be6eba
--- /dev/null
+++ b/src/spinasm_lsp/docs/assemblers/equ.md
@@ -0,0 +1,42 @@
## `EQU`

------------------

The `EQU` statement allows one to define symbolic operands in order to increase the readability of the source code. Technically, an `EQU` statement such as:

```assembly
Name EQU Value [;Comment]
```

will cause SPINAsm to replace any occurrence of the literal "Name" by the literal "Value" within each instruction line during the assembly process, excluding the comment portion of an instruction line.

With the exception of blanks, any printable character is allowed within the literal "Name". However, there are restrictions: "Name" must be a unique string, it is limited to 32 characters, and the first character must be a letter, excluding the "+" and "-" signs and the "!" character.

The reason these characters are not allowed as the first character of "Name" is that any symbolic operand may be prefixed with a sign or the "!" negation operator within the instruction line. The assembler will then perform the required conversion of the operand while processing the individual instruction lines.

There is another restriction, not related to syntax, when using symbolic operands defined by an `EQU` statement: predefined symbols. As given at the end of the manual, there is a set of predefined symbolic operands which should not be used as "Name" literals within an `EQU` statement. It is not that these predefined symbols are prohibited; it is just that using them within an `EQU` statement will overwrite their predefined value.

With the literal "Value" things are slightly more complicated, since its format has to comply with the syntactical rules defined for the operand type it is to represent. Although it is suggested to place `EQU` statements at the beginning of the source code file, this is not mandatory. However, the `EQU` statement has to be defined before the literal "Name" can be used as a symbolic operand within an instruction line.
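
For instance, in this hypothetical two-line sketch, the first line would fail to assemble because `Gain` is referenced before the `EQU` statement that defines it:

```assembly
sof 0,Gain ; Error: Gain is not defined yet
Gain EQU 0.75 ; The EQU statement must come before the first use
```
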
### Remark
SPINAsm has no way of performing range checking while processing the `EQU` statement. This is because the operand type of "Value" is not known to SPINAsm at the time the `EQU` statement is processed. As a result, range checking is performed when assembling the instruction line in which "Name" is to be replaced by "Value".

### Example
```assembly
Attn EQU 0.5 ; 0.5 = -6dB attenuation
Tmp_Reg EQU 63 ; Temporary register within register file
Tmp_Del EQU $2000 ; Temporary memory location within delay ram
;
;------------------------------
sof 0,0 ; Clear ACC
rda Tmp_Del,Attn ; Load sample from delay ram $2000,
 ; multiply it by 0.5 and add ACC content
wrax Tmp_Reg,1.0 ; Save result to Tmp_Reg but keep it in ACC
wrax DACL,0 ; Move ACC to DAC left (predefined symbol)
 ; and then clear ACC
```

If `Tmp_Del` were accidentally replaced by `Tmp_Reg` within the `rda` instruction line, SPINAsm would not detect this semantic error – simply because using `Tmp_Reg` would be syntactically correct.

------------------
*Adapted from Spin Semiconductor SPINAsm & FV-1 Instruction Set reference manual. Copyright 2008 by Spin Semiconductor.*
\ No newline at end of file
diff --git a/src/spinasm_lsp/docs/assemblers/mem.md b/src/spinasm_lsp/docs/assemblers/mem.md
new file mode 100644
index 0000000..8ca98af
--- /dev/null
+++ b/src/spinasm_lsp/docs/assemblers/mem.md
@@ -0,0 +1,64 @@
## `MEM`

------------------

The `MEM` statement allows the user to partition the delay ram memory into individual blocks. A memory block declared by the statement

```assembly
Name MEM Size [;Comment]
```

can be referenced by `Name` from within an instruction line. `Name` has to comply with the same syntactical rules previously defined for the `EQU` statement; `Size` is an unsigned integer in the range of 1 to 32768, which may be entered either in decimal or in hexadecimal.

Besides the explicit identifier `Name`, the assembler defines two additional implicit identifiers, `Name#` and `Name^`. `Name` refers to the first memory location within the memory block, whereas `Name#` refers to the last memory location. The identifier `Name^` references the middle of the memory block, or in other words its center. If a memory block of size 1 is defined, all three identifiers will address the same memory location. In case the memory block is of size 2, `Name` and `Name^` will address the same memory location. If the size is an even number, the memory block cannot exactly be halved, and the midpoint `Name^` is rounded down to the nearest memory location.

Optionally, all three identifiers can be offset by a positive or negative integer, which is entered in decimal. Although range checking is performed when using offsets, no error is generated if the result of the address calculation exceeds the address range of the memory block. This is also true for those cases in which the result will "wrap around" the physical 32k boundary of the delay memory. However, a warning will be issued in order to alert the user regarding the out of range condition.

Mapping the memory blocks to their physical delay ram addresses is solely handled by SPINAsm. The user has no way to explicitly force SPINAsm to place a certain memory block at a specific physical address range. This of course does not mean that the user has no control over the layout of the delay ram at all: knowing that SPINAsm maps memory blocks in the order they are defined within the source file, the user can implicitly control the memory map of the delay ram.
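
As a sketch of that implicit control (assuming delay ram is allocated from the lowest address upward), the declaration order below determines which block receives the lower address range:

```assembly
DelA MEM 1000 ; Declared first: mapped to the lowest addresses
DelB MEM 2000 ; Declared second: mapped immediately after DelA
```

Swapping the two `MEM` statements would swap their physical placement without any other change to the source.
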
### Example
```assembly
DelR MEM 1024 ; Right channel delay line
DelL MEM 1024 ; Left channel delay line
 ;
;------------------------------
sof 0,0 ; Clear ACC
rdax ADCL,1.0 ; Read in left ADC
wra DelL,0.25 ; Save it to the start of the left delay
 ; line and keep a -12dB replica in ACC
rdax DelL^+20,0.25; Add sample from "center of the left delay
 ; line + 20 samples" times 0.25 to ACC
rdax DelL#,0.25 ; Add sample from "end of the left delay
 ; line" times 0.25 to ACC
rdax DelL-512,0.25; Add sample from "start of the left delay
 ; line - 512 samples" times 0.25 to ACC
```

### Remark
At this point the result of the address calculation will reference a sample from outside the `DelL` memory block. While being syntactically correct, the instruction might not result in what the user intended. In order to make the user aware of that potential semantic error, a warning will be issued.

```assembly
wrax DACL,0 ; Result to DACL, clear ACC
 ;
rdax ADCR,1.0 ; Read in right ADC
wra DelR,0.25 ; Save it to the start of the right delay
 ; line and keep a -12dB replica in ACC
rdax DelR^-20,0.25; Add sample from center of the right delay
 ; line - 20 samples times 0.25 to ACC
rdax DelR#,0.25 ; Add sample from end of the right delay line
 ; times 0.25 to ACC
rdax DelR-512,0.25; Add sample from start of the right delay
 ; line - 512 samples times 0.25 to ACC
```

### Remark
At this point the result of the address calculation will reference a sample from outside the `DelR` memory block. And even worse than the previous case: this time the sample will be fetched from delay ram address 32256, which will contain a sample that is approximately 1 second old!

Again, syntactically correct but most likely a semantic error – warnings will be issued.

```assembly
wrax DACR,0 ; Result to DACR, clear ACC
```

------------------
*Adapted from Spin Semiconductor SPINAsm & FV-1 Instruction Set reference manual.
Copyright 2008 by Spin Semiconductor.* \ No newline at end of file diff --git a/src/spinasm_lsp/documentation.py b/src/spinasm_lsp/documentation.py index eda40da..8f2a371 100644 --- a/src/spinasm_lsp/documentation.py +++ b/src/spinasm_lsp/documentation.py @@ -21,8 +21,8 @@ def read(self) -> str: class DocMap(UserDict): """A mapping of instructions to markdown documentation strings.""" - def __init__(self, folder: str): - self.dir = Path(str(DOC_DIR.joinpath(folder))) + def __init__(self, folders: list[str]): + self.folders = [Path(str(DOC_DIR.joinpath(folder))) for folder in folders] self.data = self.load_markdown() @staticmethod @@ -40,15 +40,13 @@ def __contains__(self, key): def load_markdown(self) -> dict[str, str]: data = {} - files = self.dir.glob("*.md") - for file in files: - md = MarkdownFile(file) - # Store with lowercase keys to allow case-insensitive searches - data[md.name.lower()] = md.read() + for folder in self.folders: + if not folder.exists(): + raise FileNotFoundError(f"Folder {folder} does not exist.") + files = folder.glob("*.md") + for file in files: + md = MarkdownFile(file) + # Store with lowercase keys to allow case-insensitive searches + data[md.name.lower()] = md.read() return data - - -if __name__ == "__main__": - instructions = DocMap(folder="instructions") - print(instructions["RDAX"]) diff --git a/src/spinasm_lsp/parser.py b/src/spinasm_lsp/parser.py index 2d78352..76f4218 100644 --- a/src/spinasm_lsp/parser.py +++ b/src/spinasm_lsp/parser.py @@ -1,213 +1,162 @@ from __future__ import annotations -from dataclasses import dataclass -from typing import Literal +from asfv1 import fv1parse +from lsprotocol import types as lsp -from lark import Lark, Transformer, v_args -from lark.exceptions import VisitError +from spinasm_lsp.utils import CallbackDict, Token, TokenRegistry -class ParsingError(Exception): ... +class SPINAsmParser(fv1parse): + """A modified version of fv1parse optimized for use with LSP.""" + def __init__(self, source: str): + self.diagnostics: list[lsp.Diagnostic] = [] + self.definitions: dict[str, lsp.Position] = {} + self.col: int = 0 + self.prevcol: int = 0 + self.token_registry = TokenRegistry() -@dataclass -class Expression: - expression: list[int | float | str] + super().__init__( + source=source, + clamp=True, + spinreals=False, + # Ignore the callbacks in favor of overriding their callers + wfunc=lambda *args, **kwargs: None, + efunc=lambda *args, **kwargs: None, + ) - def __eq__(self, other): - # If the expression has a single value, match against that - if len(self.expression) == 1: - return self.expression[0] == other + # Keep an unchanged copy of the original source + self._source: list[str] = self.source.copy() - # Otherwise, match the expression as a concated string of values and operators - return " ".join(map(str, self.expression)) == other + # Wrap the dictionaries to record whenever a definition is added + self.jmptbl: CallbackDict = CallbackDict( + self.jmptbl, callback=self._on_definition + ) + self.symtbl: CallbackDict = CallbackDict( + self.symtbl, callback=self._on_definition + ) + def _on_definition(self, label: str): + """Record the program location when a definition is added.""" + # Don't record the position of constants that are defined at program + # initialization. 
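+        # (asfv1 pre-populates these tables in __init__; wrapping them above
+        # re-fires the callback for each built-in entry before parsing begins.)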
+ if self.current_line == -1: + return -@dataclass -class Instruction: - opcode: str - args: list[Expression] + # Due to the parsing order, the current line will be correct for labels but + # incorrect for assignments, which need to use previous line instead. + line = self.current_line if label in self.jmptbl else self.previous_line + # Try to find the position of the label on the definition line. Remove address + # modifiers from the label name, since those are defined implicitly by the + # parser rather than in the source. + try: + col = ( + self._source[line] + .upper() + .index(label.replace("#", "").replace("^", "")) + ) + except ValueError: + col = 0 + + self.definitions[label] = lsp.Position(line, col) + + def __mkopcodes__(self): + """ + No-op. + + Generating opcodes isn't needed for LSP functionality, so we'll skip it. + """ + + def _record_diagnostic( + self, msg: str, line: int, col: int, severity: lsp.DiagnosticSeverity + ): + """Record a diagnostic message for the LSP.""" + self.diagnostics.append( + lsp.Diagnostic( + range=lsp.Range( + start=lsp.Position(line, character=col), + end=lsp.Position(line, character=col), + ), + message=msg, + severity=severity, + source="SPINAsm", + ) + ) -@dataclass -class Assignment: - type: Literal["equ", "mem"] - name: str - value: Expression - - -@dataclass -class Label: - name: str - - -@v_args(inline=True) -class FV1ProgramTransformer(Transformer): - local_vars: dict[str, float] - memory: dict[str, float] - - def __init__( - self, - local_vars: dict[str, float], - memory: dict[str, float], - visit_tokens: bool = True, - ) -> None: - self.local_vars = local_vars - self.memory = memory - super().__init__(visit_tokens=visit_tokens) - - def instruction(self, opcode: str, args: list | None, _) -> Instruction: - return Instruction(opcode, args or []) - - def assignment(self, mapping, _): - return mapping - - def label(self, name) -> Label: - return Label(name) - - def equ(self, name: str, value) -> Assignment: - self.local_vars[name] = value - return Assignment(type="equ", name=name, value=value) - - def mem(self, name: str, value) -> Assignment: - self.memory[name] = value - return Assignment(type="mem", name=name, value=value) - - def value(self, negative: str | None, value: float) -> float: - """A negated value.""" - if negative: - value *= -1 - - return value - - def DEC_NUM(self, token) -> int | float: - if "." in token: - return float(token) - return int(token) - - def HEX_NUM(self, token) -> int: - # Hex numbers can be written with either $ or 0x prefix - token = token.replace("$", "0x") - return int(token, base=16) - - def BIT_VECTOR(self, token) -> int: - # Remove the % prefix and optional underscores - return int(token[1:].replace("_", ""), base=2) - - def IDENT(self, token) -> str: - # Identifiers are case-insensitive and are stored in uppercase for consistency - # with the FV-1 assembler. 
- return token.upper() - - @v_args(inline=False) - def args(self, tokens): - return tokens - - @v_args(inline=False) - def expr(self, tokens): - return Expression(tokens) - - @v_args(inline=False) - def program(self, tokens): - return list(tokens) - - -class FV1Program: - constants = { - "SIN0_RATE": 0x00, - "SIN0_RANGE": 0x01, - "SIN1_RATE": 0x02, - "SIN1_RANGE": 0x03, - "RMP0_RATE": 0x04, - "RMP0_RANGE": 0x05, - "RMP1_RATE": 0x06, - "RMP1_RANGE": 0x07, - "POT0": 0x10, - "POT1": 0x11, - "POT2": 0x12, - "ADCL": 0x14, - "ADCR": 0x15, - "DACL": 0x16, - "DACR": 0x17, - "ADDR_PTR": 0x18, - "REG0": 0x20, - "REG1": 0x21, - "REG2": 0x22, - "REG3": 0x23, - "REG4": 0x24, - "REG5": 0x25, - "REG6": 0x26, - "REG7": 0x27, - "REG8": 0x28, - "REG9": 0x29, - "REG10": 0x2A, - "REG11": 0x2B, - "REG12": 0x2C, - "REG13": 0x2D, - "REG14": 0x2E, - "REG15": 0x2F, - "REG16": 0x30, - "REG17": 0x31, - "REG18": 0x32, - "REG19": 0x33, - "REG20": 0x34, - "REG21": 0x35, - "REG22": 0x36, - "REG23": 0x37, - "REG24": 0x38, - "REG25": 0x39, - "REG26": 0x3A, - "REG27": 0x3B, - "REG28": 0x3C, - "REG29": 0x3D, - "REG30": 0x3E, - "REG31": 0x3F, - "SIN0": 0x00, - "SIN1": 0x01, - "RMP0": 0x02, - "RMP1": 0x03, - "RDA": 0x00, - "SOF": 0x02, - "RDAL": 0x03, - "SIN": 0x00, - "COS": 0x01, - "REG": 0x02, - "COMPC": 0x04, - "COMPA": 0x08, - "RPTR2": 0x10, - "NA": 0x20, - "RUN": 0x10, - "ZRC": 0x08, - "ZRO": 0x04, - "GEZ": 0x02, - "NEG": 0x01, - } - local_vars: dict[str, float] = {**constants} - memory: dict[str, float] = {} - - def __init__(self, code: str): - self.transformer = FV1ProgramTransformer( - local_vars=self.local_vars, memory=self.memory + def parseerror(self, msg: str, line: int | None = None): + """Override to record parsing errors as LSP diagnostics.""" + if line is None: + line = self.prevline + + # Offset the line from the parser's 1-indexed line to the 0-indexed line + self._record_diagnostic( + msg, line - 1, col=self.col, severity=lsp.DiagnosticSeverity.Error ) - self.parser = Lark.open_from_package( - package="spinasm_lsp", - grammar_path="spinasm.lark", - start="program", - parser="lalr", - strict=False, - transformer=self.transformer, + def scanerror(self, msg: str): + """Override to record scanning errors as LSP diagnostics.""" + self._record_diagnostic( + msg, self.current_line, col=self.col, severity=lsp.DiagnosticSeverity.Error ) - # Make sure the code ends with a newline to properly parse the last line - if not code.endswith("\n"): - code += "\n" + def parsewarn(self, msg: str, line: int | None = None): + """Override to record parsing warnings as LSP diagnostics.""" + if line is None: + line = self.prevline - try: - self.statements = self.parser.parse(code) - except VisitError as e: - # Unwrap errors thrown by FV1ProgramTransformer - if wrapped_err := e.__context__: - raise wrapped_err from None + # Offset the line from the parser's 1-indexed line to the 0-indexed line + self._record_diagnostic( + msg, line - 1, col=self.col, severity=lsp.DiagnosticSeverity.Warning + ) - raise e + @property + def sline(self): + return self._sline + + @sline.setter + def sline(self, value): + """Update the current line and reset the column.""" + self._sline = value + + # Reset the column to 0 when we move to a new line + self.prevcol = self.col + self.col = 0 + + @property + def current_line(self): + """Get the zero-indexed current line.""" + return self.sline - 1 + + @property + def previous_line(self): + """Get the zero-indexed previous line.""" + return self.prevline - 1 + + def __next__(self): + """Parse the next symbol 
and update the column.""" + super().__next__() + # TODO: Make sure super().__next__ can't get stuck in an infinite loop since I + # removed the maxerr check + self._update_column() + + token_width = len(self.sym["txt"] or "") + token = Token(self.sym, self.current_line, self.col, self.col + token_width) + self.token_registry.register_token(token) + + def _update_column(self): + """Set the current column based on the last parsed symbol.""" + current_line_txt = self._source[self.current_line] + current_symbol = self.sym.get("txt", None) or "" + + self.prevcol = self.col + try: + # Start at the current column to skip previous duplicates of the symbol + self.col = current_line_txt.index(current_symbol, self.col) + except ValueError: + self.col = 0 + + def parse(self) -> SPINAsmParser: + """Parse and return the parser.""" + super().parse() + return self diff --git a/src/spinasm_lsp/server.py b/src/spinasm_lsp/server.py index daf5015..c5efa23 100644 --- a/src/spinasm_lsp/server.py +++ b/src/spinasm_lsp/server.py @@ -1,47 +1,239 @@ +from __future__ import annotations + from lsprotocol import types as lsp from pygls.server import LanguageServer +from pygls.workspace import TextDocument from . import __version__ from .documentation import DocMap from .logging import ServerLogger +from .parser import SPINAsmParser -LSP_SERVER = LanguageServer( - name="spinasm-lsp", - version=__version__, - max_workers=5, -) -LOGGER = ServerLogger(LSP_SERVER) +class SPINAsmLanguageServer(LanguageServer): + def __init__(self, *args, **kwargs): + super().__init__(*args, name="spinasm-lsp", version=__version__, **kwargs) + self.parser: SPINAsmParser = None + +LSP_SERVER = SPINAsmLanguageServer(max_workers=5) # TODO: Probably load async as part of a custom language server subclass -INSTRUCTIONS = DocMap(folder="instructions") +DOCUMENTATION = DocMap(folders=["instructions", "assemblers"]) +LOGGER = ServerLogger(LSP_SERVER) + + +async def _get_diagnostics(document: TextDocument) -> list[lsp.Diagnostic]: + """Parse a document and return diagnostics.""" + try: + parser = SPINAsmParser(document.source).parse() + diagnostics = parser.diagnostics + + # Store the last parsed state to avoid needing to re-parse for hover + LSP_SERVER.parser = parser + except Exception as e: + LOGGER.error(e) + diagnostics = [] + + return diagnostics + + +@LSP_SERVER.feature(lsp.TEXT_DOCUMENT_DID_CHANGE) +async def did_change( + ls: SPINAsmLanguageServer, params: lsp.DidChangeTextDocumentParams +): + """Run diagnostics on changed document.""" + document = ls.workspace.get_text_document(params.text_document.uri) + diagnostics = await _get_diagnostics(document) + ls.publish_diagnostics(document.uri, diagnostics) + + +@LSP_SERVER.feature(lsp.TEXT_DOCUMENT_DID_SAVE) +async def did_save(ls: SPINAsmLanguageServer, params: lsp.DidSaveTextDocumentParams): + """Run diagnostics on saved document.""" + document = ls.workspace.get_text_document(params.text_document.uri) + diagnostics = await _get_diagnostics(document) + + ls.publish_diagnostics(document.uri, diagnostics) + + +@LSP_SERVER.feature(lsp.TEXT_DOCUMENT_DID_OPEN) +async def did_open(ls: SPINAsmLanguageServer, params: lsp.DidOpenTextDocumentParams): + """Run diagnostics on open document.""" + document = ls.workspace.get_text_document(params.text_document.uri) + diagnostics = await _get_diagnostics(document) + + ls.publish_diagnostics(document.uri, diagnostics) + + +@LSP_SERVER.feature(lsp.TEXT_DOCUMENT_DID_CLOSE) +def did_close(params: lsp.DidCloseTextDocumentParams) -> None: + """LSP handler for 
textDocument/didClose request.""" + text_document = LSP_SERVER.workspace.get_text_document(params.text_document.uri) + # Clear the diagnostics on close + LSP_SERVER.publish_diagnostics(text_document.uri, []) @LSP_SERVER.feature(lsp.TEXT_DOCUMENT_HOVER) -def hover(ls: LanguageServer, params: lsp.HoverParams) -> lsp.Hover: +def hover(ls: SPINAsmLanguageServer, params: lsp.HoverParams) -> lsp.Hover | None: """Retrieve documentation from symbols on hover.""" + pos = params.position + + # TODO: Fix. Not exactly sure what the issue is, but it seems like I'm not getting + # hover for some symbols towards the end of lines, I think? + # I bet get_token_at_position doesn't work with the last token due to indexing + token = ls.parser.token_registry.get_token_at_position(pos.line, pos.character) + if token is None: + return None + + token_val = token.value["stxt"] + token_type = token.value["type"] + + # stxt should only be None for EOF tokens, but check to be sure. + if token_type == "EOF" or not isinstance(token_val, str): + return None + + hover_msg = None + if token_type in ("LABEL", "TARGET"): + # Label definitions and targets + if token_val in LSP_SERVER.parser.jmptbl: + hover_definition = LSP_SERVER.parser.jmptbl[token_val.upper()] + hover_msg = f"(label) {token_val}: Offset[**{hover_definition}**]" + # Variable and memory definitions + elif token_val in LSP_SERVER.parser.symtbl: + hover_definition = LSP_SERVER.parser.symtbl[token_val.upper()] + hover_msg = f"(constant) {token_val}: Literal[**{hover_definition}**]" + # Special case for hovering over the second word of a CHO instruction, which + # should be treated as part of the instruction for retrieving documentation. + if token_val in ("SOF", "RDAL", "RDA") and str(token.prev_token) == "CHO": + token_val = f"CHO {token_val}" + hover_msg = DOCUMENTATION.get(token_val, "") + # Opcodes and assignments + elif token_type in ("ASSEMBLER", "MNEMONIC"): + # CHO is a special opcode that treats its first argument as part of the + # instruction, for the sake of documentation. 
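+        # e.g. hovering `CHO` in `CHO RDA,SIN0,SIN|REG|COMPC,0` should show the
+        # docs for `CHO RDA` rather than for the bare `CHO` entry.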
+ if token_val == "CHO" and token.next_token is not None: + token_val = f"CHO {str(token.next_token)}" + + hover_msg = DOCUMENTATION.get(token_val, "") + + return ( + None + if not hover_msg + else lsp.Hover( + contents=lsp.MarkupContent(kind=lsp.MarkupKind.Markdown, value=hover_msg), + ) + ) + + +@LSP_SERVER.feature(lsp.TEXT_DOCUMENT_COMPLETION) +def completions( + ls: SPINAsmLanguageServer, params: lsp.CompletionParams | None = None +) -> lsp.CompletionList: + """Returns completion items.""" + opcodes = [k.upper() for k in DOCUMENTATION] + symbols = list(ls.parser.symtbl.keys()) + labels = list(ls.parser.jmptbl.keys()) + mem = list(ls.parser.mem.keys()) + + opcode_items = [ + lsp.CompletionItem(label=k, kind=lsp.CompletionItemKind.Function) + for k in opcodes + ] + symbol_items = [ + lsp.CompletionItem(label=k, kind=lsp.CompletionItemKind.Constant) + for k in symbols + ] + label_items = [ + lsp.CompletionItem(label=k, kind=lsp.CompletionItemKind.Reference) + for k in labels + ] + mem_items = [ + lsp.CompletionItem(label=k, kind=lsp.CompletionItemKind.Variable) for k in mem + ] + + return lsp.CompletionList( + is_incomplete=False, items=opcode_items + mem_items + symbol_items + label_items + ) + + +@LSP_SERVER.feature(lsp.TEXT_DOCUMENT_DEFINITION) +def definition( + ls: SPINAsmLanguageServer, params: lsp.DefinitionParams +) -> lsp.Location | None: + """Returns the definition location of a symbol.""" document = ls.workspace.get_text_document(params.text_document.uri) pos = params.position - # TODO: Handle multi-word instructions like CHO RDA, CHO SOF, CHO RDAL + # TODO: Probably switch to token registry try: - word = document.word_at_position(pos) + word = document.word_at_position(pos).upper() except IndexError: return None - word_docs = INSTRUCTIONS.get(word, None) - if word_docs: - return lsp.Hover( - contents=lsp.MarkupContent(kind=lsp.MarkupKind.Markdown, value=word_docs), + if word in ls.parser.definitions: + return lsp.Location( + uri=document.uri, + range=lsp.Range( + start=ls.parser.definitions[word], + end=ls.parser.definitions[word], + ), ) return None +@LSP_SERVER.feature(lsp.TEXT_DOCUMENT_PREPARE_RENAME) +def prepare_rename(ls: SPINAsmLanguageServer, params: lsp.PrepareRenameParams): + """Called by the client to determine if renaming the symbol at the given location + is a valid operation.""" + pos = params.position + + # TODO: Fix. Not exactly sure what the issue is, but it seems like I'm not getting + # hover for some symbols towards the end of lines, I think? + token = ls.parser.token_registry.get_token_at_position(pos.line, pos.character) + if token is None: + LOGGER.debug(f"No token to rename at {pos}.") + return None + + # Only user-defined labels should support renaming + if str(token) not in ls.parser.definitions: + LOGGER.debug(f"Can't rename non-user defined token {token}.") + return None + + return lsp.PrepareRenameResult_Type2(default_behavior=True) + + +@LSP_SERVER.feature( + lsp.TEXT_DOCUMENT_RENAME, options=lsp.RenameOptions(prepare_provider=True) +) +def rename(ls: SPINAsmLanguageServer, params: lsp.RenameParams): + pos = params.position + + # TODO: Fix. Not exactly sure what the issue is, but it seems like I'm not getting + # hover for some symbols towards the end of lines, I think? 
+ token = ls.parser.token_registry.get_token_at_position(pos.line, pos.character) + if token is None: + return None + + matching_tokens = ls.parser.token_registry.get_matching_tokens(str(token)) + + edits = [ + lsp.TextEdit( + range=lsp.Range( + start=lsp.Position(t.line, t.col_start), + end=lsp.Position(t.line, t.col_end), + ), + new_text=params.new_name, + ) + for t in matching_tokens + ] + + return lsp.WorkspaceEdit(changes={params.text_document.uri: edits}) + + def start() -> None: LSP_SERVER.start_io() if __name__ == "__main__": - instructions = DocMap(folder="instructions") start() diff --git a/src/spinasm_lsp/spinasm.lark b/src/spinasm_lsp/spinasm.lark deleted file mode 100644 index 95b0953..0000000 --- a/src/spinasm_lsp/spinasm.lark +++ /dev/null @@ -1,60 +0,0 @@ -program: (instruction | assignment | label)* - -instruction: OPCODE [args] NEWLINE -args: expr (","+ expr)* -expr: value (OPERATOR value)* -value: [NEGATIVE] (NAME | HEX_NUM | DEC_NUM | BIT_VECTOR) - -assignment: (equ | mem) NEWLINE -equ: IDENT "EQU"i expr | "EQU"i IDENT expr -mem: IDENT "MEM"i expr | "MEM"i IDENT expr - -label: IDENT ":" - - -OPCODE.1: "ABSA"i - | "AND"i - | "CHO"i - | "CLR"i - | "EXP"i - | "JAM"i - | "LDAX"i - | "LOG"i - | "MAXX"i - | "MULX"i - | "NOT"i - | "OR"i - | "RDA"i - | "RDAX"i - | "RDFX"i - | "RMPA"i - | "SKP"i - | "SOF"i - | "WLDR"i - | "WLDS"i - | "WRA"i - | "WRAP"i - | "WRAX"i - | "WRHX"i - | "WRLX"i - | "XOR"i - -// NAME can be suffixed with ^ or # to modify memory addressing -NAME: IDENT [ADDR_MODIFIER] -ADDR_MODIFIER: ["^" | "#"] -NEGATIVE: "-" -COMMENT: ";" /[^\n]/* -OPERATOR: "+" | "-" | "*" | "/" | "|" | "&" -HEX_NUM.1: "0x"i HEXDIGIT+ | "$" HEXDIGIT+ -BIT_VECTOR: "%" /[01](_?[01])*/ - -%import common.WS_INLINE -%import common.WS -%import common.NEWLINE -%import common.HEXDIGIT -%import common.CNAME -> IDENT -%import common.NUMBER -> DEC_NUM - -%ignore WS -%ignore WS_INLINE -%ignore COMMENT diff --git a/src/spinasm_lsp/utils.py b/src/spinasm_lsp/utils.py new file mode 100644 index 0000000..05e4a15 --- /dev/null +++ b/src/spinasm_lsp/utils.py @@ -0,0 +1,130 @@ +from __future__ import annotations + +import bisect +from collections import UserDict +from dataclasses import dataclass +from typing import Callable, Literal, TypedDict + +# Types of tokens defined by asfv1 +TokenType = Literal[ + "ASSEMBLER", + "EOF", + "INTEGER", + "LABEL", + "TARGET", + "MNEMONIC", + "OPERATOR", + "FLOAT", + "ARGSEP", +] + + +class CallbackDict(UserDict): + """A dictionary that fires a callback when an item is set.""" + + def __init__(self, dict=None, /, callback: Callable | None = None, **kwargs): + self.callback = callback or (lambda key: None) + super().__init__(dict, **kwargs) + + def __setitem__(self, key, value): + super().__setitem__(key, value) + self.callback(key) + + +class TokenValue(TypedDict): + """The token specification used by asfv1.""" + + type: TokenType + txt: str | None + stxt: str | None + val: int | float | None + + +# TODO: Probably use lsp.Position and lsp.Range to define the token location +@dataclass +class Token: + """A token and its position in a source file.""" + + value: TokenValue + line: int + col_start: int + col_end: int + next_token: Token | None = None + prev_token: Token | None = None + + def __str__(self): + return self.value["stxt"] or "Empty token" + + def __repr__(self): + return str(self) + + +class TokenRegistry: + """A registry of tokens and their positions in a source file.""" + + def __init__(self, tokens: list[Token] | None = None) -> None: + self._prev_token: 
Token | None = None + + """A dictionary mapping program lines to all Tokens on that line.""" + self.lines: dict[int, list[Token]] = {} + + """A dictionary mapping token names to all matching Tokens in the program.""" + self.directory: dict[str, list[Token]] = {} + + for token in tokens or []: + self.register_token(token) + + def register_token(self, token: Token) -> None: + """Add a token to the registry.""" + # TODO: Maybe handle multi-word CHO instructions here, by merging with the next + # token? The tricky part is that the next token would still end up getting + # registered unless we prevent it... If we end up with overlapping tokens, that + # will break `get_token_at_position`. I could check if prev token was CHO when + # I register RDAL, SOF, or RDA, and if so register them as one and unregister + # the previous? + if token.line not in self.lines: + self.lines[token.line] = [] + + # Record the previous and next token for each token to allow traversing + if self._prev_token: + token.prev_token = self._prev_token + self._prev_token.next_token = token + + # Store the token on its line + self.lines[token.line].append(token) + self._prev_token = token + + # Store user-defined tokens in the registry. Other token types could be stored, + # but currently there's no use case for retrieving their positions. + if token.value["type"] in ("LABEL", "TARGET"): + if str(token) not in self.directory: + self.directory[str(token)] = [] + self.directory[str(token)].append(token) + + def get_matching_tokens(self, token_name: str) -> list[Token]: + """Retrieve all tokens with a given name in the program.""" + return self.directory.get(token_name.upper(), []) + + def get_token_at_position(self, line: int, col: int) -> Token | None: + """Retrieve the token at the given line and column index.""" + if line not in self.lines: + return None + + line_tokens = self.lines[line] + token_starts = [t.col_start for t in line_tokens] + token_ends = [t.col_end for t in line_tokens] + + pos = bisect.bisect_left(token_starts, col) + + # The index returned by bisect_left points to the start value >= col. This will + # either be the first character of the token or the start of the next token. + # First check if we're out of bounds, then shift left unless we're at the first + # character of the token. + if pos == len(line_tokens) or token_starts[pos] != col: + pos -= 1 + + # If the col falls after the end of the token, we're not inside a token. 
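+        # For example, with tokens at cols [0-3] and [7-8], col=5 bisects to
+        # pos=1 and shifts back to pos=0; since 5 > 3, col falls between tokens.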
+ if col > token_ends[pos]: + return None + + return line_tokens[pos] diff --git a/tests/test_documentation.py b/tests/test_documentation.py index 3dec1e2..d244c9a 100644 --- a/tests/test_documentation.py +++ b/tests/test_documentation.py @@ -1,5 +1,7 @@ """Test the formatting of the documentation files.""" +from __future__ import annotations + import json import mistletoe @@ -8,7 +10,8 @@ from spinasm_lsp.documentation import DocMap -INSTRUCTIONS = DocMap("instructions") +INSTRUCTIONS = DocMap(folders=["instructions"]) +ASSEMBLERS = DocMap(folders=["assemblers"]) VALID_ENTRY_FORMATS = ( "Decimal (0 - 63)", "Decimal (1 - 63)", @@ -42,6 +45,47 @@ ) +def find_content(d: dict): + """Eagerly grab the first content from the dictionary or its children.""" + if "content" in d: + return d["content"] + + if "children" in d: + return find_content(d["children"][0]) + + raise ValueError("No content found.") + + +def validate_copyright(footnote: dict) -> None: + """Validate a Markdown footnote contains a correctly formatted copyright.""" + copyright = ( + "Adapted from Spin Semiconductor SPINAsm & FV-1 Instruction Set reference " + "manual. Copyright 2008 by Spin Semiconductor." + ) + assert footnote["type"] == "Emphasis", "Copyright is missing or incorrect." + assert find_content(footnote) == copyright, "Copyright does not match." + + +def validate_title(title: dict, expected_name: str | None = None) -> None: + """ + Validate a Markdown title is correctly formatted and optionally matches an expected + name. + """ + if expected_name is not None: + assert find_content(title) == expected_name, "Title should match name" + + assert title["level"] == 2, "Title heading should be level 2" + assert title["children"][0]["type"] == "InlineCode" + assert title["children"][0]["children"][0]["type"] == "RawText" + + +def validate_example(example: dict) -> None: + """Validate a Markdown example contains an assembly code block.""" + assert example["type"] == "CodeFence" + assert example["language"] == "assembly", "Language should be 'assembly'" + assert len(example["children"]) == 1 + + def test_instructions_are_unique(): """Test that no unique fields are duplicated between instructions.""" operations = {} @@ -94,15 +138,36 @@ def value_duplicated(value, d: dict) -> bool: assert not duplicate_keys, f"Example duplicated between {duplicate_keys}" -def find_content(d: dict): - """Eagerly grab the first content from the dictionary or it's children.""" - if "content" in d: - return d["content"] +@pytest.mark.parametrize("assembler", ASSEMBLERS.items(), ids=lambda x: x[0]) +def test_assembler_formatting(assembler): + """Test that all assembler markdown files follow the correct format.""" + assembler_name, content = assembler - if "children" in d: - return find_content(d["children"][0]) + ast = json.loads( + mistletoe.markdown(content, renderer=mistletoe.ast_renderer.ASTRenderer) + ) + children = ast["children"] + headings = [child for child in children if child["type"] == "Heading"] + title = headings[0] - raise ValueError("No content found.") + # Check title heading + validate_title(title, expected_name=assembler_name.upper()) + assert children[1]["type"] == "ThematicBreak", "Missing break after title" + + # Check all headings are the correct level + for heading in headings[1:]: + name = find_content(heading) + assert heading["level"] == 3, f"Subheading {name} should be level 3" + + # Check the Example heading exists and contains a code block + example = [h for h in headings if find_content(h) == "Example"] + assert 
len(example) == 1 + example_content = children[children.index(example[0]) + 1] + validate_example(example_content) + + # Check copyright footnote + footnote = children[-1]["children"][0] + validate_copyright(footnote) @pytest.mark.parametrize("instruction", INSTRUCTIONS.items(), ids=lambda x: x[0]) @@ -117,10 +182,9 @@ def test_instruction_formatting(instruction): headings = [child for child in children if child["type"] == "Heading"] title = headings[0] - # Check title heading - assert title["level"] == 2, "Title heading should be level 2" - assert title["children"][0]["type"] == "InlineCode" - assert title["children"][0]["children"][0]["type"] == "RawText" + # Check title heading. The heading title won't match the instruction name because + # it also includes args. + validate_title(title, expected_name=None) assert children[1]["type"] == "ThematicBreak", "Missing break after title" # Parse the parameters @@ -193,15 +257,8 @@ def test_instruction_formatting(instruction): # Check example code block example_content = children[children.index(example) + 1] - assert example_content["type"] == "CodeFence" - assert example_content["language"] == "assembly", "Language should be 'assembly'" - assert len(example_content["children"]) == 1 + validate_example(example_content) # Check copyright footnote footnote = children[-1]["children"][0] - copyright = ( - "Adapted from Spin Semiconductor SPINAsm & FV-1 Instruction Set reference " - "manual. Copyright 2008 by Spin Semiconductor." - ) - assert footnote["type"] == "Emphasis", "Copyright is missing or incorrect." - assert find_content(footnote) == copyright, "Copyright does not match." + validate_copyright(footnote) diff --git a/tests/test_parser.py b/tests/test_parser.py index 23001d3..c0cf880 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,90 +1,16 @@ -"""Test the parsing of the SpinASM grammar.""" +"""Test the parsing of SPINAsm programs.""" from __future__ import annotations -import random - import pytest -from spinasm_lsp.parser import ( - Assignment, - Expression, - FV1Program, - Instruction, - Label, -) +from spinasm_lsp.parser import SPINAsmParser from .conftest import TEST_PATCHES -@pytest.mark.parametrize( - "expr", - ["42", "42.0", "0x2A", "$2A", "%00101010", "%001_01_010"], - ids=["i", "f", "0x", "$", "%", "%_"], -) -def test_number_representations(expr): - """Test supported number representations.""" - assert FV1Program(f"MULX {expr}").statements[0].args[0] == 42 - - -def test_combined_statements(): - """Test a program with multiple statements.""" - code = r""" - ; This is a comment - start: Tmp EQU 4 - EQU Tmp2 5 - MULX 0+1 - SOF -1,TMP - end: - """ - - assert FV1Program(code).statements == [ - Label("START"), - Assignment("equ", "TMP", 4), - Assignment("equ", "TMP2", 5), - Instruction("MULX", args=[Expression([0, "+", 1])]), - Instruction("SOF", args=[Expression([-1]), Expression(["TMP"])]), - Label("END"), - ] - - -@pytest.mark.parametrize("stmt", ["", "SOF 0", "x EQU 4"], ids=["none", "op", "equ"]) -def test_parse_label(stmt: str | None): - """Test that labels are parsed, with and without following statements.""" - prog = FV1Program(f"myLabel:{stmt}") - assert len(prog.statements) == 2 if stmt else 1 - assert prog.statements[0] == Label("MYLABEL") - - -@pytest.mark.parametrize("n_args", [0, 1, 3], ids=lambda x: f"{x} args") -def test_parse_instruction(n_args): - """Test that instructions with varying number of arguments are parsed correctly.""" - args = [random.randint(0, 100) for _ in range(n_args)] - code = f"MULX 
{','.join(map(str, args))}"
-    assert FV1Program(code).statements[0] == Instruction("MULX", args=args)
-
-
-@pytest.mark.parametrize("type", ["equ", "mem"])
-@pytest.mark.parametrize("order", ["{name} {type} {val}", "{type} {name} {val}"])
-def test_assign(type: str, order: str):
-    """Test that assignment with EQU and MEM work with either keyword order."""
-    code = order.format(name="A", type=type, val=5)
-    prog = FV1Program(code)
-    assert prog.statements[0] == Assignment(f"{type}", "A", 5)
-
-
-def test_parse_instruction_with_multiple_commas():
-    """Test that redundant commas are ignored."""
-    assert FV1Program("SOF 0,,42").statements[0] == Instruction("SOF", args=[0, 42])
-
-
-def test_whitespace_ignored():
-    """Test that whitespace around instructions and assignments are ignored."""
-    assert FV1Program("  MULX 0 \n B EQU A*2 \n  ")
-
-
 @pytest.mark.parametrize("patch", TEST_PATCHES, ids=lambda x: x.stem)
 def test_example_patches(patch):
-    """Test that the example patches from SpinASM parse correctly."""
-    with open(patch, encoding="utf-8-sig") as f:
-        FV1Program(f.read())
+    """Test that the example patches from SPINAsm are parsable."""
+    with open(patch, encoding="utf-8") as f:
+        assert SPINAsmParser(f.read())
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 0000000..f22a650
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,92 @@
"""Test LSP utilities."""

import itertools

import pytest

from spinasm_lsp import utils
from spinasm_lsp.parser import SPINAsmParser

from .conftest import PATCH_DIR


@pytest.fixture()
def sentence_token_registry() -> tuple[str, utils.TokenRegistry]:
    """A sentence with a token registry for each word."""
    sentence = "This   is a line    with words."

    # Build a list of word tokens, ignoring whitespace. We'll build the tokens
    # consistently with asfv1 parsed tokens.
    words = list(filter(lambda x: x, sentence.split(" ")))
    token_vals = [{"type": "LABEL", "txt": w, "stxt": w, "val": None} for w in words]
    tokens = []
    col = 0

    for t in token_vals:
        start = sentence.index(t["txt"], col)
        end = start + len(t["txt"]) - 1
        col = end + 1
        tokens.append(utils.Token(t, line=0, col_start=start, col_end=end))

    return sentence, utils.TokenRegistry(tokens)


def test_callback_dict():
    """Test that a CallbackDict calls its callback function when values are set."""
    key_store = []
    items = {
        "foo": 42,
        "bar": 0,
        "baz": 99,
    }
    d = utils.CallbackDict(items, callback=lambda k: key_store.append(k))
    assert d == items
    assert key_store == ["foo", "bar", "baz"]


def test_get_token_from_registry(sentence_token_registry):
    """Test that tokens are correctly retrieved by position from a registry."""
    sentence, reg = sentence_token_registry

    # Manually build a mapping of column indexes to expected token words
    token_positions = {i: None for i in range(len(sentence))}
    for i in range(0, 4):
        token_positions[i] = "This"
    for i in range(7, 9):
        token_positions[i] = "is"
    for i in range(10, 11):
        token_positions[i] = "a"
    for i in range(12, 16):
        token_positions[i] = "line"
    for i in range(20, 24):
        token_positions[i] = "with"
    for i in range(25, 31):
        token_positions[i] = "words."
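    # Every column index should resolve to the word that covers it; columns in
    # the whitespace gaps between words should resolve to None.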
+ + for i, word in token_positions.items(): + found_tok = reg.get_token_at_position(line=0, col=i) + found_val = found_tok.value["txt"] if found_tok is not None else found_tok + msg = f"Expected token `{word}` at col {i}, found `{found_val}`" + assert found_val == word, msg + + +@pytest.mark.parametrize("idx", list(itertools.product([-99, 99], [-99, 99])), ids=str) +def test_get_token_at_invalid_position_returns_none(idx, sentence_token_registry): + """Test that retrieving tokens from out of bounds always returns None.""" + line, col = idx + _, reg = sentence_token_registry + + assert reg.get_token_at_position(line=line, col=col) is None + + +def test_get_token_positions(): + """Test getting all positions of a token from a registry.""" + patch = PATCH_DIR / "Basic.spn" + with open(patch) as fp: + source = fp.read() + + parser = SPINAsmParser(source).parse() + + all_matches = parser.token_registry.get_matching_tokens("apout") + assert len(all_matches) == 4 + assert [t.line for t in all_matches] == [23, 57, 60, 70] From a40f92145e1662e521a0d49b1560118f432636dd Mon Sep 17 00:00:00 2001 From: Aaron Zuspan Date: Fri, 9 Aug 2024 16:44:48 -0700 Subject: [PATCH 02/18] Refactoring and cleanup --- README.md | 15 ++++++++ pyproject.toml | 6 ++- src/spinasm_lsp/logging.py | 21 ----------- src/spinasm_lsp/server.py | 77 ++++++++++++++++++++------------------ 4 files changed, 60 insertions(+), 59 deletions(-) delete mode 100644 src/spinasm_lsp/logging.py diff --git a/README.md b/README.md index e69de29..9299aa8 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,15 @@ +# SPINAsm LSP Server + +A Language Server Protocol (LSP) server to provide language support for the [SPINAsm assembly language](http://www.spinsemi.com/Products/datasheets/spn1001-dev/SPINAsmUserManual.pdf). The LSP is built on an extended version of the [asfv1](https://github.com/ndf-zz/asfv1) parser. + +## Features + +- **Diagnostics**: Reports the location of syntax errors and warnings. +- **Hover**: Shows opcode documentation and assigned values on hover. +- **Completion**: Provides suggestions for opcodes, labels, and defined values. +- **Renaming**: Allows renaming of labels and defined values. +- **Go to definition**: Jumps to the definition of a label or defined value. + +## Disclaimer + +This is an unofficial project and is not affiliated with Spin Semiconductor. Included documentation and examples are Copyright © 2018 Spin Semiconductor. 
\ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index b340512..9d7a9bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,11 @@ description = "A Language Server Protocol implementation for SPINAsm" readme = "README.md" requires-python = ">=3.9" keywords = [] -dependencies = [ "pygls", "lsprotocol", "asfv1" ] +dependencies = [ + "pygls", + "lsprotocol", + "asfv1==1.2.7", +] [[project.authors]] name = "Aaron Zuspan" diff --git a/src/spinasm_lsp/logging.py b/src/spinasm_lsp/logging.py deleted file mode 100644 index 72f1491..0000000 --- a/src/spinasm_lsp/logging.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Any - -from lsprotocol import types as lsp -from pygls import server - - -class ServerLogger: - def __init__(self, server: server.LanguageServer): - self.server = server - - def debug(self, msg: Any) -> None: - self.server.show_message_log(str(msg), lsp.MessageType.Debug) - - def info(self, msg: Any) -> None: - self.server.show_message_log(str(msg), lsp.MessageType.Info) - - def warning(self, msg: Any) -> None: - self.server.show_message_log(str(msg), lsp.MessageType.Warning) - - def error(self, msg: Any) -> None: - self.server.show_message_log(str(msg), lsp.MessageType.Error) diff --git a/src/spinasm_lsp/server.py b/src/spinasm_lsp/server.py index c5efa23..51bbab3 100644 --- a/src/spinasm_lsp/server.py +++ b/src/spinasm_lsp/server.py @@ -1,12 +1,13 @@ from __future__ import annotations +from typing import Any + from lsprotocol import types as lsp from pygls.server import LanguageServer from pygls.workspace import TextDocument from . import __version__ from .documentation import DocMap -from .logging import ServerLogger from .parser import SPINAsmParser @@ -15,26 +16,38 @@ def __init__(self, *args, **kwargs): super().__init__(*args, name="spinasm-lsp", version=__version__, **kwargs) self.parser: SPINAsmParser = None + def debug(self, msg: Any) -> None: + """Log a debug message.""" + self.show_message_log(str(msg), lsp.MessageType.Debug) -LSP_SERVER = SPINAsmLanguageServer(max_workers=5) -# TODO: Probably load async as part of a custom language server subclass -DOCUMENTATION = DocMap(folders=["instructions", "assemblers"]) -LOGGER = ServerLogger(LSP_SERVER) + def info(self, msg: Any) -> None: + """Log an info message.""" + self.show_message_log(str(msg), lsp.MessageType.Info) + def warning(self, msg: Any) -> None: + """Log a warning message.""" + self.show_message_log(str(msg), lsp.MessageType.Warning) -async def _get_diagnostics(document: TextDocument) -> list[lsp.Diagnostic]: - """Parse a document and return diagnostics.""" - try: - parser = SPINAsmParser(document.source).parse() - diagnostics = parser.diagnostics + def error(self, msg: Any) -> None: + """Log an error message.""" + self.show_message_log(str(msg), lsp.MessageType.Error) + + async def parse(self, document: TextDocument) -> SPINAsmParser: + """Parse a document and publish diagnostics.""" + try: + self.parser = SPINAsmParser(document.source).parse() + diagnostics = self.parser.diagnostics + except Exception as e: + self.error(e) + diagnostics = [] - # Store the last parsed state to avoid needing to re-parse for hover - LSP_SERVER.parser = parser - except Exception as e: - LOGGER.error(e) - diagnostics = [] + self.publish_diagnostics(document.uri, diagnostics) + return self.parser - return diagnostics + +LSP_SERVER = SPINAsmLanguageServer(max_workers=5) +# TODO: Probably load async as part of a custom language server subclass +DOCUMENTATION = DocMap(folders=["instructions", 
"assemblers"]) @LSP_SERVER.feature(lsp.TEXT_DOCUMENT_DID_CHANGE) @@ -43,34 +56,31 @@ async def did_change( ): """Run diagnostics on changed document.""" document = ls.workspace.get_text_document(params.text_document.uri) - diagnostics = await _get_diagnostics(document) - ls.publish_diagnostics(document.uri, diagnostics) + await ls.parse(document) @LSP_SERVER.feature(lsp.TEXT_DOCUMENT_DID_SAVE) async def did_save(ls: SPINAsmLanguageServer, params: lsp.DidSaveTextDocumentParams): """Run diagnostics on saved document.""" document = ls.workspace.get_text_document(params.text_document.uri) - diagnostics = await _get_diagnostics(document) - - ls.publish_diagnostics(document.uri, diagnostics) + await ls.parse(document) @LSP_SERVER.feature(lsp.TEXT_DOCUMENT_DID_OPEN) async def did_open(ls: SPINAsmLanguageServer, params: lsp.DidOpenTextDocumentParams): """Run diagnostics on open document.""" document = ls.workspace.get_text_document(params.text_document.uri) - diagnostics = await _get_diagnostics(document) - - ls.publish_diagnostics(document.uri, diagnostics) + await ls.parse(document) @LSP_SERVER.feature(lsp.TEXT_DOCUMENT_DID_CLOSE) -def did_close(params: lsp.DidCloseTextDocumentParams) -> None: - """LSP handler for textDocument/didClose request.""" - text_document = LSP_SERVER.workspace.get_text_document(params.text_document.uri) +def did_close( + ls: SPINAsmLanguageServer, params: lsp.DidCloseTextDocumentParams +) -> None: + """Clear the diagnostics on close.""" + text_document = ls.workspace.get_text_document(params.text_document.uri) # Clear the diagnostics on close - LSP_SERVER.publish_diagnostics(text_document.uri, []) + ls.publish_diagnostics(text_document.uri, []) @LSP_SERVER.feature(lsp.TEXT_DOCUMENT_HOVER) @@ -78,9 +88,6 @@ def hover(ls: SPINAsmLanguageServer, params: lsp.HoverParams) -> lsp.Hover | Non """Retrieve documentation from symbols on hover.""" pos = params.position - # TODO: Fix. Not exactly sure what the issue is, but it seems like I'm not getting - # hover for some symbols towards the end of lines, I think? - # I bet get_token_at_position doesn't work with the last token due to indexing token = ls.parser.token_registry.get_token_at_position(pos.line, pos.character) if token is None: return None @@ -188,16 +195,14 @@ def prepare_rename(ls: SPINAsmLanguageServer, params: lsp.PrepareRenameParams): is a valid operation.""" pos = params.position - # TODO: Fix. Not exactly sure what the issue is, but it seems like I'm not getting - # hover for some symbols towards the end of lines, I think? token = ls.parser.token_registry.get_token_at_position(pos.line, pos.character) if token is None: - LOGGER.debug(f"No token to rename at {pos}.") + ls.debug(f"No token to rename at {pos}.") return None # Only user-defined labels should support renaming if str(token) not in ls.parser.definitions: - LOGGER.debug(f"Can't rename non-user defined token {token}.") + ls.debug(f"Can't rename non-user defined token {token}.") return None return lsp.PrepareRenameResult_Type2(default_behavior=True) @@ -209,8 +214,6 @@ def prepare_rename(ls: SPINAsmLanguageServer, params: lsp.PrepareRenameParams): def rename(ls: SPINAsmLanguageServer, params: lsp.RenameParams): pos = params.position - # TODO: Fix. Not exactly sure what the issue is, but it seems like I'm not getting - # hover for some symbols towards the end of lines, I think? 
     token = ls.parser.token_registry.get_token_at_position(pos.line, pos.character)
     if token is None:
         return None

From a93b5b63650f415b5fbc0adbe53300bddbf602e3 Mon Sep 17 00:00:00 2001
From: Aaron Zuspan <50475791+aazuspan@users.noreply.github.com>
Date: Fri, 9 Aug 2024 16:47:15 -0700
Subject: [PATCH 03/18] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 9299aa8..b5c3e96 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,6 @@ A Language Server Protocol (LSP) server to provide language support for the [SPI
 - **Renaming**: Allows renaming of labels and defined values.
 - **Go to definition**: Jumps to the definition of a label or defined value.
 
-## Disclaimer
+------
 
-This is an unofficial project and is not affiliated with Spin Semiconductor. Included documentation and examples are Copyright © 2018 Spin Semiconductor.
\ No newline at end of file
+*This project is unaffiliated with Spin Semiconductor. Included documentation and examples are Copyright © 2018 Spin Semiconductor.*

From 21848ef0b0ccfbb45a6370eaec8d07e20fcc4117 Mon Sep 17 00:00:00 2001
From: Aaron Zuspan
Date: Sat, 10 Aug 2024 19:13:16 -0700
Subject: [PATCH 04/18] Add pytest_lsp tests

---
 pyproject.toml            |   5 +-
 src/spinasm_lsp/server.py | 149 ++++++++++++++++------------
 src/spinasm_lsp/utils.py  |  14 ++-
 tests/conftest.py         | 201 +++++++++++++++++++++++++++++++++++++
 tests/test_server.py      | 203 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 505 insertions(+), 67 deletions(-)
 create mode 100644 tests/test_server.py

diff --git a/pyproject.toml b/pyproject.toml
index 9d7a9bc..5e6cce9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,6 +23,9 @@ file = "LICENSE"
 
 [project.urls]
 Homepage = "https://github.com/aazuspan/spinasm-lsp"
 
+[project.scripts]
+spinasm-lsp = "spinasm_lsp.server:start"
+
 [tool.ruff]
 fix = true
 show-fixes = true
@@ -37,7 +40,7 @@ select = [ "E", "I", "F", "B", "FA", "UP", "PT", "Q", "RET", "SIM", "PERF",]
 dependencies = [ "pre-commit",]
 
 [tool.hatch.envs.test]
-dependencies = [ "pytest", "pytest-cov", "mistletoe" ]
+dependencies = [ "pytest", "pytest-cov", "mistletoe", "pytest-lsp" ]
 
 [tool.hatch.envs.test_matrix]
 template = "test"
diff --git a/src/spinasm_lsp/server.py b/src/spinasm_lsp/server.py
index 51bbab3..6e57ec1 100644
--- a/src/spinasm_lsp/server.py
+++ b/src/spinasm_lsp/server.py
@@ -1,24 +1,36 @@
 from __future__ import annotations
 
+from functools import lru_cache
 from typing import Any
 
 from lsprotocol import types as lsp
 from pygls.server import LanguageServer
 
 from . import __version__
 from .documentation import DocMap
 from .parser import SPINAsmParser
 
 
+@lru_cache(maxsize=1)
+def _parse_document(source: str) -> SPINAsmParser:
+    """
+    Parse a document and return the parser.
+
+    Parsers are cached based on the source code to speed up subsequent parsing.
+    """
+    return SPINAsmParser(source).parse()
+
+
 class SPINAsmLanguageServer(LanguageServer):
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, **kwargs) -> None:
+        self._prev_parser: SPINAsmParser | None = None
         super().__init__(*args, name="spinasm-lsp", version=__version__, **kwargs)
-        self.parser: SPINAsmParser = None
 
     def debug(self, msg: Any) -> None:
         """Log a debug message."""
-        self.show_message_log(str(msg), lsp.MessageType.Debug)
+        # MessageType.Debug is a proposed feature of 3.18.0, and isn't fully supported
+        # yet.
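+        # Fall back to MessageType.Log so messages still reach current clients.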
+ self.show_message_log(str(msg), lsp.MessageType.Log) def info(self, msg: Any) -> None: """Log an info message.""" @@ -32,63 +44,60 @@ def error(self, msg: Any) -> None: """Log an error message.""" self.show_message_log(str(msg), lsp.MessageType.Error) - async def parse(self, document: TextDocument) -> SPINAsmParser: - """Parse a document and publish diagnostics.""" - try: - self.parser = SPINAsmParser(document.source).parse() - diagnostics = self.parser.diagnostics - except Exception as e: - self.error(e) - diagnostics = [] + async def get_parser(self, uri: str) -> SPINAsmParser: + """Return a parser for the document, caching if possible.""" + document = self.workspace.get_text_document(uri) + parser = _parse_document(document.source) - self.publish_diagnostics(document.uri, diagnostics) - return self.parser + # Skip publishing diagnostics if the parser is unchanged + if parser is not self._prev_parser: + self.publish_diagnostics(document.uri, parser.diagnostics) + self._prev_parser = parser + return parser -LSP_SERVER = SPINAsmLanguageServer(max_workers=5) + +server = SPINAsmLanguageServer(max_workers=5) # TODO: Probably load async as part of a custom language server subclass DOCUMENTATION = DocMap(folders=["instructions", "assemblers"]) -@LSP_SERVER.feature(lsp.TEXT_DOCUMENT_DID_CHANGE) +@server.feature(lsp.TEXT_DOCUMENT_DID_CHANGE) async def did_change( ls: SPINAsmLanguageServer, params: lsp.DidChangeTextDocumentParams ): """Run diagnostics on changed document.""" - document = ls.workspace.get_text_document(params.text_document.uri) - await ls.parse(document) + await ls.get_parser(params.text_document.uri) -@LSP_SERVER.feature(lsp.TEXT_DOCUMENT_DID_SAVE) +@server.feature(lsp.TEXT_DOCUMENT_DID_SAVE) async def did_save(ls: SPINAsmLanguageServer, params: lsp.DidSaveTextDocumentParams): """Run diagnostics on saved document.""" - document = ls.workspace.get_text_document(params.text_document.uri) - await ls.parse(document) + await ls.get_parser(params.text_document.uri) -@LSP_SERVER.feature(lsp.TEXT_DOCUMENT_DID_OPEN) +@server.feature(lsp.TEXT_DOCUMENT_DID_OPEN) async def did_open(ls: SPINAsmLanguageServer, params: lsp.DidOpenTextDocumentParams): """Run diagnostics on open document.""" - document = ls.workspace.get_text_document(params.text_document.uri) - await ls.parse(document) + await ls.get_parser(params.text_document.uri) -@LSP_SERVER.feature(lsp.TEXT_DOCUMENT_DID_CLOSE) +@server.feature(lsp.TEXT_DOCUMENT_DID_CLOSE) def did_close( ls: SPINAsmLanguageServer, params: lsp.DidCloseTextDocumentParams ) -> None: """Clear the diagnostics on close.""" - text_document = ls.workspace.get_text_document(params.text_document.uri) - # Clear the diagnostics on close - ls.publish_diagnostics(text_document.uri, []) + ls.publish_diagnostics(params.text_document.uri, []) -@LSP_SERVER.feature(lsp.TEXT_DOCUMENT_HOVER) -def hover(ls: SPINAsmLanguageServer, params: lsp.HoverParams) -> lsp.Hover | None: +@server.feature(lsp.TEXT_DOCUMENT_HOVER) +async def hover(ls: SPINAsmLanguageServer, params: lsp.HoverParams) -> lsp.Hover | None: """Retrieve documentation from symbols on hover.""" + parser = await ls.get_parser(params.text_document.uri) + pos = params.position - token = ls.parser.token_registry.get_token_at_position(pos.line, pos.character) + token = parser.token_registry.get_token_at_position(pos.line, pos.character) if token is None: return None @@ -101,24 +110,28 @@ def hover(ls: SPINAsmLanguageServer, params: lsp.HoverParams) -> lsp.Hover | Non hover_msg = None if token_type in ("LABEL", "TARGET"): + 
# Special case for hovering over the second word of a CHO instruction, which + # should be treated as part of the instruction for retrieving documentation. + if token_val == "RDA" and str(token.prev_token) == "CHO": + token_val = f"CHO {token_val}" + hover_msg = DOCUMENTATION.get(token_val, "") # Label definitions and targets - if token_val in LSP_SERVER.parser.jmptbl: - hover_definition = LSP_SERVER.parser.jmptbl[token_val.upper()] + elif token_val in parser.jmptbl: + hover_definition = parser.jmptbl[token_val.upper()] hover_msg = f"(label) {token_val}: Offset[**{hover_definition}**]" # Variable and memory definitions - elif token_val in LSP_SERVER.parser.symtbl: - hover_definition = LSP_SERVER.parser.symtbl[token_val.upper()] + elif token_val in parser.symtbl: + hover_definition = parser.symtbl[token_val.upper()] hover_msg = f"(constant) {token_val}: Literal[**{hover_definition}**]" + # Opcodes and assignments + elif token_type in ("ASSEMBLER", "MNEMONIC"): # Special case for hovering over the second word of a CHO instruction, which # should be treated as part of the instruction for retrieving documentation. - if token_val in ("SOF", "RDAL", "RDA") and str(token.prev_token) == "CHO": + if token_val in ("SOF", "RDA") and str(token.prev_token) == "CHO": token_val = f"CHO {token_val}" - hover_msg = DOCUMENTATION.get(token_val, "") - # Opcodes and assignments - elif token_type in ("ASSEMBLER", "MNEMONIC"): # CHO is a special opcode that treats its first argument as part of the # instruction, for the sake of documentation. - if token_val == "CHO" and token.next_token is not None: + elif token_val == "CHO" and token.next_token is not None: token_val = f"CHO {str(token.next_token)}" hover_msg = DOCUMENTATION.get(token_val, "") @@ -132,15 +145,17 @@ def hover(ls: SPINAsmLanguageServer, params: lsp.HoverParams) -> lsp.Hover | Non ) -@LSP_SERVER.feature(lsp.TEXT_DOCUMENT_COMPLETION) -def completions( - ls: SPINAsmLanguageServer, params: lsp.CompletionParams | None = None +@server.feature(lsp.TEXT_DOCUMENT_COMPLETION) +async def completions( + ls: SPINAsmLanguageServer, params: lsp.CompletionParams ) -> lsp.CompletionList: """Returns completion items.""" + parser = await ls.get_parser(params.text_document.uri) + opcodes = [k.upper() for k in DOCUMENTATION] - symbols = list(ls.parser.symtbl.keys()) - labels = list(ls.parser.jmptbl.keys()) - mem = list(ls.parser.mem.keys()) + symbols = list(parser.symtbl.keys()) + labels = list(parser.jmptbl.keys()) + mem = list(parser.mem.keys()) opcode_items = [ lsp.CompletionItem(label=k, kind=lsp.CompletionItemKind.Function) @@ -163,62 +178,68 @@ def completions( ) -@LSP_SERVER.feature(lsp.TEXT_DOCUMENT_DEFINITION) -def definition( +@server.feature(lsp.TEXT_DOCUMENT_DEFINITION) +async def definition( ls: SPINAsmLanguageServer, params: lsp.DefinitionParams ) -> lsp.Location | None: """Returns the definition location of a symbol.""" + parser = await ls.get_parser(params.text_document.uri) + document = ls.workspace.get_text_document(params.text_document.uri) pos = params.position - # TODO: Probably switch to token registry + # TODO: Probably switch to token registry. Note that when I do, it might break + # definitions for addresses with # and ^, maybe? 
try: word = document.word_at_position(pos).upper() except IndexError: return None - if word in ls.parser.definitions: + if word in parser.definitions: return lsp.Location( uri=document.uri, range=lsp.Range( - start=ls.parser.definitions[word], - end=ls.parser.definitions[word], + start=parser.definitions[word], + end=parser.definitions[word], ), ) return None -@LSP_SERVER.feature(lsp.TEXT_DOCUMENT_PREPARE_RENAME) -def prepare_rename(ls: SPINAsmLanguageServer, params: lsp.PrepareRenameParams): +@server.feature(lsp.TEXT_DOCUMENT_PREPARE_RENAME) +async def prepare_rename(ls: SPINAsmLanguageServer, params: lsp.PrepareRenameParams): """Called by the client to determine if renaming the symbol at the given location is a valid operation.""" - pos = params.position + parser = await ls.get_parser(params.text_document.uri) - token = ls.parser.token_registry.get_token_at_position(pos.line, pos.character) + pos = params.position + token = parser.token_registry.get_token_at_position(pos.line, pos.character) if token is None: - ls.debug(f"No token to rename at {pos}.") + ls.info(f"No token to rename at {pos}.") return None # Only user-defined labels should support renaming - if str(token) not in ls.parser.definitions: - ls.debug(f"Can't rename non-user defined token {token}.") + if str(token) not in parser.definitions: + ls.info(f"Can't rename non-user defined token {token}.") return None return lsp.PrepareRenameResult_Type2(default_behavior=True) -@LSP_SERVER.feature( +@server.feature( lsp.TEXT_DOCUMENT_RENAME, options=lsp.RenameOptions(prepare_provider=True) ) -def rename(ls: SPINAsmLanguageServer, params: lsp.RenameParams): +async def rename(ls: SPINAsmLanguageServer, params: lsp.RenameParams): + parser = await ls.get_parser(params.text_document.uri) + pos = params.position - token = ls.parser.token_registry.get_token_at_position(pos.line, pos.character) + token = parser.token_registry.get_token_at_position(pos.line, pos.character) if token is None: return None - matching_tokens = ls.parser.token_registry.get_matching_tokens(str(token)) + matching_tokens = parser.token_registry.get_matching_tokens(str(token)) edits = [ lsp.TextEdit( @@ -235,7 +256,7 @@ def rename(ls: SPINAsmLanguageServer, params: lsp.RenameParams): def start() -> None: - LSP_SERVER.start_io() + server.start_io() if __name__ == "__main__": diff --git a/src/spinasm_lsp/utils.py b/src/spinasm_lsp/utils.py index 05e4a15..b3e53d3 100644 --- a/src/spinasm_lsp/utils.py +++ b/src/spinasm_lsp/utils.py @@ -1,9 +1,10 @@ from __future__ import annotations import bisect +import copy from collections import UserDict from dataclasses import dataclass -from typing import Callable, Literal, TypedDict +from typing import Callable, Literal, TypedDict, cast # Types of tokens defined by asfv1 TokenType = Literal[ @@ -94,9 +95,18 @@ def register_token(self, token: Token) -> None: self.lines[token.line].append(token) self._prev_token = token - # Store user-defined tokens in the registry. Other token types could be stored, + # Store user-defined tokens in the directory. Other token types could be stored, # but currently there's no use case for retrieving their positions. if token.value["type"] in ("LABEL", "TARGET"): + # Remove address modifiers when storing tokens in the directory. This allows + # for renaming modified tokens along with the original. 
+ if str(token).endswith("#") or str(token).endswith("^"): + token = copy.deepcopy(token) + token.value["stxt"] = cast(str, token.value["stxt"])[:-1] + # We need to truncate the range to exclude the modifier, or else it will + # be removed during renaming. + token.col_end -= 1 + if str(token) not in self.directory: self.directory[str(token)] = [] self.directory[str(token)].append(token) diff --git a/tests/conftest.py b/tests/conftest.py index b32b615..e6f1efd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,206 @@ +from __future__ import annotations + from pathlib import Path +from typing import TypedDict + +import lsprotocol.types as lsp PATCH_DIR = Path(__file__).parent / "patches" TEST_PATCHES = list(PATCH_DIR.glob("*.spn")) assert TEST_PATCHES, "No test patches found in the patches directory." + + +class AssignmentDict(TypedDict): + """A dictionary track where a symbol is referenced and defined.""" + + symbol: str + referenced: lsp.Position + defined: lsp.Location + + +class HoverDict(TypedDict): + """A dictionary to record hover information for a symbol.""" + + symbol: str + position: lsp.Position + contains: str + + +class PrepareRenameDict(TypedDict): + """A dictionary to record prepare rename results for a symbol.""" + + symbol: str + position: lsp.Position + result: bool + message: str | None + + +class RenameDict(TypedDict): + """A dictionary to record rename results for a symbol.""" + + symbol: str + rename_to: str + position: lsp.Position + changes: list[lsp.TextEdit] + + +# Example assignments from the "Basic.spn" patch, for testing definition locations +ASSIGNMENTS: list[AssignmentDict] = [ + { + # Variable + "symbol": "apout", + "referenced": lsp.Position(line=57, character=7), + "defined": lsp.Location( + uri=f"file:///{PATCH_DIR / 'Basic.spn'}", + range=lsp.Range( + start=lsp.Position(line=23, character=4), + end=lsp.Position(line=23, character=4), + ), + ), + }, + { + # Memory + "symbol": "lap2a", + "referenced": lsp.Position(line=72, character=7), + "defined": lsp.Location( + uri=f"file:///{PATCH_DIR / 'Basic.spn'}", + range=lsp.Range( + start=lsp.Position(line=16, character=4), + end=lsp.Position(line=16, character=4), + ), + ), + }, + { + # Memory. Note that this has an address modifier, but still points to the + # original definition. 
+ "symbol": "lap2a#", + "referenced": lsp.Position(line=71, character=7), + "defined": lsp.Location( + uri=f"file:///{PATCH_DIR / 'Basic.spn'}", + range=lsp.Range( + start=lsp.Position(line=16, character=4), + end=lsp.Position(line=16, character=4), + ), + ), + }, + { + # Label + "symbol": "endclr", + "referenced": lsp.Position(line=37, character=9), + "defined": lsp.Location( + uri=f"file:///{PATCH_DIR / 'Basic.spn'}", + range=lsp.Range( + start=lsp.Position(line=41, character=0), + end=lsp.Position(line=41, character=0), + ), + ), + }, +] + + +# Example hovers from the "Basic.spn" patch, for testing hover info +HOVERS: list[HoverDict] = [ + { + "symbol": "mem", + "position": lsp.Position(line=8, character=0), + "contains": "## `MEM`", + }, + { + "symbol": "skp", + "position": lsp.Position(line=37, character=2), + "contains": "## `SKP CMASK,N`", + }, + { + "symbol": "label", + "position": lsp.Position(line=37, character=14), + "contains": "(label) ENDCLR: Offset[**4**]", + }, + { + "symbol": "variable", + "position": lsp.Position(line=47, character=5), + "contains": "(constant) MONO: Literal[**32**]", + }, + { + "symbol": "modified_address", + "position": lsp.Position(line=73, character=4), + "contains": "(constant) LAP2B#: Literal[**9802**]", + }, + { + # CHO RDA, hovering over CHO + "symbol": "cho_rda", + "position": lsp.Position(line=85, character=0), + "contains": "## `CHO RDA N, C, D`", + }, + { + # CHO RDA, hovering over RDA + "symbol": "cho_rda", + "position": lsp.Position(line=85, character=4), + "contains": "## `CHO RDA N, C, D`", + }, +] + + +PREPARE_RENAMES: list[PrepareRenameDict] = [ + { + "symbol": "mem", + "position": lsp.Position(line=8, character=0), + "result": None, + "message": "Can't rename non-user defined token MEM.", + }, + { + "symbol": "reg0", + "position": lsp.Position(line=22, character=10), + "result": None, + "message": "Can't rename non-user defined token REG0.", + }, + { + "symbol": "ap1", + "position": lsp.Position(line=8, character=4), + "result": lsp.PrepareRenameResult_Type2(default_behavior=True), + "message": None, + }, + { + "symbol": "endclr", + "position": lsp.Position(line=37, character=10), + "result": lsp.PrepareRenameResult_Type2(default_behavior=True), + "message": None, + }, +] + +RENAMES: list[RenameDict] = [ + { + "symbol": "ap1", + "rename_to": "FOO", + "position": lsp.Position(line=8, character=4), + "changes": [ + lsp.TextEdit( + range=lsp.Range(start=lsp.Position(8, 4), end=lsp.Position(8, 7)), + new_text="FOO", + ), + # This symbol is ap1#, and should be matched when renaming ap1 + lsp.TextEdit( + range=lsp.Range(start=lsp.Position(51, 4), end=lsp.Position(51, 7)), + new_text="FOO", + ), + lsp.TextEdit( + range=lsp.Range(start=lsp.Position(52, 5), end=lsp.Position(52, 8)), + new_text="FOO", + ), + ], + }, + { + "symbol": "label", + "rename_to": "END", + "position": lsp.Position(line=41, character=0), + "changes": [ + lsp.TextEdit( + range=lsp.Range(start=lsp.Position(37, 8), end=lsp.Position(37, 14)), + new_text="END", + ), + lsp.TextEdit( + range=lsp.Range(start=lsp.Position(41, 0), end=lsp.Position(41, 6)), + new_text="END", + ), + ], + }, +] diff --git a/tests/test_server.py b/tests/test_server.py new file mode 100644 index 0000000..23ac7a1 --- /dev/null +++ b/tests/test_server.py @@ -0,0 +1,203 @@ +import lsprotocol.types as lsp +import pytest +import pytest_lsp +from pytest_lsp import ClientServerConfig, LanguageClient + +from .conftest import ( + ASSIGNMENTS, + HOVERS, + PATCH_DIR, + PREPARE_RENAMES, + RENAMES, + 
PrepareRenameDict, + RenameDict, +) + + +@pytest_lsp.fixture( + params=["neovim", "visual_studio_code"], + config=ClientServerConfig(server_command=["spinasm-lsp"]), +) +async def client(request, lsp_client: LanguageClient): + # Setup the server + params = lsp.InitializeParams( + capabilities=pytest_lsp.client_capabilities(request.param) + ) + + await lsp_client.initialize_session(params) + yield + + # Shutdown the server after the test + await lsp_client.shutdown_session() + + +@pytest.mark.asyncio() +@pytest.mark.parametrize("assignment", ASSIGNMENTS, ids=lambda x: x["symbol"]) +async def test_definition(assignment: dict, client: LanguageClient): + """Test that the definition location of different assignments is correct.""" + uri = assignment["defined"].uri + result = await client.text_document_definition_async( + params=lsp.DefinitionParams( + position=assignment["referenced"], + text_document=lsp.TextDocumentIdentifier(uri=uri), + ) + ) + + assert result == assignment["defined"] + + +@pytest.mark.asyncio() +async def test_completions(client: LanguageClient): + patch = PATCH_DIR / "Basic.spn" + + results = await client.text_document_completion_async( + params=lsp.CompletionParams( + position=lsp.Position(line=0, character=0), + text_document=lsp.TextDocumentIdentifier(uri=f"file:///{patch.absolute()}"), + ) + ) + assert results is not None, "Expected completions" + completions = [item.label for item in results.items] + + expected_completions = [ + # Memory locations + "AP1", + "LAP1A", + "D2", + # Variables + "MONO", + "APOUT", + "KRF", + # Constants + "REG0", + "SIN0", + # Opcodes + "SOF", + "MULX", + "WRAX", + ] + + for completion in expected_completions: + assert completion in completions, f"Expected completion {completion} not found" + + +@pytest.mark.asyncio() +async def test_diagnostic_parsing_errors(client: LanguageClient): + """Test that parsing errors and warnings are correctly reported by the server.""" + source_with_errors = """ +; Undefined symbol a +SOF 0,a + +; Label REG0 re-defined +REG0 EQU 4 + +; Register out of range +MULX 100 +""" + + # We need a URI to associate with the source, but it doesn't need to be a real file. + test_uri = "dummy_uri" + client.text_document_did_open( + lsp.DidOpenTextDocumentParams( + text_document=lsp.TextDocumentItem( + uri=test_uri, + language_id="spinasm", + version=1, + text=source_with_errors, + ) + ) + ) + + await client.wait_for_notification(lsp.TEXT_DOCUMENT_PUBLISH_DIAGNOSTICS) + + expected = [ + lsp.Diagnostic( + range=lsp.Range( + start=lsp.Position(line=2, character=6), + end=lsp.Position(line=2, character=6), + ), + message="Undefined label a", + severity=lsp.DiagnosticSeverity.Error, + source="SPINAsm", + ), + lsp.Diagnostic( + range=lsp.Range( + start=lsp.Position(line=5, character=9), + end=lsp.Position(line=5, character=9), + ), + message="Label REG0 re-defined", + severity=lsp.DiagnosticSeverity.Warning, + source="SPINAsm", + ), + lsp.Diagnostic( + range=lsp.Range( + start=lsp.Position(line=8, character=0), + end=lsp.Position(line=8, character=0), + ), + message="Register 0x64 out of range for MULX", + severity=lsp.DiagnosticSeverity.Error, + source="SPINAsm", + ), + ] + + returned = client.diagnostics[test_uri] + extra = len(returned) - len(expected) + assert extra == 0, f"Expected {len(expected)} diagnostics, got {len(returned)}." 
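The expected diagnostics above are all zero-width ranges pinned to the character where asfv1 reported the problem. A sketch of that conversion, assuming the parser hands back a plain `(message, line, character)` triple plus a warning flag:

```python
import lsprotocol.types as lsp


def to_diagnostic(
    msg: str, line: int, character: int, warning: bool = False
) -> lsp.Diagnostic:
    """Convert a reported parser error into a zero-width LSP diagnostic."""
    position = lsp.Position(line=line, character=character)
    severity = (
        lsp.DiagnosticSeverity.Warning if warning else lsp.DiagnosticSeverity.Error
    )
    return lsp.Diagnostic(
        range=lsp.Range(start=position, end=position),
        message=msg,
        severity=severity,
        source="SPINAsm",
    )


# e.g. the undefined label from the test source above
diag = to_diagnostic("Undefined label a", line=2, character=6)
assert diag.severity == lsp.DiagnosticSeverity.Error
```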
+ + for i, diag in enumerate(expected): + assert diag == returned[i], f"Diagnostic {i} does not match expected" + + +@pytest.mark.parametrize("hover", HOVERS, ids=lambda x: x["symbol"]) +@pytest.mark.asyncio() +async def test_hover(hover: dict, client: LanguageClient): + patch = PATCH_DIR / "Basic.spn" + + result = await client.text_document_hover_async( + params=lsp.CompletionParams( + position=hover["position"], + text_document=lsp.TextDocumentIdentifier(uri=f"file:///{patch.absolute()}"), + ) + ) + + assert hover["contains"] in result.contents.value + + +@pytest.mark.parametrize("prepare", PREPARE_RENAMES, ids=lambda x: x["symbol"]) +@pytest.mark.asyncio() +async def test_prepare_rename(prepare: PrepareRenameDict, client: LanguageClient): + """Test that prepare rename prevents renaming non-user defined tokens.""" + patch = PATCH_DIR / "Basic.spn" + + result = await client.text_document_prepare_rename_async( + params=lsp.PrepareRenameParams( + position=prepare["position"], + text_document=lsp.TextDocumentIdentifier(uri=f"file:///{patch.absolute()}"), + ) + ) + + assert result == prepare["result"] + + if prepare["message"]: + assert prepare["message"] in client.log_messages[0].message + assert client.log_messages[0].type == lsp.MessageType.Info + else: + assert not client.log_messages + + +@pytest.mark.parametrize("rename", RENAMES, ids=lambda x: x["symbol"]) +@pytest.mark.asyncio() +async def test_rename(rename: RenameDict, client: LanguageClient): + """Test that renaming a symbol suggests the correct edits.""" + patch = PATCH_DIR / "Basic.spn" + + uri = f"file:///{patch.absolute()}" + result = await client.text_document_rename_async( + params=lsp.RenameParams( + position=rename["position"], + new_name=rename["rename_to"], + text_document=lsp.TextDocumentIdentifier(uri=uri), + ) + ) + + assert result.changes[uri] == rename["changes"]

From 5ef6f762e36f48496089ec4c633333ad54393f31 Mon Sep 17 00:00:00 2001
From: Aaron Zuspan
Date: Sat, 10 Aug 2024 19:21:39 -0700
Subject: [PATCH 05/18] Switch definition to use token registry

---
 src/spinasm_lsp/server.py | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/src/spinasm_lsp/server.py b/src/spinasm_lsp/server.py
index 6e57ec1..4d12002 100644
--- a/src/spinasm_lsp/server.py
+++ b/src/spinasm_lsp/server.py
@@ -188,23 +188,18 @@ async def definition( document = ls.workspace.get_text_document(params.text_document.uri) pos = params.position - # TODO: Probably switch to token registry. Note that when I do, it might break - # definitions for addresses with # and ^, maybe?
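The `definition` handler this patch rewrites is ultimately a table lookup: the parser records one position per user-defined name, and the handler wraps it in an `lsp.Location`. A compact sketch with a made-up `definitions` table:

```python
from __future__ import annotations

import lsprotocol.types as lsp

# Hypothetical table built during parsing: symbol name -> definition position.
definitions = {"ENDCLR": lsp.Position(line=41, character=0)}


def find_definition(uri: str, name: str) -> lsp.Location | None:
    """Resolve a symbol name to the location where it was defined."""
    position = definitions.get(name.upper())
    if position is None:
        return None
    return lsp.Location(uri=uri, range=lsp.Range(start=position, end=position))


assert find_definition("file:///Basic.spn", "endclr") is not None
assert find_definition("file:///Basic.spn", "missing") is None
```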
- try: - word = document.word_at_position(pos).upper() - except IndexError: - return None + token = parser.token_registry.get_token_at_position(pos.line, pos.character) - if word in parser.definitions: - return lsp.Location( - uri=document.uri, - range=lsp.Range( - start=parser.definitions[word], - end=parser.definitions[word], - ), - ) + if str(token) not in parser.definitions: + return None - return None + return lsp.Location( + uri=document.uri, + range=lsp.Range( + start=parser.definitions[str(token)], + end=parser.definitions[str(token)], + ), + ) @server.feature(lsp.TEXT_DOCUMENT_PREPARE_RENAME) From 51d36ce2a440f5ed867f02392c6e7975e16fb5f4 Mon Sep 17 00:00:00 2001 From: Aaron Zuspan Date: Sat, 10 Aug 2024 21:05:55 -0700 Subject: [PATCH 06/18] Use lsp.Position and lsp.Range for tracking tokens --- src/spinasm_lsp/parser.py | 8 +++++--- src/spinasm_lsp/server.py | 21 ++++++------------- src/spinasm_lsp/utils.py | 43 +++++++++++++++++++-------------------- tests/test_server.py | 3 ++- tests/test_utils.py | 23 +++++++++++++-------- 5 files changed, 48 insertions(+), 50 deletions(-) diff --git a/src/spinasm_lsp/parser.py b/src/spinasm_lsp/parser.py index 76f4218..d073970 100644 --- a/src/spinasm_lsp/parser.py +++ b/src/spinasm_lsp/parser.py @@ -136,12 +136,14 @@ def previous_line(self): def __next__(self): """Parse the next symbol and update the column.""" super().__next__() - # TODO: Make sure super().__next__ can't get stuck in an infinite loop since I - # removed the maxerr check self._update_column() token_width = len(self.sym["txt"] or "") - token = Token(self.sym, self.current_line, self.col, self.col + token_width) + token_range = lsp.Range( + start=lsp.Position(line=self.current_line, character=self.col), + end=lsp.Position(line=self.current_line, character=self.col + token_width), + ) + token = Token(self.sym, range=token_range) self.token_registry.register_token(token) def _update_column(self): diff --git a/src/spinasm_lsp/server.py b/src/spinasm_lsp/server.py index 4d12002..68a63c4 100644 --- a/src/spinasm_lsp/server.py +++ b/src/spinasm_lsp/server.py @@ -95,9 +95,7 @@ async def hover(ls: SPINAsmLanguageServer, params: lsp.HoverParams) -> lsp.Hover """Retrieve documentation from symbols on hover.""" parser = await ls.get_parser(params.text_document.uri) - pos = params.position - - token = parser.token_registry.get_token_at_position(pos.line, pos.character) + token = parser.token_registry.get_token_at_position(params.position) if token is None: return None @@ -186,9 +184,8 @@ async def definition( parser = await ls.get_parser(params.text_document.uri) document = ls.workspace.get_text_document(params.text_document.uri) - pos = params.position - token = parser.token_registry.get_token_at_position(pos.line, pos.character) + token = parser.token_registry.get_token_at_position(params.position) if str(token) not in parser.definitions: return None @@ -208,10 +205,9 @@ async def prepare_rename(ls: SPINAsmLanguageServer, params: lsp.PrepareRenamePar is a valid operation.""" parser = await ls.get_parser(params.text_document.uri) - pos = params.position - token = parser.token_registry.get_token_at_position(pos.line, pos.character) + token = parser.token_registry.get_token_at_position(params.position) if token is None: - ls.info(f"No token to rename at {pos}.") + ls.info(f"No token to rename at {params.position}.") return None # Only user-defined labels should support renaming @@ -228,9 +224,7 @@ async def prepare_rename(ls: SPINAsmLanguageServer, params: lsp.PrepareRenamePar async def 
rename(ls: SPINAsmLanguageServer, params: lsp.RenameParams): parser = await ls.get_parser(params.text_document.uri) - pos = params.position - - token = parser.token_registry.get_token_at_position(pos.line, pos.character) + token = parser.token_registry.get_token_at_position(params.position) if token is None: return None @@ -238,10 +232,7 @@ async def rename(ls: SPINAsmLanguageServer, params: lsp.RenameParams): edits = [ lsp.TextEdit( - range=lsp.Range( - start=lsp.Position(t.line, t.col_start), - end=lsp.Position(t.line, t.col_end), - ), + range=t.range, new_text=params.new_name, ) for t in matching_tokens diff --git a/src/spinasm_lsp/utils.py b/src/spinasm_lsp/utils.py index b3e53d3..1dcc988 100644 --- a/src/spinasm_lsp/utils.py +++ b/src/spinasm_lsp/utils.py @@ -6,6 +6,8 @@ from dataclasses import dataclass from typing import Callable, Literal, TypedDict, cast +import lsprotocol.types as lsp + # Types of tokens defined by asfv1 TokenType = Literal[ "ASSEMBLER", @@ -41,15 +43,12 @@ class TokenValue(TypedDict): val: int | float | None -# TODO: Probably use lsp.Position and lsp.Range to define the token location @dataclass class Token: """A token and its position in a source file.""" value: TokenValue - line: int - col_start: int - col_end: int + range: lsp.Range next_token: Token | None = None prev_token: Token | None = None @@ -83,8 +82,8 @@ def register_token(self, token: Token) -> None: # will break `get_token_at_position`. I could check if prev token was CHO when # I register RDAL, SOF, or RDA, and if so register them as one and unregister # the previous? - if token.line not in self.lines: - self.lines[token.line] = [] + if token.range.start.line not in self.lines: + self.lines[token.range.start.line] = [] # Record the previous and next token for each token to allow traversing if self._prev_token: @@ -92,7 +91,7 @@ def register_token(self, token: Token) -> None: self._prev_token.next_token = token # Store the token on its line - self.lines[token.line].append(token) + self.lines[token.range.start.line].append(token) self._prev_token = token # Store user-defined tokens in the directory. Other token types could be stored, @@ -105,7 +104,7 @@ def register_token(self, token: Token) -> None: token.value["stxt"] = cast(str, token.value["stxt"])[:-1] # We need to truncate the range to exclude the modifier, or else it will # be removed during renaming. - token.col_end -= 1 + token.range.end.character -= 1 if str(token) not in self.directory: self.directory[str(token)] = [] @@ -115,26 +114,26 @@ def get_matching_tokens(self, token_name: str) -> list[Token]: """Retrieve all tokens with a given name in the program.""" return self.directory.get(token_name.upper(), []) - def get_token_at_position(self, line: int, col: int) -> Token | None: + def get_token_at_position(self, position: lsp.Position) -> Token | None: """Retrieve the token at the given line and column index.""" - if line not in self.lines: + if position.line not in self.lines: return None - line_tokens = self.lines[line] - token_starts = [t.col_start for t in line_tokens] - token_ends = [t.col_end for t in line_tokens] + line_tokens = self.lines[position.line] + token_starts = [t.range.start.character for t in line_tokens] + token_ends = [t.range.end.character for t in line_tokens] - pos = bisect.bisect_left(token_starts, col) + idx = bisect.bisect_left(token_starts, position.character) - # The index returned by bisect_left points to the start value >= col. 
This will - # either be the first character of the token or the start of the next token. - # First check if we're out of bounds, then shift left unless we're at the first - # character of the token. - if pos == len(line_tokens) or token_starts[pos] != col: - pos -= 1 + # The index returned by bisect_left points to the start value >= character. This + # will either be the first character of the token or the start of the next + # token. First check if we're out of bounds, then shift left unless we're at the + # first character of the token. + if idx == len(line_tokens) or token_starts[idx] != position.character: + idx -= 1 # If the col falls after the end of the token, we're not inside a token. - if col > token_ends[pos]: + if position.character > token_ends[idx]: return None - return line_tokens[pos] + return line_tokens[idx] diff --git a/tests/test_server.py b/tests/test_server.py index 23ac7a1..be8dce7 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -15,7 +15,8 @@ @pytest_lsp.fixture( - params=["neovim", "visual_studio_code"], + # params=["neovim", "visual_studio_code"], + params=["visual_studio_code"], config=ClientServerConfig(server_command=["spinasm-lsp"]), ) async def client(request, lsp_client: LanguageClient): diff --git a/tests/test_utils.py b/tests/test_utils.py index f22a650..74dedad 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,7 +1,6 @@ """Test LSP utilities.""" -import itertools - +import lsprotocol.types as lsp import pytest from spinasm_lsp import utils @@ -26,7 +25,15 @@ def sentence_token_registry() -> tuple[str, utils.TokenRegistry]: start = sentence.index(t["txt"], col) end = start + len(t["txt"]) - 1 col = end + 1 - tokens.append(utils.Token(t, line=0, col_start=start, col_end=end)) + tokens.append( + utils.Token( + t, + range=lsp.Range( + start=lsp.Position(line=0, character=start), + end=lsp.Position(line=0, character=end), + ), + ) + ) return sentence, utils.TokenRegistry(tokens) @@ -64,19 +71,17 @@ def test_get_token_from_registry(sentence_token_registry): token_positions[i] = "words." 
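The `bisect_left` arithmetic above is the subtlest part of the registry, so here is a worked, standalone version of the same search (the token columns are hypothetical). `bisect_left` finds the first token start at or after the cursor; unless the cursor is exactly on a token's first character, the covering token must start one slot earlier, and a final bounds check rejects cursors that fall in the gap past a token's end:

```python
from __future__ import annotations

import bisect

# Hypothetical tokens on one line as (start, end) column pairs, sorted by start.
token_starts = [0, 5, 12]
token_ends = [3, 10, 15]


def index_at(col: int) -> int | None:
    """Return the index of the token covering a column, or None."""
    idx = bisect.bisect_left(token_starts, col)
    # Shift left unless the cursor sits on a token's first character.
    if idx == len(token_starts) or token_starts[idx] != col:
        idx -= 1
    # Reject cursors before the first token or in a gap between tokens.
    if idx < 0 or col > token_ends[idx]:
        return None
    return idx


assert index_at(0) == 0      # first character of the first token
assert index_at(7) == 1      # inside the second token
assert index_at(4) is None   # gap between the first and second tokens
assert index_at(99) is None  # past the last token
```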
for i, word in token_positions.items(): - found_tok = reg.get_token_at_position(line=0, col=i) + found_tok = reg.get_token_at_position(lsp.Position(line=0, character=i)) found_val = found_tok.value["txt"] if found_tok is not None else found_tok msg = f"Expected token `{word}` at col {i}, found `{found_val}`" assert found_val == word, msg -@pytest.mark.parametrize("idx", list(itertools.product([-99, 99], [-99, 99])), ids=str) -def test_get_token_at_invalid_position_returns_none(idx, sentence_token_registry): +def test_get_token_at_invalid_position_returns_none(sentence_token_registry): """Test that retrieving tokens from out of bounds always returns None.""" - line, col = idx _, reg = sentence_token_registry - assert reg.get_token_at_position(line=line, col=col) is None + assert reg.get_token_at_position(lsp.Position(line=99, character=99)) is None def test_get_token_positions(): @@ -89,4 +94,4 @@ def test_get_token_positions(): all_matches = parser.token_registry.get_matching_tokens("apout") assert len(all_matches) == 4 - assert [t.line for t in all_matches] == [23, 57, 60, 70] + assert [t.range.start.line for t in all_matches] == [23, 57, 60, 70] From cee6849e7cb2535f38444ee3fb63f078f3b30ce7 Mon Sep 17 00:00:00 2001 From: Aaron Zuspan Date: Sat, 10 Aug 2024 21:29:53 -0700 Subject: [PATCH 07/18] Refactor --- src/spinasm_lsp/utils.py | 58 +++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/src/spinasm_lsp/utils.py b/src/spinasm_lsp/utils.py index 1dcc988..a666dad 100644 --- a/src/spinasm_lsp/utils.py +++ b/src/spinasm_lsp/utils.py @@ -58,6 +58,23 @@ def __str__(self): def __repr__(self): return str(self) + def clone(self) -> Token: + """Return a clone of the token to avoid mutating the original.""" + return copy.deepcopy(self) + + def without_address_modifier(self) -> Token: + """ + Create a clone of the token with the address modifier removed. + """ + if not str(self).endswith("#") and not str(self).endswith("^"): + return self + + token = self.clone() + token.value["stxt"] = cast(str, token.value["stxt"])[:-1] + token.range.end.character -= 1 + + return token + class TokenRegistry: """A registry of tokens and their positions in a source file.""" @@ -66,10 +83,10 @@ def __init__(self, tokens: list[Token] | None = None) -> None: self._prev_token: Token | None = None """A dictionary mapping program lines to all Tokens on that line.""" - self.lines: dict[int, list[Token]] = {} + self.tokens_by_line: dict[int, list[Token]] = {} """A dictionary mapping token names to all matching Tokens in the program.""" - self.directory: dict[str, list[Token]] = {} + self.tokens_by_name: dict[str, list[Token]] = {} for token in tokens or []: self.register_token(token) @@ -82,8 +99,8 @@ def register_token(self, token: Token) -> None: # will break `get_token_at_position`. I could check if prev token was CHO when # I register RDAL, SOF, or RDA, and if so register them as one and unregister # the previous? 
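The refactor above makes the intent of the name index explicit: tokens are stored under their base name so that `Delay`, `Delay#`, and `Delay^` all resolve to one entry. A minimal sketch of that normalization, using plain strings in place of `Token` objects:

```python
from collections import defaultdict


def base_name(name: str) -> str:
    """Strip a trailing '#' or '^' address modifier, if present."""
    return name[:-1] if name.endswith(("#", "^")) else name


tokens_by_name: dict[str, list[str]] = defaultdict(list)
for occurrence in ["DELAY", "DELAY#", "DELAY^", "KRF"]:
    tokens_by_name[base_name(occurrence)].append(occurrence)

# Renaming DELAY can now find every occurrence, modified or not.
assert tokens_by_name["DELAY"] == ["DELAY", "DELAY#", "DELAY^"]
assert tokens_by_name["KRF"] == ["KRF"]
```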
- if token.range.start.line not in self.lines: - self.lines[token.range.start.line] = [] + if token.range.start.line not in self.tokens_by_line: + self.tokens_by_line[token.range.start.line] = [] # Record the previous and next token for each token to allow traversing if self._prev_token: @@ -91,35 +108,32 @@ def register_token(self, token: Token) -> None: self._prev_token.next_token = token # Store the token on its line - self.lines[token.range.start.line].append(token) + self.tokens_by_line[token.range.start.line].append(token) self._prev_token = token - # Store user-defined tokens in the directory. Other token types could be stored, + # Store user-defined tokens together by name. Other token types could be stored, # but currently there's no use case for retrieving their positions. if token.value["type"] in ("LABEL", "TARGET"): - # Remove address modifiers when storing tokens in the directory. This allows - # for renaming modified tokens along with the original. - if str(token).endswith("#") or str(token).endswith("^"): - token = copy.deepcopy(token) - token.value["stxt"] = cast(str, token.value["stxt"])[:-1] - # We need to truncate the range to exclude the modifier, or else it will - # be removed during renaming. - token.range.end.character -= 1 - - if str(token) not in self.directory: - self.directory[str(token)] = [] - self.directory[str(token)].append(token) + # Tokens are stored by name without address modifiers, so that e.g. Delay# + # and Delay can be retrieved with the same query. This allows for renaming + # all instances of a memory token. + token = token.without_address_modifier() + + if str(token) not in self.tokens_by_name: + self.tokens_by_name[str(token)] = [] + + self.tokens_by_name[str(token)].append(token) def get_matching_tokens(self, token_name: str) -> list[Token]: """Retrieve all tokens with a given name in the program.""" - return self.directory.get(token_name.upper(), []) + return self.tokens_by_name.get(token_name.upper(), []) def get_token_at_position(self, position: lsp.Position) -> Token | None: - """Retrieve the token at the given line and column index.""" - if position.line not in self.lines: + """Retrieve the token at the given position.""" + if position.line not in self.tokens_by_line: return None - line_tokens = self.lines[position.line] + line_tokens = self.tokens_by_line[position.line] token_starts = [t.range.start.character for t in line_tokens] token_ends = [t.range.end.character for t in line_tokens] From 37d1ac9a8842afedda05c41898881a63cd0177d4 Mon Sep 17 00:00:00 2001 From: Aaron Zuspan Date: Sat, 10 Aug 2024 21:38:52 -0700 Subject: [PATCH 08/18] Fix renaming via modded addrs, e.g. Delay# --- src/spinasm_lsp/server.py | 3 +++ tests/conftest.py | 22 +++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/spinasm_lsp/server.py b/src/spinasm_lsp/server.py index 68a63c4..52bb757 100644 --- a/src/spinasm_lsp/server.py +++ b/src/spinasm_lsp/server.py @@ -228,6 +228,9 @@ async def rename(ls: SPINAsmLanguageServer, params: lsp.RenameParams): if token is None: return None + # For renaming purposes, we ignore address modifiers so that e.g. 
we can rename + # `Delay` by renaming `Delay#` + token = token.without_address_modifier() matching_tokens = parser.token_registry.get_matching_tokens(str(token)) edits = [ lsp.TextEdit(

diff --git a/tests/conftest.py b/tests/conftest.py
index e6f1efd..5ae76fd 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -177,7 +177,7 @@ class RenameDict(TypedDict): range=lsp.Range(start=lsp.Position(8, 4), end=lsp.Position(8, 7)), new_text="FOO", ), - # This symbol is ap1#, and should be matched when renaming ap1 + # This symbol is `ap1#`, and should be matched when renaming `ap1` lsp.TextEdit( range=lsp.Range(start=lsp.Position(51, 4), end=lsp.Position(51, 7)), new_text="FOO", ),
@@ -203,4 +203,24 @@ class RenameDict(TypedDict): ), ], }, + { + "symbol": "lap1a#", + "rename_to": "FOO", + "position": lsp.Position(line=61, character=4), + "changes": [ + # Renaming `lap1a#` should also rename `lap1a` + lsp.TextEdit( + range=lsp.Range(start=lsp.Position(12, 4), end=lsp.Position(12, 9)), + new_text="FOO", + ), + lsp.TextEdit( + range=lsp.Range(start=lsp.Position(61, 4), end=lsp.Position(61, 9)), + new_text="FOO", + ), + lsp.TextEdit( + range=lsp.Range(start=lsp.Position(62, 5), end=lsp.Position(62, 10)), + new_text="FOO", + ), + ], + }, ]

From 6e4140a21a1365a39418bee07c9f0231d18403d8 Mon Sep 17 00:00:00 2001
From: Aaron Zuspan
Date: Sun, 11 Aug 2024 10:46:53 -0700
Subject: [PATCH 09/18] Refactor and fix token width

---
 src/spinasm_lsp/parser.py | 2 +-
 src/spinasm_lsp/server.py | 4 ++--
 src/spinasm_lsp/utils.py | 10 +++++-----
 tests/conftest.py | 30 +++++++++++++++---------------
 tests/test_utils.py | 2 +-
 5 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/src/spinasm_lsp/parser.py b/src/spinasm_lsp/parser.py
index d073970..c44ccae 100644
--- a/src/spinasm_lsp/parser.py
+++ b/src/spinasm_lsp/parser.py
@@ -138,7 +138,7 @@ def __next__(self): super().__next__() self._update_column() - token_width = len(self.sym["txt"] or "") + token_width = max(len(self.sym["txt"] or "") - 1, 0) token_range = lsp.Range( start=lsp.Position(line=self.current_line, character=self.col), end=lsp.Position(line=self.current_line, character=self.col + token_width),

diff --git a/src/spinasm_lsp/server.py b/src/spinasm_lsp/server.py
index 52bb757..95d15a0 100644
--- a/src/spinasm_lsp/server.py
+++ b/src/spinasm_lsp/server.py
@@ -99,8 +99,8 @@ async def hover(ls: SPINAsmLanguageServer, params: lsp.HoverParams) -> lsp.Hover if token is None: return None - token_val = token.value["stxt"] - token_type = token.value["type"] + token_val = token.symbol["stxt"] + token_type = token.symbol["type"] # stxt should only be None for EOF tokens, but check to be sure.
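The hover branches being reshuffled in patch 04 above all serve one quirk: `CHO` documentation lives under two-word keys like `CHO RDA`, so adjacent tokens have to be folded together before the lookup. A sketch over a hypothetical token stream and an abridged documentation map:

```python
DOCS = {"CHO RDA": "## `CHO RDA N, C, D`"}  # abridged documentation map


def doc_key(tokens: list[str], idx: int) -> str:
    """Fold two-word CHO instructions into a single documentation key."""
    word = tokens[idx]
    if word == "CHO" and idx + 1 < len(tokens):
        return f"CHO {tokens[idx + 1]}"
    if word in ("RDA", "SOF", "RDAL") and idx > 0 and tokens[idx - 1] == "CHO":
        return f"CHO {word}"
    return word


tokens = ["CHO", "RDA", "0", "SIN0"]
assert doc_key(tokens, 0) == "CHO RDA"  # hovering over CHO
assert doc_key(tokens, 1) == "CHO RDA"  # hovering over RDA
assert DOCS[doc_key(tokens, 1)].startswith("## `CHO RDA")
```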
if token_type == "EOF" or not isinstance(token_val, str): diff --git a/src/spinasm_lsp/utils.py b/src/spinasm_lsp/utils.py index a666dad..7ad6c71 100644 --- a/src/spinasm_lsp/utils.py +++ b/src/spinasm_lsp/utils.py @@ -34,7 +34,7 @@ def __setitem__(self, key, value): self.callback(key) -class TokenValue(TypedDict): +class Symbol(TypedDict): """The token specification used by asfv1.""" type: TokenType @@ -47,13 +47,13 @@ class TokenValue(TypedDict): class Token: """A token and its position in a source file.""" - value: TokenValue + symbol: Symbol range: lsp.Range next_token: Token | None = None prev_token: Token | None = None def __str__(self): - return self.value["stxt"] or "Empty token" + return self.symbol["stxt"] or "Empty token" def __repr__(self): return str(self) @@ -70,7 +70,7 @@ def without_address_modifier(self) -> Token: return self token = self.clone() - token.value["stxt"] = cast(str, token.value["stxt"])[:-1] + token.symbol["stxt"] = cast(str, token.symbol["stxt"])[:-1] token.range.end.character -= 1 return token @@ -113,7 +113,7 @@ def register_token(self, token: Token) -> None: # Store user-defined tokens together by name. Other token types could be stored, # but currently there's no use case for retrieving their positions. - if token.value["type"] in ("LABEL", "TARGET"): + if token.symbol["type"] in ("LABEL", "TARGET"): # Tokens are stored by name without address modifiers, so that e.g. Delay# # and Delay can be retrieved with the same query. This allows for renaming # all instances of a memory token. diff --git a/tests/conftest.py b/tests/conftest.py index 5ae76fd..8afdb26 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -111,29 +111,29 @@ class RenameDict(TypedDict): "contains": "## `SKP CMASK,N`", }, { - "symbol": "label", - "position": lsp.Position(line=37, character=14), + "symbol": "endclr", + "position": lsp.Position(line=37, character=13), "contains": "(label) ENDCLR: Offset[**4**]", }, { - "symbol": "variable", + "symbol": "mono", "position": lsp.Position(line=47, character=5), "contains": "(constant) MONO: Literal[**32**]", }, { - "symbol": "modified_address", + "symbol": "lap2b#", "position": lsp.Position(line=73, character=4), "contains": "(constant) LAP2B#: Literal[**9802**]", }, { # CHO RDA, hovering over CHO - "symbol": "cho_rda", + "symbol": "CHO_rda", "position": lsp.Position(line=85, character=0), "contains": "## `CHO RDA N, C, D`", }, { # CHO RDA, hovering over RDA - "symbol": "cho_rda", + "symbol": "cho_RDA", "position": lsp.Position(line=85, character=4), "contains": "## `CHO RDA N, C, D`", }, @@ -174,31 +174,31 @@ class RenameDict(TypedDict): "position": lsp.Position(line=8, character=4), "changes": [ lsp.TextEdit( - range=lsp.Range(start=lsp.Position(8, 4), end=lsp.Position(8, 7)), + range=lsp.Range(start=lsp.Position(8, 4), end=lsp.Position(8, 6)), new_text="FOO", ), # This symbol is `ap1#``, and should be matched when renaming `ap1` lsp.TextEdit( - range=lsp.Range(start=lsp.Position(51, 4), end=lsp.Position(51, 7)), + range=lsp.Range(start=lsp.Position(51, 4), end=lsp.Position(51, 6)), new_text="FOO", ), lsp.TextEdit( - range=lsp.Range(start=lsp.Position(52, 5), end=lsp.Position(52, 8)), + range=lsp.Range(start=lsp.Position(52, 5), end=lsp.Position(52, 7)), new_text="FOO", ), ], }, { - "symbol": "label", + "symbol": "endclr", "rename_to": "END", "position": lsp.Position(line=41, character=0), "changes": [ lsp.TextEdit( - range=lsp.Range(start=lsp.Position(37, 8), end=lsp.Position(37, 14)), + range=lsp.Range(start=lsp.Position(37, 8), 
end=lsp.Position(37, 13)), new_text="END", ), lsp.TextEdit( - range=lsp.Range(start=lsp.Position(41, 0), end=lsp.Position(41, 6)), + range=lsp.Range(start=lsp.Position(41, 0), end=lsp.Position(41, 5)), new_text="END", ), ], @@ -210,15 +210,15 @@ class RenameDict(TypedDict): "changes": [ # Renaming `lap1a#` should also rename `lap1a` lsp.TextEdit( - range=lsp.Range(start=lsp.Position(12, 4), end=lsp.Position(12, 9)), + range=lsp.Range(start=lsp.Position(12, 4), end=lsp.Position(12, 8)), new_text="FOO", ), lsp.TextEdit( - range=lsp.Range(start=lsp.Position(61, 4), end=lsp.Position(61, 9)), + range=lsp.Range(start=lsp.Position(61, 4), end=lsp.Position(61, 8)), new_text="FOO", ), lsp.TextEdit( - range=lsp.Range(start=lsp.Position(62, 5), end=lsp.Position(62, 10)), + range=lsp.Range(start=lsp.Position(62, 5), end=lsp.Position(62, 9)), new_text="FOO", ), ], diff --git a/tests/test_utils.py b/tests/test_utils.py index 74dedad..775ec3e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -72,7 +72,7 @@ def test_get_token_from_registry(sentence_token_registry): for i, word in token_positions.items(): found_tok = reg.get_token_at_position(lsp.Position(line=0, character=i)) - found_val = found_tok.value["txt"] if found_tok is not None else found_tok + found_val = found_tok.symbol["txt"] if found_tok is not None else found_tok msg = f"Expected token `{word}` at col {i}, found `{found_val}`" assert found_val == word, msg From f4fea3bb08f4695b2b13f36e82daddfac930161b Mon Sep 17 00:00:00 2001 From: Aaron Zuspan Date: Sun, 11 Aug 2024 10:49:58 -0700 Subject: [PATCH 10/18] Refactor --- src/spinasm_lsp/utils.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/spinasm_lsp/utils.py b/src/spinasm_lsp/utils.py index 7ad6c71..04f6fdb 100644 --- a/src/spinasm_lsp/utils.py +++ b/src/spinasm_lsp/utils.py @@ -58,7 +58,7 @@ def __str__(self): def __repr__(self): return str(self) - def clone(self) -> Token: + def _clone(self) -> Token: """Return a clone of the token to avoid mutating the original.""" return copy.deepcopy(self) @@ -69,7 +69,7 @@ def without_address_modifier(self) -> Token: if not str(self).endswith("#") and not str(self).endswith("^"): return self - token = self.clone() + token = self._clone() token.symbol["stxt"] = cast(str, token.symbol["stxt"])[:-1] token.range.end.character -= 1 @@ -83,10 +83,10 @@ def __init__(self, tokens: list[Token] | None = None) -> None: self._prev_token: Token | None = None """A dictionary mapping program lines to all Tokens on that line.""" - self.tokens_by_line: dict[int, list[Token]] = {} + self._tokens_by_line: dict[int, list[Token]] = {} """A dictionary mapping token names to all matching Tokens in the program.""" - self.tokens_by_name: dict[str, list[Token]] = {} + self._tokens_by_name: dict[str, list[Token]] = {} for token in tokens or []: self.register_token(token) @@ -99,8 +99,8 @@ def register_token(self, token: Token) -> None: # will break `get_token_at_position`. I could check if prev token was CHO when # I register RDAL, SOF, or RDA, and if so register them as one and unregister # the previous? 
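The fixture churn in patch 09 above follows from its width change: a token's range now ends on its last character rather than one past it, which is why every expected end column shrinks by one. The convention in isolation, assuming a token `ENDCLR` starting at column 8:

```python
text = "ENDCLR"
start = 8
end = start + max(len(text) - 1, 0)  # inclusive end column
assert (start, end) == (8, 13)

# A cursor anywhere in [start, end] counts as "inside" the token.
assert all(start <= col <= end for col in range(start, end + 1))
```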
- if token.range.start.line not in self.tokens_by_line: - self.tokens_by_line[token.range.start.line] = [] + if token.range.start.line not in self._tokens_by_line: + self._tokens_by_line[token.range.start.line] = [] # Record the previous and next token for each token to allow traversing if self._prev_token: @@ -108,7 +108,7 @@ def register_token(self, token: Token) -> None: self._prev_token.next_token = token # Store the token on its line - self.tokens_by_line[token.range.start.line].append(token) + self._tokens_by_line[token.range.start.line].append(token) self._prev_token = token # Store user-defined tokens together by name. Other token types could be stored, @@ -119,21 +119,21 @@ def register_token(self, token: Token) -> None: # all instances of a memory token. token = token.without_address_modifier() - if str(token) not in self.tokens_by_name: - self.tokens_by_name[str(token)] = [] + if str(token) not in self._tokens_by_name: + self._tokens_by_name[str(token)] = [] - self.tokens_by_name[str(token)].append(token) + self._tokens_by_name[str(token)].append(token) def get_matching_tokens(self, token_name: str) -> list[Token]: """Retrieve all tokens with a given name in the program.""" - return self.tokens_by_name.get(token_name.upper(), []) + return self._tokens_by_name.get(token_name.upper(), []) def get_token_at_position(self, position: lsp.Position) -> Token | None: """Retrieve the token at the given position.""" - if position.line not in self.tokens_by_line: + if position.line not in self._tokens_by_line: return None - line_tokens = self.tokens_by_line[position.line] + line_tokens = self._tokens_by_line[position.line] token_starts = [t.range.start.character for t in line_tokens] token_ends = [t.range.end.character for t in line_tokens] From 724967ef58d2655d9d0c06acbefa66f8e607d3f8 Mon Sep 17 00:00:00 2001 From: Aaron Zuspan Date: Sun, 11 Aug 2024 11:17:50 -0700 Subject: [PATCH 11/18] Refactor to remove CallbackDict --- src/spinasm_lsp/parser.py | 54 +++++++++++++-------------------------- src/spinasm_lsp/server.py | 25 +++++++++++------- src/spinasm_lsp/utils.py | 15 +---------- tests/conftest.py | 9 ++++--- tests/test_utils.py | 13 ---------- 5 files changed, 40 insertions(+), 76 deletions(-) diff --git a/src/spinasm_lsp/parser.py b/src/spinasm_lsp/parser.py index c44ccae..ac37db8 100644 --- a/src/spinasm_lsp/parser.py +++ b/src/spinasm_lsp/parser.py @@ -3,15 +3,17 @@ from asfv1 import fv1parse from lsprotocol import types as lsp -from spinasm_lsp.utils import CallbackDict, Token, TokenRegistry +from spinasm_lsp.utils import Symbol, Token, TokenRegistry class SPINAsmParser(fv1parse): """A modified version of fv1parse optimized for use with LSP.""" + sym: Symbol | None + def __init__(self, source: str): self.diagnostics: list[lsp.Diagnostic] = [] - self.definitions: dict[str, lsp.Position] = {} + self.definitions: dict[str, lsp.Range] = {} self.col: int = 0 self.prevcol: int = 0 self.token_registry = TokenRegistry() @@ -28,39 +30,6 @@ def __init__(self, source: str): # Keep an unchanged copy of the original source self._source: list[str] = self.source.copy() - # Wrap the dictionaries to record whenever a definition is added - self.jmptbl: CallbackDict = CallbackDict( - self.jmptbl, callback=self._on_definition - ) - self.symtbl: CallbackDict = CallbackDict( - self.symtbl, callback=self._on_definition - ) - - def _on_definition(self, label: str): - """Record the program location when a definition is added.""" - # Don't record the position of constants that are defined at program - # 
initialization. - if self.current_line == -1: - return - - # Due to the parsing order, the current line will be correct for labels but - # incorrect for assignments, which need to use previous line instead. - line = self.current_line if label in self.jmptbl else self.previous_line - - # Try to find the position of the label on the definition line. Remove address - # modifiers from the label name, since those are defined implicitly by the - # parser rather than in the source. - try: - col = ( - self._source[line] - .upper() - .index(label.replace("#", "").replace("^", "")) - ) - except ValueError: - col = 0 - - self.definitions[label] = lsp.Position(line, col) - def __mkopcodes__(self): """ No-op. @@ -134,7 +103,7 @@ def previous_line(self): return self.prevline - 1 def __next__(self): - """Parse the next symbol and update the column.""" + """Parse the next symbol and update the column and definitions.""" super().__next__() self._update_column() @@ -146,6 +115,19 @@ def __next__(self): token = Token(self.sym, range=token_range) self.token_registry.register_token(token) + base_token = token.without_address_modifier() + is_user_definable = base_token.symbol["type"] in ("LABEL", "TARGET") + is_defined = str(base_token) in self.jmptbl or str(base_token) in self.symtbl + + if ( + is_user_definable + and not is_defined + # Labels appear before their target definition, so override when the target + # is defined. + or base_token.symbol["type"] == "TARGET" + ): + self.definitions[str(base_token)] = base_token.range + def _update_column(self): """Set the current column based on the last parsed symbol.""" current_line_txt = self._source[self.current_line] diff --git a/src/spinasm_lsp/server.py b/src/spinasm_lsp/server.py index 95d15a0..dd35b90 100644 --- a/src/spinasm_lsp/server.py +++ b/src/spinasm_lsp/server.py @@ -186,16 +186,19 @@ async def definition( document = ls.workspace.get_text_document(params.text_document.uri) token = parser.token_registry.get_token_at_position(params.position) + if token is None: + return None - if str(token) not in parser.definitions: + # Definitions should be checked against the base token name, ignoring address + # modifiers. + base_token = token.without_address_modifier() + + if str(base_token) not in parser.definitions: return None return lsp.Location( uri=document.uri, - range=lsp.Range( - start=parser.definitions[str(token)], - end=parser.definitions[str(token)], - ), + range=parser.definitions[str(base_token)], ) @@ -210,9 +213,13 @@ async def prepare_rename(ls: SPINAsmLanguageServer, params: lsp.PrepareRenamePar ls.info(f"No token to rename at {params.position}.") return None + # Renaming should be checked against the base token name, ignoring address + # modifiers. + base_token = token.without_address_modifier() + # Only user-defined labels should support renaming - if str(token) not in parser.definitions: - ls.info(f"Can't rename non-user defined token {token}.") + if str(base_token) not in parser.definitions: + ls.info(f"Can't rename non-user defined token {base_token}.") return None return lsp.PrepareRenameResult_Type2(default_behavior=True) @@ -230,8 +237,8 @@ async def rename(ls: SPINAsmLanguageServer, params: lsp.RenameParams): # For renaming purposes, we ignore address modifiers so that e.g. 
we can rename # `Delay` by renaming `Delay#` - token = token.without_address_modifier() - matching_tokens = parser.token_registry.get_matching_tokens(str(token)) + base_token = token.without_address_modifier() + matching_tokens = parser.token_registry.get_matching_tokens(str(base_token)) edits = [ lsp.TextEdit( diff --git a/src/spinasm_lsp/utils.py b/src/spinasm_lsp/utils.py index 04f6fdb..34267fd 100644 --- a/src/spinasm_lsp/utils.py +++ b/src/spinasm_lsp/utils.py @@ -2,9 +2,8 @@ import bisect import copy -from collections import UserDict from dataclasses import dataclass -from typing import Callable, Literal, TypedDict, cast +from typing import Literal, TypedDict, cast import lsprotocol.types as lsp @@ -22,18 +21,6 @@ ] -class CallbackDict(UserDict): - """A dictionary that fires a callback when an item is set.""" - - def __init__(self, dict=None, /, callback: Callable | None = None, **kwargs): - self.callback = callback or (lambda key: None) - super().__init__(dict, **kwargs) - - def __setitem__(self, key, value): - super().__setitem__(key, value) - self.callback(key) - - class Symbol(TypedDict): """The token specification used by asfv1.""" diff --git a/tests/conftest.py b/tests/conftest.py index 8afdb26..10bd232 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -54,7 +54,7 @@ class RenameDict(TypedDict): uri=f"file:///{PATCH_DIR / 'Basic.spn'}", range=lsp.Range( start=lsp.Position(line=23, character=4), - end=lsp.Position(line=23, character=4), + end=lsp.Position(line=23, character=8), ), ), }, @@ -66,7 +66,7 @@ class RenameDict(TypedDict): uri=f"file:///{PATCH_DIR / 'Basic.spn'}", range=lsp.Range( start=lsp.Position(line=16, character=4), - end=lsp.Position(line=16, character=4), + end=lsp.Position(line=16, character=8), ), ), }, @@ -79,7 +79,7 @@ class RenameDict(TypedDict): uri=f"file:///{PATCH_DIR / 'Basic.spn'}", range=lsp.Range( start=lsp.Position(line=16, character=4), - end=lsp.Position(line=16, character=4), + end=lsp.Position(line=16, character=8), ), ), }, @@ -91,7 +91,7 @@ class RenameDict(TypedDict): uri=f"file:///{PATCH_DIR / 'Basic.spn'}", range=lsp.Range( start=lsp.Position(line=41, character=0), - end=lsp.Position(line=41, character=0), + end=lsp.Position(line=41, character=5), ), ), }, @@ -167,6 +167,7 @@ class RenameDict(TypedDict): }, ] + RENAMES: list[RenameDict] = [ { "symbol": "ap1", diff --git a/tests/test_utils.py b/tests/test_utils.py index 775ec3e..b4b6fe1 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -38,19 +38,6 @@ def sentence_token_registry() -> tuple[str, utils.TokenRegistry]: return sentence, utils.TokenRegistry(tokens) -def test_callback_dict(): - """Test that a CallbackDict calls its callback function when values are set.""" - key_store = [] - items = { - "foo": 42, - "bar": 0, - "baz": 99, - } - d = utils.CallbackDict(items, callback=lambda k: key_store.append(k)) - assert d == items - assert key_store == ["foo", "bar", "baz"] - - def test_get_token_from_registry(sentence_token_registry): """Test that tokens are correctly retrieved by position from a registry.""" sentence, reg = sentence_token_registry From e99b2fff134da6e4ac242b368f9a8064d61e50ac Mon Sep 17 00:00:00 2001 From: Aaron Zuspan Date: Sun, 11 Aug 2024 11:31:50 -0700 Subject: [PATCH 12/18] Refactor to init Token from start position instead of range --- src/spinasm_lsp/parser.py | 46 ++++++++++++++++++++++++--------------- src/spinasm_lsp/utils.py | 42 ++++++++++++++++------------------- tests/test_utils.py | 15 ++++--------- 3 files changed, 51 insertions(+), 52 
deletions(-) diff --git a/src/spinasm_lsp/parser.py b/src/spinasm_lsp/parser.py index ac37db8..5fff46f 100644 --- a/src/spinasm_lsp/parser.py +++ b/src/spinasm_lsp/parser.py @@ -14,8 +14,8 @@ class SPINAsmParser(fv1parse): def __init__(self, source: str): self.diagnostics: list[lsp.Diagnostic] = [] self.definitions: dict[str, lsp.Range] = {} - self.col: int = 0 - self.prevcol: int = 0 + self.current_character: int = 0 + self.previous_character: int = 0 self.token_registry = TokenRegistry() super().__init__( @@ -38,14 +38,14 @@ def __mkopcodes__(self): """ def _record_diagnostic( - self, msg: str, line: int, col: int, severity: lsp.DiagnosticSeverity + self, msg: str, line: int, character: int, severity: lsp.DiagnosticSeverity ): """Record a diagnostic message for the LSP.""" self.diagnostics.append( lsp.Diagnostic( range=lsp.Range( - start=lsp.Position(line, character=col), - end=lsp.Position(line, character=col), + start=lsp.Position(line, character=character), + end=lsp.Position(line, character=character), ), message=msg, severity=severity, @@ -60,13 +60,19 @@ def parseerror(self, msg: str, line: int | None = None): # Offset the line from the parser's 1-indexed line to the 0-indexed line self._record_diagnostic( - msg, line - 1, col=self.col, severity=lsp.DiagnosticSeverity.Error + msg, + line=line - 1, + character=self.current_character, + severity=lsp.DiagnosticSeverity.Error, ) def scanerror(self, msg: str): """Override to record scanning errors as LSP diagnostics.""" self._record_diagnostic( - msg, self.current_line, col=self.col, severity=lsp.DiagnosticSeverity.Error + msg, + line=self.current_line, + character=self.current_character, + severity=lsp.DiagnosticSeverity.Error, ) def parsewarn(self, msg: str, line: int | None = None): @@ -76,7 +82,10 @@ def parsewarn(self, msg: str, line: int | None = None): # Offset the line from the parser's 1-indexed line to the 0-indexed line self._record_diagnostic( - msg, line - 1, col=self.col, severity=lsp.DiagnosticSeverity.Warning + msg, + line=line - 1, + character=self.current_character, + severity=lsp.DiagnosticSeverity.Warning, ) @property @@ -89,8 +98,8 @@ def sline(self, value): self._sline = value # Reset the column to 0 when we move to a new line - self.prevcol = self.col - self.col = 0 + self.previous_character = self.current_character + self.current_character = 0 @property def current_line(self): @@ -107,12 +116,11 @@ def __next__(self): super().__next__() self._update_column() - token_width = max(len(self.sym["txt"] or "") - 1, 0) - token_range = lsp.Range( - start=lsp.Position(line=self.current_line, character=self.col), - end=lsp.Position(line=self.current_line, character=self.col + token_width), + token_start = lsp.Position( + line=self.current_line, character=self.current_character ) - token = Token(self.sym, range=token_range) + + token = Token(self.sym, start=token_start) self.token_registry.register_token(token) base_token = token.without_address_modifier() @@ -133,12 +141,14 @@ def _update_column(self): current_line_txt = self._source[self.current_line] current_symbol = self.sym.get("txt", None) or "" - self.prevcol = self.col + self.previous_character = self.current_character try: # Start at the current column to skip previous duplicates of the symbol - self.col = current_line_txt.index(current_symbol, self.col) + self.current_character = current_line_txt.index( + current_symbol, self.current_character + ) except ValueError: - self.col = 0 + self.current_character = 0 def parse(self) -> SPINAsmParser: """Parse and return 
the parser.""" diff --git a/src/spinasm_lsp/utils.py b/src/spinasm_lsp/utils.py index 34267fd..dfd7ec3 100644 --- a/src/spinasm_lsp/utils.py +++ b/src/spinasm_lsp/utils.py @@ -2,49 +2,45 @@ import bisect import copy -from dataclasses import dataclass from typing import Literal, TypedDict, cast import lsprotocol.types as lsp -# Types of tokens defined by asfv1 -TokenType = Literal[ - "ASSEMBLER", - "EOF", - "INTEGER", - "LABEL", - "TARGET", - "MNEMONIC", - "OPERATOR", - "FLOAT", - "ARGSEP", -] - class Symbol(TypedDict): """The token specification used by asfv1.""" - type: TokenType + type: Literal[ + "ASSEMBLER", + "EOF", + "INTEGER", + "LABEL", + "TARGET", + "MNEMONIC", + "OPERATOR", + "FLOAT", + "ARGSEP", + ] txt: str | None stxt: str | None val: int | float | None -@dataclass class Token: """A token and its position in a source file.""" - symbol: Symbol - range: lsp.Range - next_token: Token | None = None - prev_token: Token | None = None + def __init__(self, symbol: Symbol, start: lsp.Position) -> None: + width = max(len(symbol["stxt"] or "") - 1, 0) + end = lsp.Position(line=start.line, character=start.character + width) + + self.symbol: Symbol = symbol + self.range: lsp.Range = lsp.Range(start=start, end=end) + self.next_token: Token | None = None + self.prev_token: Token | None = None def __str__(self): return self.symbol["stxt"] or "Empty token" - def __repr__(self): - return str(self) - def _clone(self) -> Token: """Return a clone of the token to avoid mutating the original.""" return copy.deepcopy(self) diff --git a/tests/test_utils.py b/tests/test_utils.py index b4b6fe1..28be282 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -23,17 +23,10 @@ def sentence_token_registry() -> tuple[str, utils.TokenRegistry]: for t in token_vals: start = sentence.index(t["txt"], col) - end = start + len(t["txt"]) - 1 - col = end + 1 - tokens.append( - utils.Token( - t, - range=lsp.Range( - start=lsp.Position(line=0, character=start), - end=lsp.Position(line=0, character=end), - ), - ) - ) + token = utils.Token(t, start=lsp.Position(line=0, character=start)) + col = token.range.end.character + 1 + + tokens.append(token) return sentence, utils.TokenRegistry(tokens) From 3cc1557a20bd5bfe54d662012e5c816f0ccca261 Mon Sep 17 00:00:00 2001 From: Aaron Zuspan Date: Sun, 11 Aug 2024 12:10:53 -0700 Subject: [PATCH 13/18] Move token utils import parser module --- src/spinasm_lsp/documentation.py | 2 + src/spinasm_lsp/parser.py | 137 ++++++++++++++++++++++++++++++- src/spinasm_lsp/server.py | 2 + src/spinasm_lsp/utils.py | 136 ------------------------------ tests/test_parser.py | 73 +++++++++++++++- tests/test_utils.py | 77 ----------------- 6 files changed, 210 insertions(+), 217 deletions(-) delete mode 100644 src/spinasm_lsp/utils.py delete mode 100644 tests/test_utils.py diff --git a/src/spinasm_lsp/documentation.py b/src/spinasm_lsp/documentation.py index 8f2a371..197768e 100644 --- a/src/spinasm_lsp/documentation.py +++ b/src/spinasm_lsp/documentation.py @@ -1,3 +1,5 @@ +"""Documentation utilities for the SPINAsm LSP.""" + from __future__ import annotations from collections import UserDict diff --git a/src/spinasm_lsp/parser.py b/src/spinasm_lsp/parser.py index 5fff46f..921b44d 100644 --- a/src/spinasm_lsp/parser.py +++ b/src/spinasm_lsp/parser.py @@ -1,9 +1,142 @@ +"""The SPINAsm language parser.""" + from __future__ import annotations +import bisect +import copy +from typing import Literal, TypedDict, cast + +import lsprotocol.types as lsp from asfv1 import fv1parse -from lsprotocol 
import types as lsp -from spinasm_lsp.utils import Symbol, Token, TokenRegistry + +class Symbol(TypedDict): + """The token specification used by asfv1.""" + + type: Literal[ + "ASSEMBLER", + "EOF", + "INTEGER", + "LABEL", + "TARGET", + "MNEMONIC", + "OPERATOR", + "FLOAT", + "ARGSEP", + ] + txt: str | None + stxt: str | None + val: int | float | None + + +class Token: + """A token and its position in a source file.""" + + def __init__(self, symbol: Symbol, start: lsp.Position) -> None: + width = max(len(symbol["stxt"] or "") - 1, 0) + end = lsp.Position(line=start.line, character=start.character + width) + + self.symbol: Symbol = symbol + self.range: lsp.Range = lsp.Range(start=start, end=end) + self.next_token: Token | None = None + self.prev_token: Token | None = None + + def __str__(self): + return self.symbol["stxt"] or "Empty token" + + def _clone(self) -> Token: + """Return a clone of the token to avoid mutating the original.""" + return copy.deepcopy(self) + + def without_address_modifier(self) -> Token: + """ + Create a clone of the token with the address modifier removed. + """ + if not str(self).endswith("#") and not str(self).endswith("^"): + return self + + token = self._clone() + token.symbol["stxt"] = cast(str, token.symbol["stxt"])[:-1] + token.range.end.character -= 1 + + return token + + +class TokenRegistry: + """A registry of tokens and their positions in a source file.""" + + def __init__(self, tokens: list[Token] | None = None) -> None: + self._prev_token: Token | None = None + + """A dictionary mapping program lines to all Tokens on that line.""" + self._tokens_by_line: dict[int, list[Token]] = {} + + """A dictionary mapping token names to all matching Tokens in the program.""" + self._tokens_by_name: dict[str, list[Token]] = {} + + for token in tokens or []: + self.register_token(token) + + def register_token(self, token: Token) -> None: + """Add a token to the registry.""" + # TODO: Maybe handle multi-word CHO instructions here, by merging with the next + # token? The tricky part is that the next token would still end up getting + # registered unless we prevent it... If we end up with overlapping tokens, that + # will break `get_token_at_position`. I could check if prev token was CHO when + # I register RDAL, SOF, or RDA, and if so register them as one and unregister + # the previous? + if token.range.start.line not in self._tokens_by_line: + self._tokens_by_line[token.range.start.line] = [] + + # Record the previous and next token for each token to allow traversing + if self._prev_token: + token.prev_token = self._prev_token + self._prev_token.next_token = token + + # Store the token on its line + self._tokens_by_line[token.range.start.line].append(token) + self._prev_token = token + + # Store user-defined tokens together by name. Other token types could be stored, + # but currently there's no use case for retrieving their positions. + if token.symbol["type"] in ("LABEL", "TARGET"): + # Tokens are stored by name without address modifiers, so that e.g. Delay# + # and Delay can be retrieved with the same query. This allows for renaming + # all instances of a memory token. 
+ token = token.without_address_modifier() + + if str(token) not in self._tokens_by_name: + self._tokens_by_name[str(token)] = [] + + self._tokens_by_name[str(token)].append(token) + + def get_matching_tokens(self, token_name: str) -> list[Token]: + """Retrieve all tokens with a given name in the program.""" + return self._tokens_by_name.get(token_name.upper(), []) + + def get_token_at_position(self, position: lsp.Position) -> Token | None: + """Retrieve the token at the given position.""" + if position.line not in self._tokens_by_line: + return None + + line_tokens = self._tokens_by_line[position.line] + token_starts = [t.range.start.character for t in line_tokens] + token_ends = [t.range.end.character for t in line_tokens] + + idx = bisect.bisect_left(token_starts, position.character) + + # The index returned by bisect_left points to the start value >= character. This + # will either be the first character of the token or the start of the next + # token. First check if we're out of bounds, then shift left unless we're at the + # first character of the token. + if idx == len(line_tokens) or token_starts[idx] != position.character: + idx -= 1 + + # If the col falls after the end of the token, we're not inside a token. + if position.character > token_ends[idx]: + return None + + return line_tokens[idx] class SPINAsmParser(fv1parse): diff --git a/src/spinasm_lsp/server.py b/src/spinasm_lsp/server.py index dd35b90..6ff5783 100644 --- a/src/spinasm_lsp/server.py +++ b/src/spinasm_lsp/server.py @@ -1,3 +1,5 @@ +"""The SPINAsm Language Server Protocol implementation.""" + from __future__ import annotations from functools import lru_cache diff --git a/src/spinasm_lsp/utils.py b/src/spinasm_lsp/utils.py deleted file mode 100644 index dfd7ec3..0000000 --- a/src/spinasm_lsp/utils.py +++ /dev/null @@ -1,136 +0,0 @@ -from __future__ import annotations - -import bisect -import copy -from typing import Literal, TypedDict, cast - -import lsprotocol.types as lsp - - -class Symbol(TypedDict): - """The token specification used by asfv1.""" - - type: Literal[ - "ASSEMBLER", - "EOF", - "INTEGER", - "LABEL", - "TARGET", - "MNEMONIC", - "OPERATOR", - "FLOAT", - "ARGSEP", - ] - txt: str | None - stxt: str | None - val: int | float | None - - -class Token: - """A token and its position in a source file.""" - - def __init__(self, symbol: Symbol, start: lsp.Position) -> None: - width = max(len(symbol["stxt"] or "") - 1, 0) - end = lsp.Position(line=start.line, character=start.character + width) - - self.symbol: Symbol = symbol - self.range: lsp.Range = lsp.Range(start=start, end=end) - self.next_token: Token | None = None - self.prev_token: Token | None = None - - def __str__(self): - return self.symbol["stxt"] or "Empty token" - - def _clone(self) -> Token: - """Return a clone of the token to avoid mutating the original.""" - return copy.deepcopy(self) - - def without_address_modifier(self) -> Token: - """ - Create a clone of the token with the address modifier removed. 
- """ - if not str(self).endswith("#") and not str(self).endswith("^"): - return self - - token = self._clone() - token.symbol["stxt"] = cast(str, token.symbol["stxt"])[:-1] - token.range.end.character -= 1 - - return token - - -class TokenRegistry: - """A registry of tokens and their positions in a source file.""" - - def __init__(self, tokens: list[Token] | None = None) -> None: - self._prev_token: Token | None = None - - """A dictionary mapping program lines to all Tokens on that line.""" - self._tokens_by_line: dict[int, list[Token]] = {} - - """A dictionary mapping token names to all matching Tokens in the program.""" - self._tokens_by_name: dict[str, list[Token]] = {} - - for token in tokens or []: - self.register_token(token) - - def register_token(self, token: Token) -> None: - """Add a token to the registry.""" - # TODO: Maybe handle multi-word CHO instructions here, by merging with the next - # token? The tricky part is that the next token would still end up getting - # registered unless we prevent it... If we end up with overlapping tokens, that - # will break `get_token_at_position`. I could check if prev token was CHO when - # I register RDAL, SOF, or RDA, and if so register them as one and unregister - # the previous? - if token.range.start.line not in self._tokens_by_line: - self._tokens_by_line[token.range.start.line] = [] - - # Record the previous and next token for each token to allow traversing - if self._prev_token: - token.prev_token = self._prev_token - self._prev_token.next_token = token - - # Store the token on its line - self._tokens_by_line[token.range.start.line].append(token) - self._prev_token = token - - # Store user-defined tokens together by name. Other token types could be stored, - # but currently there's no use case for retrieving their positions. - if token.symbol["type"] in ("LABEL", "TARGET"): - # Tokens are stored by name without address modifiers, so that e.g. Delay# - # and Delay can be retrieved with the same query. This allows for renaming - # all instances of a memory token. - token = token.without_address_modifier() - - if str(token) not in self._tokens_by_name: - self._tokens_by_name[str(token)] = [] - - self._tokens_by_name[str(token)].append(token) - - def get_matching_tokens(self, token_name: str) -> list[Token]: - """Retrieve all tokens with a given name in the program.""" - return self._tokens_by_name.get(token_name.upper(), []) - - def get_token_at_position(self, position: lsp.Position) -> Token | None: - """Retrieve the token at the given position.""" - if position.line not in self._tokens_by_line: - return None - - line_tokens = self._tokens_by_line[position.line] - token_starts = [t.range.start.character for t in line_tokens] - token_ends = [t.range.end.character for t in line_tokens] - - idx = bisect.bisect_left(token_starts, position.character) - - # The index returned by bisect_left points to the start value >= character. This - # will either be the first character of the token or the start of the next - # token. First check if we're out of bounds, then shift left unless we're at the - # first character of the token. - if idx == len(line_tokens) or token_starts[idx] != position.character: - idx -= 1 - - # If the col falls after the end of the token, we're not inside a token. 
- if position.character > token_ends[idx]: - return None - - return line_tokens[idx] diff --git a/tests/test_parser.py b/tests/test_parser.py index c0cf880..84af6cd 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -2,11 +2,12 @@ from __future__ import annotations +import lsprotocol.types as lsp import pytest -from spinasm_lsp.parser import SPINAsmParser +from spinasm_lsp.parser import SPINAsmParser, Token, TokenRegistry -from .conftest import TEST_PATCHES +from .conftest import PATCH_DIR, TEST_PATCHES @pytest.mark.parametrize("patch", TEST_PATCHES, ids=lambda x: x.stem) @@ -14,3 +15,71 @@ def test_example_patches(patch): """Test that the example patches from SPINAsm are parsable.""" with open(patch, encoding="utf-8") as f: assert SPINAsmParser(f.read()) + + +@pytest.fixture() +def sentence_token_registry() -> tuple[str, TokenRegistry]: + """A sentence with a token registry for each word.""" + sentence = "This is a line with words." + + # Build a list of word tokens, ignoring whitespace. We'll build the tokens + # consistently with asfv1 parsed tokens. + words = list(filter(lambda x: x, sentence.split(" "))) + token_vals = [{"type": "LABEL", "txt": w, "stxt": w, "val": None} for w in words] + tokens = [] + col = 0 + + for t in token_vals: + start = sentence.index(t["txt"], col) + token = Token(t, start=lsp.Position(line=0, character=start)) + col = token.range.end.character + 1 + + tokens.append(token) + + return sentence, TokenRegistry(tokens) + + +def test_get_token_from_registry(sentence_token_registry): + """Test that tokens are correctly retrieved by position from a registry.""" + sentence, reg = sentence_token_registry + + # Manually build a mapping of column indexes to expected token words + token_positions = {i: None for i in range(len(sentence))} + for i in range(0, 4): + token_positions[i] = "This" + for i in range(7, 9): + token_positions[i] = "is" + for i in range(10, 11): + token_positions[i] = "a" + for i in range(12, 16): + token_positions[i] = "line" + for i in range(20, 24): + token_positions[i] = "with" + for i in range(25, 31): + token_positions[i] = "words." 
+ + for i, word in token_positions.items(): + found_tok = reg.get_token_at_position(lsp.Position(line=0, character=i)) + found_val = found_tok.symbol["txt"] if found_tok is not None else found_tok + msg = f"Expected token `{word}` at col {i}, found `{found_val}`" + assert found_val == word, msg + + +def test_get_token_at_invalid_position_returns_none(sentence_token_registry): + """Test that retrieving tokens from out of bounds always returns None.""" + _, reg = sentence_token_registry + + assert reg.get_token_at_position(lsp.Position(line=99, character=99)) is None + + +def test_get_token_positions(): + """Test getting all positions of a token from a registry.""" + patch = PATCH_DIR / "Basic.spn" + with open(patch) as fp: + source = fp.read() + + parser = SPINAsmParser(source).parse() + + all_matches = parser.token_registry.get_matching_tokens("apout") + assert len(all_matches) == 4 + assert [t.range.start.line for t in all_matches] == [23, 57, 60, 70] diff --git a/tests/test_utils.py b/tests/test_utils.py deleted file mode 100644 index 28be282..0000000 --- a/tests/test_utils.py +++ /dev/null @@ -1,77 +0,0 @@ -"""Test LSP utilities.""" - -import lsprotocol.types as lsp -import pytest - -from spinasm_lsp import utils -from spinasm_lsp.parser import SPINAsmParser - -from .conftest import PATCH_DIR - - -@pytest.fixture() -def sentence_token_registry() -> tuple[str, utils.TokenRegistry]: - """A sentence with a token registry for each word.""" - sentence = "This is a line with words." - - # Build a list of word tokens, ignoring whitespace. We'll build the tokens - # consistently with asfv1 parsed tokens. - words = list(filter(lambda x: x, sentence.split(" "))) - token_vals = [{"type": "LABEL", "txt": w, "stxt": w, "val": None} for w in words] - tokens = [] - col = 0 - - for t in token_vals: - start = sentence.index(t["txt"], col) - token = utils.Token(t, start=lsp.Position(line=0, character=start)) - col = token.range.end.character + 1 - - tokens.append(token) - - return sentence, utils.TokenRegistry(tokens) - - -def test_get_token_from_registry(sentence_token_registry): - """Test that tokens are correctly retrieved by position from a registry.""" - sentence, reg = sentence_token_registry - - # Manually build a mapping of column indexes to expected token words - token_positions = {i: None for i in range(len(sentence))} - for i in range(0, 4): - token_positions[i] = "This" - for i in range(7, 9): - token_positions[i] = "is" - for i in range(10, 11): - token_positions[i] = "a" - for i in range(12, 16): - token_positions[i] = "line" - for i in range(20, 24): - token_positions[i] = "with" - for i in range(25, 31): - token_positions[i] = "words." 
- - for i, word in token_positions.items(): - found_tok = reg.get_token_at_position(lsp.Position(line=0, character=i)) - found_val = found_tok.symbol["txt"] if found_tok is not None else found_tok - msg = f"Expected token `{word}` at col {i}, found `{found_val}`" - assert found_val == word, msg - - -def test_get_token_at_invalid_position_returns_none(sentence_token_registry): - """Test that retrieving tokens from out of bounds always returns None.""" - _, reg = sentence_token_registry - - assert reg.get_token_at_position(lsp.Position(line=99, character=99)) is None - - -def test_get_token_positions(): - """Test getting all positions of a token from a registry.""" - patch = PATCH_DIR / "Basic.spn" - with open(patch) as fp: - source = fp.read() - - parser = SPINAsmParser(source).parse() - - all_matches = parser.token_registry.get_matching_tokens("apout") - assert len(all_matches) == 4 - assert [t.range.start.line for t in all_matches] == [23, 57, 60, 70] From fe06d33d37b4c54361e8b4db20306b414c9169d0 Mon Sep 17 00:00:00 2001 From: Aaron Zuspan Date: Sun, 11 Aug 2024 12:31:26 -0700 Subject: [PATCH 14/18] Refactor - store documentation in language server attr --- README.md | 6 +++--- src/spinasm_lsp/server.py | 19 +++++++++++++------ 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index b5c3e96..b4c4831 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,9 @@ A Language Server Protocol (LSP) server to provide language support for the [SPI - **Diagnostics**: Reports the location of syntax errors and warnings. - **Hover**: Shows opcode documentation and assigned values on hover. -- **Completion**: Provides suggestions for opcodes, labels, and defined values. -- **Renaming**: Allows renaming of labels and defined values. -- **Go to definition**: Jumps to the definition of a label or defined value. +- **Completion**: Provides suggestions for opcodes, labels, and variables. +- **Renaming**: Allows renaming of labels and variables. +- **Go to definition**: Jumps to the definition of a label, memory address, or variable. ------ diff --git a/src/spinasm_lsp/server.py b/src/spinasm_lsp/server.py index 6ff5783..24d846d 100644 --- a/src/spinasm_lsp/server.py +++ b/src/spinasm_lsp/server.py @@ -26,6 +26,8 @@ def _parse_document(source: str) -> SPINAsmParser: class SPINAsmLanguageServer(LanguageServer): def __init__(self, *args, **kwargs) -> None: self._prev_parser: SPINAsmParser | None = None + self.documentation = DocMap(folders=["instructions", "assemblers"]) + super().__init__(*args, name="spinasm-lsp", version=__version__, **kwargs) def debug(self, msg: Any) -> None: @@ -60,8 +62,6 @@ async def get_parser(self, uri: str) -> SPINAsmParser: server = SPINAsmLanguageServer(max_workers=5) -# TODO: Probably load async as part of a custom language server subclass -DOCUMENTATION = DocMap(folders=["instructions", "assemblers"]) @server.feature(lsp.TEXT_DOCUMENT_DID_CHANGE) @@ -114,7 +114,7 @@ async def hover(ls: SPINAsmLanguageServer, params: lsp.HoverParams) -> lsp.Hover # should be treated as part of the instruction for retrieving documentation. 
if token_val == "RDA" and str(token.prev_token) == "CHO": token_val = f"CHO {token_val}" - hover_msg = DOCUMENTATION.get(token_val, "") + hover_msg = ls.documentation.get(token_val, "") # Label definitions and targets elif token_val in parser.jmptbl: hover_definition = parser.jmptbl[token_val.upper()] @@ -134,7 +134,7 @@ async def hover(ls: SPINAsmLanguageServer, params: lsp.HoverParams) -> lsp.Hover elif token_val == "CHO" and token.next_token is not None: token_val = f"CHO {str(token.next_token)}" - hover_msg = DOCUMENTATION.get(token_val, "") + hover_msg = ls.documentation.get(token_val, "") return ( None @@ -152,15 +152,22 @@ async def completions( """Returns completion items.""" parser = await ls.get_parser(params.text_document.uri) - opcodes = [k.upper() for k in DOCUMENTATION] + opcodes = [k.upper() for k in ls.documentation] symbols = list(parser.symtbl.keys()) labels = list(parser.jmptbl.keys()) mem = list(parser.mem.keys()) opcode_items = [ - lsp.CompletionItem(label=k, kind=lsp.CompletionItemKind.Function) + lsp.CompletionItem( + label=k, + kind=lsp.CompletionItemKind.Function, + documentation=lsp.MarkupContent( + kind=lsp.MarkupKind.Markdown, value=ls.documentation[k.upper()] + ), + ) for k in opcodes ] + # TODO: Set details for all the completions below using the same info as the hover. symbol_items = [ lsp.CompletionItem(label=k, kind=lsp.CompletionItemKind.Constant) for k in symbols From 73c2647d65c04bb7fcacd44819640b4dd1a0b170 Mon Sep 17 00:00:00 2001 From: Aaron Zuspan Date: Sun, 11 Aug 2024 13:09:13 -0700 Subject: [PATCH 15/18] Handle multi-word tokens (CHO RDAL) in the registry --- src/spinasm_lsp/parser.py | 74 ++++++++++++++++++++++++++++++++------- src/spinasm_lsp/server.py | 16 +-------- tests/test_parser.py | 25 +++++++++++++ 3 files changed, 88 insertions(+), 27 deletions(-) diff --git a/src/spinasm_lsp/parser.py b/src/spinasm_lsp/parser.py index 921b44d..d43a941 100644 --- a/src/spinasm_lsp/parser.py +++ b/src/spinasm_lsp/parser.py @@ -30,20 +30,64 @@ class Symbol(TypedDict): class Token: - """A token and its position in a source file.""" - - def __init__(self, symbol: Symbol, start: lsp.Position) -> None: - width = max(len(symbol["stxt"] or "") - 1, 0) - end = lsp.Position(line=start.line, character=start.character + width) + """ + A parsed token. + + Parameters + ---------- + symbol : Symbol + The symbol parsed by asfv1 representing the token. + start : lsp.Position + The start position of the token in the source file. + end : lsp.Position, optional + The end position of the token in the source file. If not provided, the end + position is calculated based on the width of the symbol's stxt. + + Attributes + ---------- + symbol : Symbol + The symbol parsed by asfv1 representing the token. + range : lsp.Range + The location range of the token in the source file. + next_token : Token | None + The token that follows this token in the source file. + prev_token : Token | None + The token that precedes this token in the source file. 
+ """ + + def __init__( + self, symbol: Symbol, start: lsp.Position, end: lsp.Position | None = None + ): + if end is None: + width = max(len(symbol["stxt"] or "") - 1, 0) + end = lsp.Position(line=start.line, character=start.character + width) self.symbol: Symbol = symbol self.range: lsp.Range = lsp.Range(start=start, end=end) self.next_token: Token | None = None self.prev_token: Token | None = None - def __str__(self): + def __repr__(self): return self.symbol["stxt"] or "Empty token" + def concatenate(self, other: Token) -> Token: + """ + Concatenate by merging with another token, in place. + + In practice, this is used for the multi-word opcodes that are parsed as separate + tokens: CHO RDA, CHO RDAL, and CHO SOF. + """ + if any( + symbol_type not in ("MNEMONIC", "LABEL") + for symbol_type in (self.symbol["type"], other.symbol["type"]) + ): + raise TypeError("Only MNEMONIC and LABEL symbols can be concatenated.") + + self.symbol["txt"] = f"{self.symbol['txt']} {other.symbol['txt']}" + self.symbol["stxt"] = f"{self.symbol['stxt']} {other.symbol['stxt']}" + self.range.end = other.range.end + return self + def _clone(self) -> Token: """Return a clone of the token to avoid mutating the original.""" return copy.deepcopy(self) @@ -79,12 +123,12 @@ def __init__(self, tokens: list[Token] | None = None) -> None: def register_token(self, token: Token) -> None: """Add a token to the registry.""" - # TODO: Maybe handle multi-word CHO instructions here, by merging with the next - # token? The tricky part is that the next token would still end up getting - # registered unless we prevent it... If we end up with overlapping tokens, that - # will break `get_token_at_position`. I could check if prev token was CHO when - # I register RDAL, SOF, or RDA, and if so register them as one and unregister - # the previous? + # Handle multi-word CHO instructions by merging the second token with the first + # and skipping the second token. + if str(self._prev_token) == "CHO" and str(token) in ("RDA", "RDAL", "SOF"): + self._prev_token.concatenate(token) # type: ignore + return + if token.range.start.line not in self._tokens_by_line: self._tokens_by_line[token.range.start.line] = [] @@ -287,3 +331,9 @@ def parse(self) -> SPINAsmParser: """Parse and return the parser.""" super().parse() return self + + +if __name__ == "__main__": + code = r"""cho rda,sin0,sin|reg|compc,0""" + parsed = SPINAsmParser(code).parse() + print(parsed.token_registry._tokens_by_line[0]) diff --git a/src/spinasm_lsp/server.py b/src/spinasm_lsp/server.py index 24d846d..eb7222d 100644 --- a/src/spinasm_lsp/server.py +++ b/src/spinasm_lsp/server.py @@ -110,13 +110,8 @@ async def hover(ls: SPINAsmLanguageServer, params: lsp.HoverParams) -> lsp.Hover hover_msg = None if token_type in ("LABEL", "TARGET"): - # Special case for hovering over the second word of a CHO instruction, which - # should be treated as part of the instruction for retrieving documentation. 
- if token_val == "RDA" and str(token.prev_token) == "CHO": - token_val = f"CHO {token_val}" - hover_msg = ls.documentation.get(token_val, "") # Label definitions and targets - elif token_val in parser.jmptbl: + if token_val in parser.jmptbl: hover_definition = parser.jmptbl[token_val.upper()] hover_msg = f"(label) {token_val}: Offset[**{hover_definition}**]" # Variable and memory definitions @@ -125,15 +120,6 @@ async def hover(ls: SPINAsmLanguageServer, params: lsp.HoverParams) -> lsp.Hover hover_msg = f"(constant) {token_val}: Literal[**{hover_definition}**]" # Opcodes and assignments elif token_type in ("ASSEMBLER", "MNEMONIC"): - # Special case for hovering over the second word of a CHO instruction, which - # should be treated as part of the instruction for retrieving documentation. - if token_val in ("SOF", "RDA") and str(token.prev_token) == "CHO": - token_val = f"CHO {token_val}" - # CHO is a special opcode that treats its first argument as part of the - # instruction, for the sake of documentation. - elif token_val == "CHO" and token.next_token is not None: - token_val = f"CHO {str(token.next_token)}" - hover_msg = ls.documentation.get(token_val, "") return ( diff --git a/tests/test_parser.py b/tests/test_parser.py index 84af6cd..8d05d08 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -83,3 +83,28 @@ def test_get_token_positions(): all_matches = parser.token_registry.get_matching_tokens("apout") assert len(all_matches) == 4 assert [t.range.start.line for t in all_matches] == [23, 57, 60, 70] + + +def test_concatenate_cho_rdal_tokens(): + """Test that CHO and RDAL tokens are concatenated correctly into CHO RDAL.""" + cho_rdal = Token( + symbol={"type": "MNEMONIC", "txt": "cho", "stxt": "CHO", "val": None}, + start=lsp.Position(line=0, character=0), + ).concatenate( + Token( + symbol={"type": "LABEL", "txt": "rdal", "stxt": "RDAL", "val": None}, + # Put whitespace between CHO and RDAL to test that range is calculated + start=lsp.Position(line=0, character=10), + ) + ) + + assert cho_rdal.symbol == { + "type": "MNEMONIC", + "txt": "cho rdal", + "stxt": "CHO RDAL", + "val": None, + } + + assert cho_rdal.range == lsp.Range( + start=lsp.Position(line=0, character=0), end=lsp.Position(line=0, character=13) + ) From 3965c0c1f597326abac46388d578638bb6048018 Mon Sep 17 00:00:00 2001 From: Aaron Zuspan Date: Sun, 11 Aug 2024 13:22:28 -0700 Subject: [PATCH 16/18] Ignore EOF tokens to simplify typing --- src/spinasm_lsp/parser.py | 30 ++++++++++++++++++------------ src/spinasm_lsp/server.py | 4 ---- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/spinasm_lsp/parser.py b/src/spinasm_lsp/parser.py index d43a941..79fd5b7 100644 --- a/src/spinasm_lsp/parser.py +++ b/src/spinasm_lsp/parser.py @@ -4,18 +4,21 @@ import bisect import copy -from typing import Literal, TypedDict, cast +from typing import Literal, TypedDict import lsprotocol.types as lsp from asfv1 import fv1parse class Symbol(TypedDict): - """The token specification used by asfv1.""" + """ + The token specification used by asfv1. + + Note that we exclude EOF tokens, as they are ignored by the LSP. 
+ """ type: Literal[ "ASSEMBLER", - "EOF", "INTEGER", "LABEL", "TARGET", @@ -24,8 +27,8 @@ class Symbol(TypedDict): "FLOAT", "ARGSEP", ] - txt: str | None - stxt: str | None + txt: str + stxt: str val: int | float | None @@ -59,16 +62,16 @@ def __init__( self, symbol: Symbol, start: lsp.Position, end: lsp.Position | None = None ): if end is None: - width = max(len(symbol["stxt"] or "") - 1, 0) - end = lsp.Position(line=start.line, character=start.character + width) + width = len(symbol["stxt"]) + end = lsp.Position(line=start.line, character=start.character + width - 1) self.symbol: Symbol = symbol self.range: lsp.Range = lsp.Range(start=start, end=end) self.next_token: Token | None = None self.prev_token: Token | None = None - def __repr__(self): - return self.symbol["stxt"] or "Empty token" + def __repr__(self) -> str: + return self.symbol["stxt"] def concatenate(self, other: Token) -> Token: """ @@ -83,8 +86,8 @@ def concatenate(self, other: Token) -> Token: ): raise TypeError("Only MNEMONIC and LABEL symbols can be concatenated.") - self.symbol["txt"] = f"{self.symbol['txt']} {other.symbol['txt']}" - self.symbol["stxt"] = f"{self.symbol['stxt']} {other.symbol['stxt']}" + self.symbol["txt"] += f" {other.symbol['txt']}" + self.symbol["stxt"] += f" {other.symbol['stxt']}" self.range.end = other.range.end return self @@ -100,7 +103,7 @@ def without_address_modifier(self) -> Token: return self token = self._clone() - token.symbol["stxt"] = cast(str, token.symbol["stxt"])[:-1] + token.symbol["stxt"] = token.symbol["stxt"][:-1] token.range.end.character -= 1 return token @@ -291,6 +294,9 @@ def previous_line(self): def __next__(self): """Parse the next symbol and update the column and definitions.""" super().__next__() + if self.sym["type"] == "EOF": + return + self._update_column() token_start = lsp.Position( diff --git a/src/spinasm_lsp/server.py b/src/spinasm_lsp/server.py index eb7222d..a17e892 100644 --- a/src/spinasm_lsp/server.py +++ b/src/spinasm_lsp/server.py @@ -104,10 +104,6 @@ async def hover(ls: SPINAsmLanguageServer, params: lsp.HoverParams) -> lsp.Hover token_val = token.symbol["stxt"] token_type = token.symbol["type"] - # stxt should only be None for EOF tokens, but check to be sure. 
- if token_type == "EOF" or not isinstance(token_val, str): - return None - hover_msg = None if token_type in ("LABEL", "TARGET"): # Label definitions and targets From 0e2477527f3bc733bd0b93918a6cbe48b11e4eb7 Mon Sep 17 00:00:00 2001 From: Aaron Zuspan Date: Sun, 11 Aug 2024 13:32:25 -0700 Subject: [PATCH 17/18] Refactoring --- src/spinasm_lsp/server.py | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/src/spinasm_lsp/server.py b/src/spinasm_lsp/server.py index a17e892..c369140 100644 --- a/src/spinasm_lsp/server.py +++ b/src/spinasm_lsp/server.py @@ -97,8 +97,7 @@ async def hover(ls: SPINAsmLanguageServer, params: lsp.HoverParams) -> lsp.Hover """Retrieve documentation from symbols on hover.""" parser = await ls.get_parser(params.text_document.uri) - token = parser.token_registry.get_token_at_position(params.position) - if token is None: + if (token := parser.token_registry.get_token_at_position(params.position)) is None: return None token_val = token.symbol["stxt"] @@ -176,8 +175,7 @@ async def definition( document = ls.workspace.get_text_document(params.text_document.uri) - token = parser.token_registry.get_token_at_position(params.position) - if token is None: + if (token := parser.token_registry.get_token_at_position(params.position)) is None: return None # Definitions should be checked against the base token name, ignoring address @@ -199,13 +197,10 @@ async def prepare_rename(ls: SPINAsmLanguageServer, params: lsp.PrepareRenamePar is a valid operation.""" parser = await ls.get_parser(params.text_document.uri) - token = parser.token_registry.get_token_at_position(params.position) - if token is None: - ls.info(f"No token to rename at {params.position}.") + if (token := parser.token_registry.get_token_at_position(params.position)) is None: return None - # Renaming should be checked against the base token name, ignoring address - # modifiers. + # Renaming is checked against the base token name, ignoring address modifiers. base_token = token.without_address_modifier() # Only user-defined labels should support renaming @@ -222,23 +217,14 @@ async def prepare_rename(ls: SPINAsmLanguageServer, params: lsp.PrepareRenamePar async def rename(ls: SPINAsmLanguageServer, params: lsp.RenameParams): parser = await ls.get_parser(params.text_document.uri) - token = parser.token_registry.get_token_at_position(params.position) - if token is None: + if (token := parser.token_registry.get_token_at_position(params.position)) is None: return None - # For renaming purposes, we ignore address modifiers so that e.g. we can rename - # `Delay` by renaming `Delay#` + # Ignore address modifiers so that e.g. 
we can rename `Delay` by renaming `Delay#` base_token = token.without_address_modifier() matching_tokens = parser.token_registry.get_matching_tokens(str(base_token)) - edits = [ - lsp.TextEdit( - range=t.range, - new_text=params.new_name, - ) - for t in matching_tokens - ] - + edits = [lsp.TextEdit(t.range, new_text=params.new_name) for t in matching_tokens] return lsp.WorkspaceEdit(changes={params.text_document.uri: edits}) From ff5d2cb314b04f9ac86e5a0e9b407f61b5b24da9 Mon Sep 17 00:00:00 2001 From: Aaron Zuspan Date: Sun, 11 Aug 2024 14:14:15 -0700 Subject: [PATCH 18/18] Add completion item details --- src/spinasm_lsp/server.py | 78 +++++++++++++++++++++------------------ tests/conftest.py | 14 +++++-- tests/test_server.py | 17 ++++++++- 3 files changed, 68 insertions(+), 41 deletions(-) diff --git a/src/spinasm_lsp/server.py b/src/spinasm_lsp/server.py index c369140..b145b72 100644 --- a/src/spinasm_lsp/server.py +++ b/src/spinasm_lsp/server.py @@ -92,6 +92,19 @@ def did_close( ls.publish_diagnostics(params.text_document.uri, []) +def _get_defined_hover(stxt: str, parser: SPINAsmParser) -> str: + """Get a hover message with the value of a defined variable or label.""" + # Check jmptbl first since labels are also defined in symtbl + if stxt in parser.jmptbl: + hover_definition = parser.jmptbl[stxt] + return f"(label) {stxt}: Offset[{hover_definition}]" + if stxt in parser.symtbl: + hover_definition = parser.symtbl[stxt] + return f"(constant) {stxt}: Literal[{hover_definition}]" + + return "" + + @server.feature(lsp.TEXT_DOCUMENT_HOVER) async def hover(ls: SPINAsmLanguageServer, params: lsp.HoverParams) -> lsp.Hover | None: """Retrieve documentation from symbols on hover.""" @@ -100,22 +113,12 @@ async def hover(ls: SPINAsmLanguageServer, params: lsp.HoverParams) -> lsp.Hover if (token := parser.token_registry.get_token_at_position(params.position)) is None: return None - token_val = token.symbol["stxt"] - token_type = token.symbol["type"] - hover_msg = None - if token_type in ("LABEL", "TARGET"): - # Label definitions and targets - if token_val in parser.jmptbl: - hover_definition = parser.jmptbl[token_val.upper()] - hover_msg = f"(label) {token_val}: Offset[**{hover_definition}**]" - # Variable and memory definitions - elif token_val in parser.symtbl: - hover_definition = parser.symtbl[token_val.upper()] - hover_msg = f"(constant) {token_val}: Literal[**{hover_definition}**]" - # Opcodes and assignments - elif token_type in ("ASSEMBLER", "MNEMONIC"): - hover_msg = ls.documentation.get(token_val, "") + if token.symbol["type"] in ("LABEL", "TARGET"): + hover_msg = _get_defined_hover(str(token), parser=parser) + + elif token.symbol["type"] in ("ASSEMBLER", "MNEMONIC"): + hover_msg = ls.documentation.get(str(token), "") return ( None @@ -133,36 +136,39 @@ async def completions( """Returns completion items.""" parser = await ls.get_parser(params.text_document.uri) - opcodes = [k.upper() for k in ls.documentation] - symbols = list(parser.symtbl.keys()) - labels = list(parser.jmptbl.keys()) - mem = list(parser.mem.keys()) + symbol_completions = [ + lsp.CompletionItem( + label=symbol, + kind=lsp.CompletionItemKind.Constant, + detail=_get_defined_hover(symbol, parser=parser), + ) + for symbol in parser.symtbl + ] - opcode_items = [ + label_completions = [ lsp.CompletionItem( - label=k, + label=label, + kind=lsp.CompletionItemKind.Reference, + detail=_get_defined_hover(label, parser=parser), + ) + for label in parser.jmptbl + ] + + opcode_completions = [ + lsp.CompletionItem( + label=opcode, 
             kind=lsp.CompletionItemKind.Function,
+            detail="(opcode)",
             documentation=lsp.MarkupContent(
-                kind=lsp.MarkupKind.Markdown, value=ls.documentation[k.upper()]
+                kind=lsp.MarkupKind.Markdown, value=ls.documentation[opcode]
             ),
         )
-        for k in opcodes
-    ]
-    # TODO: Set details for all the completions below using the same info as the hover.
-    symbol_items = [
-        lsp.CompletionItem(label=k, kind=lsp.CompletionItemKind.Constant)
-        for k in symbols
-    ]
-    label_items = [
-        lsp.CompletionItem(label=k, kind=lsp.CompletionItemKind.Reference)
-        for k in labels
-    ]
-    mem_items = [
-        lsp.CompletionItem(label=k, kind=lsp.CompletionItemKind.Variable) for k in mem
+        for opcode in [k.upper() for k in ls.documentation]
     ]
 
     return lsp.CompletionList(
-        is_incomplete=False, items=opcode_items + mem_items + symbol_items + label_items
+        is_incomplete=False,
+        items=symbol_completions + label_completions + opcode_completions,
     )
 
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 10bd232..d9743a2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -23,7 +23,7 @@ class HoverDict(TypedDict):
 
     symbol: str
     position: lsp.Position
-    contains: str
+    contains: str | None
 
 
 class PrepareRenameDict(TypedDict):
@@ -113,17 +113,17 @@ class RenameDict(TypedDict):
     {
         "symbol": "endclr",
         "position": lsp.Position(line=37, character=13),
-        "contains": "(label) ENDCLR: Offset[**4**]",
+        "contains": "(label) ENDCLR: Offset[4]",
     },
     {
         "symbol": "mono",
         "position": lsp.Position(line=47, character=5),
-        "contains": "(constant) MONO: Literal[**32**]",
+        "contains": "(constant) MONO: Literal[32]",
     },
     {
         "symbol": "lap2b#",
         "position": lsp.Position(line=73, character=4),
-        "contains": "(constant) LAP2B#: Literal[**9802**]",
+        "contains": "(constant) LAP2B#: Literal[9802]",
     },
     {
         # CHO RDA, hovering over CHO
@@ -137,6 +137,12 @@ class RenameDict(TypedDict):
         "position": lsp.Position(line=85, character=4),
         "contains": "## `CHO RDA N, C, D`",
     },
+    {
+        # Hovering over an int, which should return no hover info
+        "symbol": "None",
+        "position": lsp.Position(line=8, character=8),
+        "contains": None,
+    },
 ]
 
 
diff --git a/tests/test_server.py b/tests/test_server.py
index be8dce7..5029726 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -49,6 +49,7 @@ async def test_definition(assignment: dict, client: LanguageClient):
 
 @pytest.mark.asyncio()
 async def test_completions(client: LanguageClient):
+    """Test that expected completions are shown with details and documentation."""
     patch = PATCH_DIR / "Basic.spn"
 
     results = await client.text_document_completion_async(
@@ -81,6 +82,16 @@ async def test_completions(client: LanguageClient):
     for completion in expected_completions:
         assert completion in completions, f"Expected completion {completion} not found"
 
+    # Completions for defined values should show their literal value
+    apout_completion = [item for item in results.items if item.label == "APOUT"][0]
+    assert apout_completion.detail == "(constant) APOUT: Literal[33]"
+    assert apout_completion.documentation is None
+
+    # Completions for opcodes should include their documentation
+    cho_rda_completion = [item for item in results.items if item.label == "CHO RDA"][0]
+    assert cho_rda_completion.detail == "(opcode)"
+    assert "## `CHO RDA N, C, D`" in str(cho_rda_completion.documentation)
+
 
 @pytest.mark.asyncio()
 async def test_diagnostic_parsing_errors(client: LanguageClient):
@@ -161,7 +172,11 @@ async def test_hover(hover: dict, client: LanguageClient):
         )
     )
 
-    assert hover["contains"] in result.contents.value
+    if hover["contains"] is None:
+        assert result is None, "Expected no hover result"
+    else:
+        msg = f"Hover does not contain `{hover['contains']}`"
+        assert hover["contains"] in result.contents.value, msg
 
 
 @pytest.mark.parametrize("prepare", PREPARE_RENAMES, ids=lambda x: x["symbol"])