-
Notifications
You must be signed in to change notification settings - Fork 29
/
sectorforth.asm
429 lines (391 loc) · 17.3 KB
/
sectorforth.asm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
; sectorforth - a 512-byte, bootable x86 Forth.
; Copyright (c) 2020 Cesar Blum
; Distributed under the MIT license. See LICENSE for details.
;
; sectorforth is a 16-bit x86 Forth that fits entirely within a
; boot sector (512 bytes).
;
; It's a direct threaded Forth, with SI acting as the Forth
; instruction pointer. Words are executed using LODSW to advance
; SI and load the next word's address into AX, which is then
; jumped to.
;
; The SP register is used as the data stack pointer, and the BP
; register acts as the return stack pointer.
;
; The minimum CPU required to run sectorforth is the 386, to use
; the SETNZ instruction.
bits 16
cpu 386
; Set CS to a known value by performing a far jump. Memory up to
; 0x0500 is used by the BIOS. Setting the segment to 0x0500 gives
; sectorforth an entire free segment to work with.
jmp 0x0050:start
; On x86, the boot sector is loaded at 0x7c00 on boot. In segment
; 0x0500, that's 0x7700 (0x0050 << 4 + 0x7700 == 0x7c00).
org 0x7700
; Define constants for the memory map. Everything is organized
; within a single 64 KB segment. TIB is placed at 0x0000 to
; simplify input parsing (the Forth variable >IN ends up being
; also a pointer into TIB, so there's no need to add >IN to TIB
; to get a pointer to the parse area). TIB is 4 KB long.
TIB equ 0x0000 ; terminal input buffer (TIB)
STATE equ 0x1000 ; current state (0=interpret, 1=compile)
TOIN equ 0x1002 ; current read offset into TIB (>IN)
RP0 equ 0x76fe ; bottom of return stack
SP0 equ 0xfffe ; bottom of data stack
; Each dictionary entry is laid out in memory as such:
;
; *--------------*--------------*--------------*--------------*
; | Link pointer | Flags+Length | Name... | Code... |
; *--------------*--------------*--------------*--------------*
; 2 bytes 1 byte Length bytes Variable
;
; Flags IMMEDIATE and HIDDEN are used in assembly code. Room is
; left for an additional, user-defined flag, so word names are
; limited to 32 characters.
F_IMMEDIATE equ 0x80
F_HIDDEN equ 0x40
LENMASK equ 0x1f
; Each dictionary entry needs a link to the previous entry. The
; last entry links to zero, marking the end of the dictionary.
; As dictionary entries are defined, link will be redefined to
; point to the previous entry.
%define link 0
; defword lays out a dictionary entry where it is expanded.
%macro defword 2-3 0 ; name, label, flags
word_%2:
dw link ; link to previous word
%define link word_%2
%strlen %%len %1
db %3+%%len ; flags+length
db %1 ; name
%2: ; code starts here
%endmacro
; NEXT advances execution to the next word. The actual code is
; placed further ahead for strategic reasons. The macro has to be
; defined here, since it's used in the words defined ahead.
%define NEXT jmp next
; sectorforth has only eight primitive words, with which
; everything else can be built in Forth:
;
; @ ( addr -- x ) Fetch memory at addr
; ! ( x addr -- ) Store x at addr
; sp@ ( -- addr ) Get current data stack pointer
; rp@ ( -- addr ) Get current return stack pointer
; 0= ( x -- f ) -1 if top of stack is 0, 0 otherwise
; + ( x1 x2 -- n ) Add the two values at the top of the stack
; nand ( x1 x2 -- n ) NAND the two values at the top of the stack
; exit ( r:addr -- ) Resume execution at address at the top of
; the return stack
defword "@",FETCH
pop bx
push word [bx]
NEXT
defword "!",STORE
pop bx
pop word [bx]
NEXT
defword "sp@",SPFETCH
push sp
NEXT
defword "rp@",RPFETCH
push bp
NEXT
defword "0=",ZEROEQUALS
pop ax
test ax,ax
setnz al ; AL=0 if ZF=1, else AL=1
dec ax ; AL=ff if AL=0, else AL=0
cbw ; AH=AL
push ax
NEXT
defword "+",PLUS
pop bx
pop ax
add ax,bx
push ax
NEXT
defword "nand",NAND
pop bx
pop ax
and ax,bx
not ax
push ax
NEXT
defword "exit",EXIT
xchg sp,bp ; swap SP and BP, SP controls return stack
pop si ; pop address to next word
xchg sp,bp ; restore SP and BP
NEXT
; Besides primitives, a few variables are exposed to Forth code:
; TIB, STATE, >IN, HERE, and LATEST. With sectorforth's >IN being
; both an offset and a pointer into TIB (as TIB starts at 0x0000),
; TIB could be left out. But it is exposed so that sectorforth
; code that accesses the parse area can be written in an idiomatic
; fashion (e.g. TIB >IN @ +).
defword "tib",TIBVAR
push word TIB
NEXT
defword "state",STATEVAR
push word STATE
NEXT
defword ">in",TOINVAR
push word TOIN
NEXT
; Strategically define next here so most jumps to it are short,
; saving extra bytes that would be taken by near jumps.
next:
lodsw ; load next word's address into AX
jmp ax ; jump directly to it
; Words and data space for the HERE and LATEST variables.
defword "here",HEREVAR
push word HERE
NEXT
HERE: dw start_HERE
defword "latest",LATESTVAR
push word LATEST
NEXT
LATEST: dw word_SEMICOLON ; initialized to last word in dictionary
; Define a couple of I/O primitives to make things interactive.
; They can also be used to build a richer interpreter loop.
;
; KEY waits for a key press and pushes its scan code (AH) and
; ASCII character (AL) to the stack, both in a single cell.
defword "key",KEY
mov ah,0
int 0x16
push ax
NEXT
; EMIT writes to the screen the ASCII character corresponding to
; the lowest 8 bits of the value at the top of the stack.
defword "emit",EMIT
pop ax
call writechar
NEXT
; The colon compiler reads a name from the terminal input buffer,
; creates a dictionary entry for it, writes machine code to jump
; to DOCOL, updates LATEST and HERE, and switches to compilation
; state.
defword ":",COLON
call token ; parse word from input
push si
mov si,di ; set parsed word as string copy source
mov di,[HERE] ; set current value of HERE as destination
mov ax,[LATEST] ; get pointer to latest defined word
mov [LATEST],di ; update LATEST to new word being defined
stosw ; link pointer
mov al,cl
or al,F_HIDDEN ; hide new word while it's being defined
stosb ; word length
rep movsb ; word name
mov ax,0x26ff
stosw ; compile near jump, absolute indirect...
mov ax,DOCOL.addr
stosw ; ...to DOCOL
mov [HERE],di ; update HERE to next free position
mov byte [STATE],1 ; switch to compilation state
pop si
NEXT
; DOCOL sets up and starts execution of a user-defined words.
; Those differ from words defined in machine code by being
; sequences of addresses to other words, so a bit of code is
; needed to save the current value of SI (this Forth's instruction
; pointer), and point it to the sequence of addresses that makes
; up a word's body.
;
; DOCOL advances AX 4 bytes, and then moves that value to SI. When
; DOCOL is jumped to, AX points to the code field of the word
; about to be executed. The 4 bytes being skipped are the actual
; jump instruction to DOCOL itself, inserted by the colon compiler
; when it creates a new entry in the dictionary.
DOCOL:
xchg sp,bp ; swap SP and BP, SP controls return stack
push si ; push current "instruction pointer"
xchg sp,bp ; restore SP and BP
add ax,4 ; skip word's code field
mov si,ax ; point "instruction pointer" to word body
NEXT ; start executing the word
; The jump instruction inserted by the compiler is an indirect
; jump, so it needs to read the location to jump to from another
; memory location.
.addr: dw DOCOL
; Semicolon is the only immediate primitive. It writes the address
; of EXIT to the end of a new word definition, makes the word
; visible in the dictionary, and switches back to interpretation
; state.
defword ";",SEMICOLON,F_IMMEDIATE
mov bx,[LATEST]
and byte [bx+2],~F_HIDDEN ; reveal new word
mov byte [STATE],0 ; switch to interpretation state
mov ax,EXIT ; prepare to compile EXIT
compile:
mov di,[HERE]
stosw ; compile contents of AX to HERE
mov [HERE],di ; advance HERE to next cell
NEXT
; Execution starts here.
start:
cld ; clear direction flag
; Set up segment registers to point to the same segment as CS.
push cs
push cs
push cs
pop ds
pop es
pop ss
; Skip error signaling on initialization
jmp init
; Display a red '!!' to let the user know an error happened and the
; interpreter is being reset
error:
mov ax,0x0921 ; write '!'
mov bx,0x0004 ; black background, red text
mov cx,2 ; twice
int 0x10
; Initialize stack pointers, state, and terminal input buffer.
init:
mov bp,RP0 ; BP is the return stack pointer
mov sp,SP0 ; SP is the data stack pointer
; Fill TIB with zeros, and set STATE and >IN to 0
mov al,0
mov cx,STATE+4
mov di,TIB
rep stosb
; Enter the interpreter loop.
;
; Words are read one at time and searched for in the dictionary.
; If a word is found in the dictionary, it is either interpreted
; (i.e. executed) or compiled, depending on the current state and
; the word's IMMEDIATE flag.
;
; When a word is not found, the state of the interpreter is reset:
; the data and return stacks are cleared as well as the terminal
; input buffer, and the interpreter goes into interpretation mode.
interpreter:
call token ; parse word from input
mov bx,[LATEST] ; start searching for it in the dictionary
.1: test bx,bx ; zero?
jz error ; not found, reset interpreter state
mov si,bx
lodsw ; skip link
lodsb ; read flags+length
mov dl,al ; save those for later use
test al,F_HIDDEN ; entry hidden?
jnz .2 ; if so, skip it
and al,LENMASK ; mask out flags
cmp al,cl ; same length?
jne .2 ; if not, skip entry
push cx
push di
repe cmpsb ; compare strings
pop di
pop cx
je .3 ; if equal, search is over
.2: mov bx,[bx] ; skip to next entry
jmp .1 ; try again
.3: mov ax,si ; after comparison, SI points to code field
mov si,.loop ; set SI so NEXT loops back to interpreter
; Decide whether to interpret or compile the word. The IMMEDIATE
; flag is located in the most significant bit of the flags+length
; byte. STATE can only be 0 or 1. When ORing those two, these are
; the possibilities:
;
; IMMEDIATE STATE OR ACTION
; 0000000 0000000 00000000 Interpret
; 0000000 0000001 00000001 Compile
; 1000000 0000000 10000000 Interpret
; 1000000 0000001 10000001 Interpret
;
; A word is only compiled when the result of that OR is 1.
; Decrementing that result sets the zero flag for a conditional
; jump.
and dl,F_IMMEDIATE ; isolate IMMEDIATE flag
or dl,[STATE] ; OR with state
dec dl ; decrement
jz compile ; if result is zero, compile
jmp ax ; otherwise, interpret (execute) the word
.loop: dw interpreter
; Parse a word from the terminal input buffer and return its
; address and length in DI and CX, respectively.
;
; If after skipping spaces a 0 is found, more input is read from
; the keyboard into the terminal input buffer until return is
; pressed, at which point execution jumps back to the beginning of
; token so it can attempt to parse a word again.
;
; Before reading input from the keyboard, a CRLF is emitted so
; the user can enter input on a fresh, blank line on the screen.
token:
mov di,[TOIN] ; starting at the current position in TIB
mov cx,-1 ; search "indefinitely"
mov al,32 ; for a character that's not a space
repe scasb
dec di ; result is one byte past found character
cmp byte [di],0 ; found a 0?
je .readline ; if so, read more input
mov cx,-1 ; search "indefinitely" again
repne scasb ; this time, for a space
dec di ; adjust DI again
mov [TOIN],di ; update current position in TIB
not cx ; after ones' complement, CX=length+1
dec cx ; adjust CX to correct length
sub di,cx ; point to start of parsed word
ret
.readline:
mov al,13
call writechar ; CR
mov al,10
call writechar ; LF
mov di,TIB ; read into TIB
.1: mov ah,0 ; wait until a key is pressed
int 0x16
cmp al,13 ; return pressed?
je .3 ; if so, finish reading
cmp al,8 ; backspace pressed?
je .2 ; if so, erase character
call writechar ; otherwise, write character to screen
stosb ; store character in TIB
jmp .1 ; keep reading
.2: cmp di,TIB ; start of TIB?
je .1 ; if so, there's nothing to erase
dec di ; erase character in TIB
call writechar ; move cursor back one character
mov ax,0x0a20 ; erase without moving cursor
mov cx,1
int 0x10 ; (BH already set to 0 by writechar)
jmp .1 ; keep reading
.3: mov ax,0x0020
stosw ; put final delimiter and 0 in TIB
call writechar ; write a space between user input and
; execution output
mov word [TOIN],0 ; point >IN to start of TIB
jmp token ; try parsing a word again
; writechar writes a character to the screen. It uses INT 10/AH=0e
; to perform teletype output, writing the character, updating the
; cursor, and scrolling the screen, all in one go. Writing
; backspace using the BIOS only moves the cursor backwards within
; a line, but does not move it back to the previous line.
; writechar addresses that.
writechar:
push ax ; INT 10h/AH=03h clobbers AX in some BIOSes
mov bh,0 ; video page 0 for all BIOS calls
mov ah,3 ; get cursor position (DH=row, DL=column)
int 0x10
pop ax ; restore AX
mov ah,0x0e ; teletype output
mov bl,0x7 ; black background, light grey text
int 0x10
cmp al,8 ; backspace?
jne .1 ; if not, nothing else to do
test dl,dl ; was cursor in first column?
jnz .1 ; if not, nothing else to do
mov ah,2 ; move cursor
mov dl,79 ; to last column
dec dh ; of previous row
int 0x10
.1: ret
times 510-($-$$) db 0
db 0x55, 0xaa
; New dictionary entries will be written starting here.
start_HERE: