aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexis Hovorka <[email protected]>2022-11-30 13:08:30 -0700
committerAlexis Hovorka <[email protected]>2022-11-30 13:08:30 -0700
commitff4254830c6ed2d0ba94a66d833fc4e86f9ae6bd (patch)
treedaf95d206b25cf1a7da875861677c6e73bbe1cae
Initial commit
-rw-r--r--io.notes178
-rw-r--r--risc.isa258
-rw-r--r--risc.todo228
-rw-r--r--rtl/Makefile42
-rw-r--r--rtl/alu.v107
-rw-r--r--rtl/boot.mem52
-rw-r--r--rtl/bram.v41
-rw-r--r--rtl/cla.v42
-rw-r--r--rtl/debouncer.v28
-rw-r--r--rtl/northbridge.v40
-rw-r--r--rtl/project.ys2
-rw-r--r--rtl/regfile.v27
-rw-r--r--rtl/southbridge.v98
-rw-r--r--rtl/test/cpu_tb.v29
-rw-r--r--rtl/test/prog.mem16
-rw-r--r--rtl/top.v90
-rw-r--r--rtl/uart_rx.v74
-rw-r--r--rtl/uart_tx.v74
-rw-r--r--rtl/ulx3s.lpf617
-rw-r--r--rtl/unprivileged_cpu.v117
-rw-r--r--src/echo.prog9
-rw-r--r--src/f1.f1311
-rw-r--r--src/f1.prog258
-rw-r--r--src/led.prog10
-rw-r--r--src/print.prog30
-rwxr-xr-xsrc/sendprog36
26 files changed, 2814 insertions, 0 deletions
diff --git a/io.notes b/io.notes
new file mode 100644
index 0000000..49b54c8
--- /dev/null
+++ b/io.notes
@@ -0,0 +1,178 @@
+GPIO (TODO separate IO?)
+- i 4 switches
+- i 7 buttons
+- o8 leds
+- o1 esp enable
+- o1 piezo (TODO ADC?)
+- o1 tft reset
+- o1 inky reset
+- o1 uart0 select
+- o1 uart1 select
+SPI ctl
+- flash
+- sd
+- adc
+- tft
+- inky cmd
+- inky data
+SPI dat
+- also busy/wait signal from tft/inky
+I2C0 internal
+I2C1 external (or can they be wired together?)
+UART0 cmd (ftdi/ext)
+UART0 dat
+UART1 cmd (esp)
+UART1 dat
+PS/2
+Neopixels
+
+Clocks
+Watchdog
+- Reset "password"?
+- Action (interrupt/reset)
+
+TRNG?
+
+DMA
+
+-----------------------
+
+UART has
+- in
+- out
+- rts (output, ready to receive)
+- cts (input, allowed to send)
+
+- two interrupts (rx, tx)
+
+- two registers (ctl, data)
+https://stnolting.github.io/neorv32/#_primary_universal_asynchronous_receiver_and_transmitter_uart0
+
+- fifo?
+
+-----------------------
+
+SPI has
+- one interrupt (complete)
+- two registers (ctl, data)
+ - cpol/cpha
+ - chip select
+ - clock divider/prescaler?
+ - interrupt config?
+- fifo?
+
+-----------------------
+
+I2C has
+- two interrupts (complete, self addr received)
+
+-----------------------
+
+Standard RISC-V interrupts:
+
+- mtime_irq_i: Machine timer interrupt from processor-external MTIME unit
+ (MTI). This IRQ is only available if the processor-internal MTIME unit is not
+ used (IO_MTIME_EN = false).
+- msw_irq_i: Machine software interrupt (MSI). This interrupt is used for
+ inter-processor interrupts in multi-core systems. However, it can also be
+ used for any custom purpose.
+- mext_irq_i: Machine external interrupt (MEI). This interrupt is used for any
+ processor-external interrupt source (like a platform interrupt controller).
+
+The RISC-V standard interrupts are level-triggered and high-active. Once set
+the signal has to stay high until the interrupt request is explicitly
+acknowledged (e.g. writing to a memory-mapped register). The RISC-V standard
+interrupts can NOT be acknowledged by writing zero to the according mip CSR bit.
+
+-----------------------
+
+https://stnolting.github.io/neorv32/#_address_space
+
+-----------------------
+
+CSRs:
+- Machine status
+ - Trap WFI in user mode
+ - Privilege level for data access in machine mode (use PPL)
+ - Previous privilege level
+ - Global interrupt enable
+ - Previous global interrupt enable
+- Machine interrupt enable
+ - One bit per interrupt
+- Machine trap vector
+- Machine scratch register (for traps)
+- Machine exception program counter
+- Machine trap cause
+ - High bit indicates exc vs int
+- Machine trap value (bad addr or inst)
+ - Address of faulting access
+- Machine interrupt pending
+- Physical memory protection
+- Machine cycle counter
+- Machine instruction retired counter
+- Machine timer
+- Machine counter setup
+ - User allowed to read each (bool)
+ - Inhibit
+- Machine hardware ID (vendor/arch/version)
+- Machine hardware thread ID
+
+Traps:
+- Exceptions:
+ - Instruction access
+ - Illegal instruction
+ - Store access
+ - Load access
+ - Machine ecall
+ - User ecall
+ - User wfi
+ - Breakpoint
+- Interrupts:
+ - Reset (with cause?)
+ - Machine external
+ - Machine software (typically inter-hart)
+ - Machine timer
+ - Extra
+
+`wfi` can kinda be implemented as `j -1` since it reduces switching...?
+So then if we allow catching it in user mode that should work
+And if interrupts are disabled, then we can trigger `shutdown`?
+
+Interrupt enable is cleared on entry to a handler, and filled with previous
+value on ERET. The interrupt pending bit is not automatically cleared
+
+CSR read/write
+CSR set/clear bit
+CSR immediate...
+
+ECALL Moves up a privilege level intentionally
+EBREAK Moves up a level as a breakpoint for debugging
+ERET Moves down a level using EPC and previous privilege level
+
+PMAs:
+- Atomicity
+- Mem/IO/Empty
+- Ordering...
+- Coherence (?)
+- Cacheability
+- Idempotency
+
+PMP:
+- RWX
+- Enable
+- Address encoding (?)
+- Lock (enforce in machine mode, clear only on reset)
+
+PMPs are statically ordered, RISC V has 16
+
+TLB clear on specific address
+
+- MXR Make eXecute Readable
+- SUM Supervisor User Memory mode
+
+- Valid
+- RWX
+- User mode
+- Global (both)
+- Accessed (RWX)
+- Dirty (W)
diff --git a/risc.isa b/risc.isa
new file mode 100644
index 0000000..7304ac6
--- /dev/null
+++ b/risc.isa
@@ -0,0 +1,258 @@
+RISC-16 ISA
+================================================================
+
+TODO: Intro
+
+Unprivileged Instructions
+----------------------------------------------------------------
+
+ .-----------[Formats]-----------.
+ |F E D C B A 9 8 7 6 5 4 3 2 1 0|
+ |-------------------------------|
+X: | op | rA | unsignedImmediate |
+Y: | op | rA | rB | signedImmed |
+Z: | op | rA | rB | rC | op2 |
+ '-------------------------------'
+
+ .-------------------------------.
+ALU: | 000 | rA | rB | rC | op2 | rA = op2(rB, rC)
+ |-------------------------------|
+AND: | 000 | rA | rB | rC | 0000 | rA = rB & rC
+OR: | 000 | rA | rB | rC | 0001 | rA = rB | rC
+XOR: | 000 | rA | rB | rC | 0010 | rA = rB ^ rC
+NOT: | 000 | rA | rB | 0 | 0010 | rA = ~rB
+ |-------------------------------|
+ADD: | 000 | rA | rB | rC | 0100 | rA = rB + rC
+SUB: | 000 | rA | rB | rC | 0101 | rA = rB - rC
+ |-------------------------------|
+LT: | 000 | rA | rB | rC | 1000 | rA = rB < rC signed
+LTU: | 000 | rA | rB | rC | 1001 | rA = rB < rC unsigned
+GE: | 000 | rA | rB | rC | 1010 | rA = rB >= rC signed
+GEU: | 000 | rA | rB | rC | 1011 | rA = rB >= rC unsigned
+ |-------------------------------|
+SHL: | 000 | rA | rB | rC | 0011 | rA = rB << ±rC
+SHA: | 000 | rA | rB | rC | 0110 | rA = rB <<< ±rC
+ROL: | 000 | rA | rB | rC | 0111 | rA = rB @ rC (roll left)
+ |-------------------------------|
+MSBL: | 000 | rA | rB | 0 | 0011 | rA = rB >> 8
+MSBA: | 000 | rA | rB | 0 | 0110 | rA = rB >>> 8
+LSBL: | 000 | rA | rB | 0 | 0000 | rA = rB & 0xFF
+LSBA: | 000 | rA | rB | 0 | 0001 | rA = rB & 0xFF (s-ext)
+SWPB: | 000 | rA | rB | 0 | 0111 | rA = {rB[7:0], rB[15:8]}
+ |-------------------------------|
+ADDI: | 001 | rA | rB | signedImmed | rA = rB + imm
+LUI: | 010 | rA | unsignedImmediate | rA = imm << 6
+LPC: | 010 | rA | 0 | rA = pc
+ |-------------------------------|
+LW: | 011 | rA | rB | signedImmed | rA = mem[rB + imm]
+SW: | 100 | rA | rB | signedImmed | mem[rB + imm] = rA
+ |-------------------------------|
+JALR: | 101 | rA | rB | signedImmed | rA = pc+1; pc = rB+imm
+BRA: | 110 | GZL | rB | signedImmed | if GZL(rB) pc = pc+1+imm
+ |-------------------------------|
+WFI: | 110 | 111 | 0 | -1 | Wait for interrupt/sleep
+ '-------------------------------'
+
+Pseudo instructions:
+NOP: AND 0,0,0
+NEG: SUB rA,0,rB
+BOOL: LTU rA,0,rB
+LNOT: GEU rA,0,rB
+MV rA,rB: ADDI rA,rB,0
+LI rA,imm: mix of LUI rA,imm; ADDI rA,rA,imm; ADDI rA,0,imm
+JR rB: JALR 0,rB,00
+JALR rB: JALR 7,rB,00
+RET: JALR 0,7,00
+CALL addr: LUI 7,addrh; JALR 7,7,addrl
+PUSH rA: SW rA,6,0; ADDI 6,6,-1
+POP rA: ADDI 6,6,1; LW rA,6,0
+PEEK rA: LW rA,6,1
+PEEK rA,off: LW rA,6,1+off
+BZ,BNZ,BGZ,BLZ,BGE,BLE,J
+
+Registers:
+0 Zero
+1 Arg / Rval | Caller
+2 Arg | Caller
+3 Arg | Caller
+4 | Callee
+5 | Callee
+6 Stack Ptr | Callee
+7 Return Addr | Caller
+
+Privileged Instructions (WIP)
+----------------------------------------------------------------
+
+SYS: | 111 | rA | rB | rC | op2 | System function
+
+RESERVED | 000 | rA | rB | rC | 11XX |
+
+TRAP: | 111 | 0 | 0 | 111 | trap |
+FENCE: | 111 | 0 | 0 | 001 | 0000 | Flush/invalidate data cache
+FENCEI: | 111 | 0 | 0 | 001 | 0001 | Invalidate instruction cache
+
+Privileged instructions:
+TLBW: | 111 | rA | rB | 001 | 0001 |
+RFE: | 111 | 0 | rB | 011 | 0 |
+SYS: | 111 | 0 | 0 | op | data | op+data from ID
+EXT: | 111 | rA | rB | op | data | op+data from ID
+
+CSR read/write
+CSR set/clear bit
+CSR immediate...
+
+csrrw, csrrc, csrrs: Read the specified CSR into a destination register and
+either write a source operand value to the register (csrrw), clear any 1 bit in
+the source operand in the register (csrrc), or set any 1 bit in the source
+operand in the register (csrrs). These instructions take three operands: The
+first is the destination register receiving the value read from the CSR, the
+second is the CSR address, and the third is a source register.
+
+ECALL Moves up a privilege level intentionally
+EBREAK Moves up a level as a breakpoint for debugging
+ERET Moves down a level using EPC and previous privilege level
+
+LR/SC?
+
+Privileged ops:
+SYS_MODE: 000 [MODE_RUN, MODE_SLEEP, MODE_HALT, MODE_RFU3-7, MODE_PANIC8-15] (Reserved for Future Use)
+SYS_TLB: 001 [FENCE, FENCEI, TLB_READ, TLB_WRITE, TLB_CLEAR]
+SYS_CRMV: 010 rA = cr[data]; cr[data] = rB; TODO define control registers
+SYS_RFE: 011
+RESERVED: 100
+SYS_INT: 101 [INT_IO, INT_CLOCK, INT_TIMER] TODO
+SYS_EXC: 110 [EXC_GENERAL, EXC_TLBUMISS, EXC_TLBKMISS, EXC_INVALIDOP, EXC_INVALIDADDR, EXC_PRIV]
+SYS_TRAP: 111 [TRAP_GENERAL, TRAP_BREAKPOINT, TRAP_HALT]
+
+SYS_INT and SYS_EXC don't *need* to be instructions but it makes it a little
+easier to implement the ISA electronically
+
+Kernel registers:
+0 Zero
+1 GPR1
+2 GPR2
+3 GPR3/TLB (Scratch; gets calculated PTE addr) | The RISC-V spec
+4 PSR (Processor status, see below) | puts all of this
+5 ISR (Current interrupt flags) | into Control Registers?
+6 IMR (Interrupt mask) | That would free up
+7 EPC (Gets return address for int/exc/trap) | some useful space...
+---------
+Program Counter
+Atomic reg/flag
+
+----------------------------------------------------------------
+
+Boot:
+EEPROM loaded at 0x8000 because kernel lower pages are fixed
+PTE initialized with one entry (0x8080 -> 0x0000, EEPROM)
+EEPROM code tries to load from hard drive, if it fails it has a basic monitor
+
+Standardized non-volatile storage access for at least a configurable
+bootloader... Unless that's left up to the memory controller? And on reset it
+loads from an EEPROM into a model-specific start address? Simple state machine?
+I still like the "starts in kernel mode at 0x8000 on physical page zero" thing,
+but maybe instead of being hardwired to the EEPROM it's loaded into RAM by the
+memory controller? Wait, but what about when there isn't a memory controller,
+in the simple single-cycle sequential version... maybe it's hardwired to the
+EEPROM then...?
+
+----------------------------------------------------------------
+
+Processor Status Register: iiiirrkkCcaaaaaa
+- Core ID (0-15)
+- Reset reason (power, external btn/other core, watchdog, self)
+- Kernel mode counter (0 = user mode)
+- Carry bit (kernel, user)
+- Application space ID (for memory paging)
+
+Control Registers: TODO
+It'd be good to have some kind of IO device detection
+Timers...
+
+Counters: TODO
+Cycle Cycles passed since reset
+Instr Instructions processed since reset
+Time Wall clock time since reset
+Watchdog 0=off, counts down, resets core when reaches zero
+
+Control Status Registers:
+- Timers (x3, plus 3 for compare? So 18? Or 24 for 64 bit... Enable/inhibit flags?)
+ - Cycle
+ - Instr
+ - Watchdog?
+- PSR (Processor status)
+ - ASID
+ - kernel mode
+ - reset reason
+ - core ID
+ - memory access translation while in kernel mode flag
+- TLB (Maybe interrupt data? gets calculated PTE addr)
+- Interrupt scratch register?
+- ISR (Current interrupt flags)
+- IMR (Interrupt mask)
+- EPC (Gets return address for int/exc/trap)
+- Interrupt vector?
+- ISA Version/features?
+- Coprocessor? FPU?
+- High mul/div word? Or just through SWPH instr...
+
+CSR access modes:
+- Read
+- Write
+- Set bit
+- Clear bit
+
+Leaves two bits for "CSR page" and three bits for CSR ID in rB, so max 32
+
+OS has to be able to tell what I/O is available and how much memory there is
+
+----------------------------------------------------------------
+
+Big endian! [01, 02, 03, 04] in IO becomes [0102, 0304] in mem/reg
+With 6 bit ASID, total of up to 8MB of RAM (4M addresses, 2^22)
+16 bit address space broken into 256 pages of 256 words each
+64 page tables (one per ASID)
+
+TLB: n entries, vkaaaaaannnnnnnn -> 00pppppppppppppp
+Valid bit, kernel bit, ASID, virtual page number -> physical page number
+Extra bits in page number reserved for future access control (Execute/Write)
+If kernel bit is set, ignore ASID? TODO
+Valid bit forced 1 on TLBW
+
+Memory write control with a recoverable illegal access exception would be
+useful for implementing forks with copy-on-write
+
+----------------------------------------------------------------
+
+Kernel Page 0:
+80-FF Kernel Page Table
+- C0-FF Root Page Table, points to application page tables
+- 80-BF Whatever
+70-7F Trap Vectors
+60-6F Exception Vectors
+50-5F Interrupt Vectors
+48-4F Kernel Registers
+40-47 User Registers
+00-3F Kernel Save Area
+
+Maybe 00-7F are unique per core? "Register page"?
+And EEPROM occupies 00-7F of physical memory!
+00-7F or 00-FF? TODO
+
+----------------------------------------------------------------
+
+RISC-V Notes:
+Memory access mode translation flag
+Scratch CSR for handlers...
+EPC as CSR, single trap handler address with "cause" CSR
+Clock enable/inhibit flags?
+TLB access as CSR?
+Debugging user software...
+Timer compare registers, trigger an interrupt when greater or equal
+2^32 milliseconds = 49 days, so RISC-V's are 64 bits... 48 bits would last 8000 years
+RISC-V says the RTC, even monotonic, should be memory mapped for power reasons
+Avoid interrupts by setting lower word(s) to -1 first (probably unnecessary, kernel mode)
+Breakpoint exception, access fault
+If a cause register is used, define priority of causes
+Interrupt/Exception data register? e.g. bad instruction, pte addr, reset type...
+TODO Paging...?
diff --git a/risc.todo b/risc.todo
new file mode 100644
index 0000000..5332351
--- /dev/null
+++ b/risc.todo
@@ -0,0 +1,228 @@
+Instruction questions:
+- 0 ALU C-F
+ - MUL
+ - DIV
+ - MOD?
+ - SWPH?
+
+mul: Multiply two 32-bit registers and store the lower 32 bits of the result in
+ the destination register.
+mulh, mulhu, mulhsu: Multiply two 32-bit registers and store the upper 32 bits
+ of the result in the destination register. Treat the multiplicands as both
+ signed (mulh), both unsigned (mulhu), or signed rs1 times unsigned rs2
+ (mulhsu). rs1 is the first source register in the instruction and rs2 is the
+ second.
+div, divu: Perform division of two 32-bit registers, rounding the result
+ toward zero, on signed (div) or unsigned (divu) operands.
+rem, remu: Return the remainder corresponding to the result of a div or divu
+ instruction on the operands. Division by zero does not raise an exception. To
+ detect division by zero, code should test the divisor and branch to an
+ appropriate handler if it is zero.
+
+Oberon has a "high" CSR for mul/div... feels weird? but effective...
+The high CSR also gets the remainder on a div, leaving space for unsigned multiply?
+Dividing by zero... RISC-V just ignores, leaves to software
+
+The quotient of division by zero has all bits set, and the remainder of
+division by zero equals the dividend. Signed division overflow occurs only when
+the most-negative integer is divided by −1. The quotient of a signed division
+with overflow is equal to the dividend, and the remainder is zero. Unsigned
+division overflow cannot occur.
+
+Signed or unsigned??
+
+Carry flags form pipeline bottlenecks/hazards...
+Flag bits are cheap in microcode/single issue, wider registers are cheap in out-of-order
+ALU exceptions are apparently considered a PITA, OS level multiply is annoying
+The intended design of RISC-V is that you'd do `add 1,2,3; lt 4,1,2`
+and then r4 holds your "carry out"
+
+Floating point? Math coprocessor? Or should that be IO
+
+Atomic ops:
+LW stores the loaded address in a special invisible atomic register and sets
+the (also invisible) atomic flag. Any writes from any core to the address in
+the register will clear the flag, as will SYS_RFE. SC tries to write to its
+specified address, but only does so if the address is identical to the atomic
+register (i.e. last loaded address) and the atomic flag is set (so it hasn't
+been touched since the load). After an SC, the atomic flag is clear (as part of
+the write) and the data register is set to 0 if the write was successful or -1
+if the write failed, allowing the program to retry the access. The atomic
+register and flag don't need to be read from or saved, since any context that
+would require that would already invalidate the atomic access. The atomic flag
+is NOT directly affected by MMIO devices, though interrupts may occur, so care
+should be taken to avoid unintended conflicts.
+
+-------------------------------------------------------------------------------
+
+Assembler questions:
+- Macro definition
+- Addressing
+- Segments?
+
+Uxntal
+===============
+
+Padding | Literals
+| absolute $ relative | # literal hex
+ |
+Labels | Ascii
+@ parent & child | " raw ascii
+ |
+Addressing | Pre-processor
+, lit relative | % macro-define ~ include
+. lit zero-page - raw byte |
+; lit absolute = raw short |
+
+%EMIT { #18 DEO }
+%HALT { #010f DEO }
+
+( init )
+|0100 @program
+ ;hello-word
+ &while
+ ( send ) LDAk EMIT
+ ( loop ) INC2 LDAk ,&while JCN
+ POP2
+ HALT
+BRK
+
+@hello-word "Hello 20 "World! 00
+
+MIPS/JonesForth
+===============
+
+ .text # following instructions placed in text
+ .data # following objects placed in data
+
+ .globl # make symbol available globally
+
+ .org 0x0100 # set absolute position
+a: .space 18 # uchar a[18]; or uint a[4];
+ .align 2 # align next object on 2^2-byte addr
+
+i: .word 2 # unsigned int i = 2;
+v: .word 1,3,5 # unsigned int v[3] = {1,3,5};
+h: .half 2,4,6 # unsigned short h[3] = {2,4,6};
+b: .byte 1,2,3 # unsigned char b[3] = {1,2,3};
+f: .float 3.14 # float f = 3.14;
+
+s: .asciiz "abc" # char s[4] = {'a','b','c','\0'};
+t: .ascii "abc" # char t[3] = {'a','b','c'};
+
+ li $t1, 5
+ sw $t0, x
+
+ .macro NEXT
+ lodsl
+ jmp *(%eax)
+ .endm
+
+ .macro PUSHRSP reg
+ lea -4(%ebp),%ebp
+ movl \reg,(%ebp)
+ .endm
+
+ PUSHRSP %esi
+ NEXT
+
+ .macro defcode name, namelen, flags=0, label
+ .section .rodata
+ .align 4
+ .globl name_\label
+name_\label :
+ .int link
+ .set link,name_\label
+ .byte \flags+\namelen
+ .ascii "\name"
+ .align 4
+ .globl \label
+\label :
+ .int code_\label
+ .text
+ //.align 4
+ .globl code_\label
+code_\label :
+ .endm
+
+ defcode "DROP",4,,DROP
+ pop %eax
+ NEXT
+
+RISC-V ABI
+===============
+
+lui a0, %hi(symbol)
+lw a0, %lo(symbol)(a0)
+
+lui a0, %hi(symbol)
+sw a1, %lo(symbol)(a0)
+
+lui a0, %hi(symbol)
+addi a0, a0, %lo(symbol)
+
+.Ltmp0: auipc a0, %pcrel_hi(symbol)
+ lw a0, %pcrel_lo(.Ltmp0)(a0)
+
+.Ltmp1: auipc a0, %pcrel_hi(symbol)
+ sw a1, %pcrel_lo(.Ltmp1)(a0)
+
+.Ltmp2: auipc a0, %pcrel_hi(symbol)
+ addi a0, a0, %pcrel_lo(.Ltmp2)
+
+Treat as pair? e.g.
+%symbol | lui 1, %hi(symbol)
+addi 2,1,0 | addi 2, 1, %lo(symbol)
+
+-------------------------------------------------------------------------------
+
+Whoa, what if, like, applications could request access to specific coprocessors
+from the OS based on their ASID and a 16/32 bit coprocessor model ID? And there
+were certain registers they could access and an interrupt the OS could pass
+along to a registered handler for when some status updated or results were
+ready? And the coprocessor could DMA to the ASID's address space? That'd solve
+mul/div, float, vector, user-mode drivers for I/O, other extensibility...
+
+To do DMA, a coprocessor sends an interrupt to the processor asking for what
+physical page it's supposed to access, and the processor basically just treats
+it like a TLB write, so then user-mode software doesn't have to care about page
+sizes or virtual memory at all!
+
+ASID 0 is reserved for kernel coprocessor DMA! Data access translation flag!
+ASID determines coprocessor instructions, so kernel can allocate coprocessor
+and perform operations "on behalf" of applications if necessary
+
+What if instead of registers, coprocessors gained control over four of the ALU
+instructions, with register reads and writes included?
+
+So then what, like, some sort of back-end coprocessor bus?
+- Coprocessor ID
+- Instruction
+- rB
+- rC
+- Writeback rA
+- Writeback interrupt + own ID?
+- Writeback interrupt data?
+- "Busy" if uniquely claimed...?
+And then coprocessors also have access to the "front-end" bus for main memory access
+Which in turn allows individual cores' memory units to watch for ld/sc invalidations!
+And L1 invalidations too maybe? IDK, maybe software just has to fence
+The front-end bus probably needs to use full 22 (or whatever) bit "physical"
+addressing then huh... Should be fine? Wait but then what about protections...
+
+So wait, does that mean any SC should be immediately flushed? Oh... how do the
+other cores know to flush their L1 for reads if ST is used instead... Maybe
+software fence convention... Mmm this feels a tad complicated. How finely are
+the L1 caches chunked? 16/8/4 words? Balance between speed and occupying the bus.
+Maybe there could be multiple lanes on the front bus? And back for that matter
+
+How are coprocessors detected? Maybe the interrupt ID writeback is pull-down?
+Solves interrupt priority too. And coprocessors stop responding to the
+"identify" command when they've been initialized? Each coprocessor has a unique
+hardware ID, and it responds to initialization with its genre and model? Ooh,
+do main cores get IDs too? That would allow for easy secondary core detection
+and initialization. Maybe certain cores could have fixed dedicated
+"coprocessors" too, like a dedicated FPU core or whatever
+
+Chip-external coprocessors would probably need a bridge of some kind, that
+would be a lot of wires otherwise, even for one lane
diff --git a/rtl/Makefile b/rtl/Makefile
new file mode 100644
index 0000000..22e7210
--- /dev/null
+++ b/rtl/Makefile
@@ -0,0 +1,42 @@
+TOPMOD := top
+ULX3S_SIZE := 85
+.PHONY: all clean prog progt progf progw progwt progwf
+# TOPMOD names the synthesized JSON netlist; ULX3S_SIZE becomes the nextpnr-ecp5 --<size>k device option.
+all: ulx3s.bit
+# Remove every generated artifact (netlist, routed config, bitstream).
+clean:
+ rm -rf $(TOPMOD).json ulx3s_out.config ulx3s.bit
+# Pack the routed text config into the bitstream.
+ulx3s.bit: ulx3s_out.config
+ ecppack ulx3s_out.config ulx3s.bit
+# Place and route the netlist using the pin constraints in ulx3s.lpf.
+ulx3s_out.config: $(TOPMOD).json
+ nextpnr-ecp5 --$(ULX3S_SIZE)k \
+ --json $(TOPMOD).json \
+ --lpf ulx3s.lpf \
+ --textcfg ulx3s_out.config
+# Synthesis. NOTE: project.ys hard-codes top.v/top.json, so changing TOPMOD also means editing project.ys.
+$(TOPMOD).json: project.ys $(wildcard *.v) $(wildcard *.mem)
+ yosys project.ys
+# Upload the bitstream with fujprog.
+prog: ulx3s.bit
+ fujprog ulx3s.bit
+# Upload, then open a serial terminal on /dev/ttyUSB0.
+progt: ulx3s.bit
+ fujprog ulx3s.bit
+ tio /dev/ttyUSB0
+# Upload with "-j FLASH" (presumably writes the board's flash instead of SRAM; confirm in fujprog docs).
+progf: ulx3s.bit
+ fujprog -j FLASH ulx3s.bit
+# progw* repeat the targets above through fujprog.exe (presumably the Windows build, e.g. from WSL; confirm) and then reset the terminal.
+progw: ulx3s.bit
+ fujprog.exe ulx3s.bit
+ tput reset
+# As progw, passing -t to fujprog.exe (presumably its built-in terminal; confirm).
+progwt: ulx3s.bit
+ fujprog.exe -t ulx3s.bit
+ tput reset
+# As progf, through fujprog.exe.
+progwf: ulx3s.bit
+ fujprog.exe -j FLASH ulx3s.bit
+ tput reset
diff --git a/rtl/alu.v b/rtl/alu.v
new file mode 100644
index 0000000..99c86ff
--- /dev/null
+++ b/rtl/alu.v
@@ -0,0 +1,107 @@
+`default_nettype none
+
+module SubALU // Logic, add/subtract and compare-flag unit used by FullALU
+#(parameter WIDTH = 16)
+ (input wire [WIDTH-1:0] x, // left operand
+ input wire [WIDTH-1:0] y, // right operand, inverted internally when sub=1
+ input wire [ 2:0] op, // 000 AND, 001 OR, 010 XOR, 011 sum, 100-111 compare flags
+ input wire sub, // 1: x - y (computed as x + ~y + 1); 0: x + y
+ output reg [WIDTH-1:0] out); // flag ops return the flag in bit 0, zero elsewhere
+ // With sub=1: 100 = x<y signed, 101 = x<y unsigned, 110/111 = the inverses (>=).
+ wire [WIDTH-1:0] true_y = sub? ~y : y;
+ wire [WIDTH-1:0] anded = x & true_y;
+ wire [WIDTH-1:0] xored = x ^ true_y;
+ wire [WIDTH-1:0] ored = x | true_y;
+ // TODO Carry look-ahead? Both operands are zero-extended so added[WIDTH] is the plain carry out.
+ wire [WIDTH:0] added = {1'b0, x} + {1'b0, true_y} + sub;
+ wire ltu = sub ^ added[WIDTH]; // unsigned: a subtraction borrows (no carry out) exactly when x < y
+ wire lts = x[WIDTH-1] ^ true_y[WIDTH-1] ^ added[WIDTH]; // bit WIDTH of the sign-extended sum: true sign even when bit WIDTH-1 overflows
+ always @* case (op)
+ 3'b000: out = anded;
+ 3'b001: out = ored;
+ 3'b010: out = xored;
+ 3'b011: out = added[WIDTH-1:0];
+ 3'b100: out = {{WIDTH-1{1'b0}}, lts};
+ 3'b101: out = {{WIDTH-1{1'b0}}, ltu};
+ 3'b110: out = {{WIDTH-1{1'b0}}, ~lts};
+ 3'b111: out = {{WIDTH-1{1'b0}}, ~ltu};
+ endcase
+endmodule
+
+module Shifter // 16-bit barrel rotator that also implements left and right shifts
+ (input wire [15:0] val, // value to rotate or shift
+ input wire [ 4:0] amt, // amt[3:0] is the left-rotate distance; amt[4] selects the right-shift path when shift=1
+ input wire shift, // 0: rotate left by amt[3:0]; 1: shift (left when amt[4]=0, right when amt[4]=1)
+ input wire arith, // on a right shift, fill the vacated high bits with val[15] instead of 0
+ output wire [15:0] out);
+ // Every mode starts as a left rotate by amt[3:0]; the two shift modes then fix up the bits that wrapped.
+ wire maskL = shift & amt[4]; // right shift: a left rotate by amt[3:0] equals a right rotate by 16-amt[3:0], then the high bits are overwritten
+ wire zeroR = shift & ~amt[4]; // left shift: feed zeros in on the right instead of the wrapped bits
+ // Four conditional stages move the value left by 1, 2, 4 and 8 positions.
+ wire [15:0] midA = amt[0]? { val[14:0], zeroR? 1'd0 : val[15 ]} : val;
+ wire [15:0] midB = amt[1]? {midA[13:0], zeroR? 2'd0 : midA[15:14]} : midA;
+ wire [15:0] midC = amt[2]? {midB[11:0], zeroR? 4'd0 : midB[15:12]} : midB;
+ wire [15:0] midD = amt[3]? {midC[ 7:0], zeroR? 8'd0 : midC[15: 8]} : midC;
+ // maskD ends up as 16'hFFFF << amt[3:0]: ones over the (16 - amt[3:0]) high bits that wrapped around.
+ wire [15:0] maskA = amt[0]? 16'hFFFE : 16'hFFFF;
+ wire [15:0] maskB = amt[1]? {maskA[13:0], 2'd0} : maskA;
+ wire [15:0] maskC = amt[2]? {maskB[11:0], 4'd0} : maskB;
+ wire [15:0] maskD = amt[3]? {maskC[ 7:0], 8'd0} : maskC;
+ // Right-shift fix-up candidates: masked high bits forced to all ones (sign fill) or all zeros.
+ wire [15:0] midSet = midD | maskD;
+ wire [15:0] midClr = midD & ~maskD;
+ // Rotate and left shift use midD unchanged; only a right shift applies the mask.
+ assign out = maskL? ((arith & val[15])? midSet : midClr) : midD;
+endmodule
+
+module FullALU // Decodes the 4-bit ALU function code onto the SubALU and Shifter datapaths
+ (input wire [15:0] x, // first operand; also the only operand of the special forms
+ input wire [15:0] y, // second operand; y[4:0] is the shift/rotate amount
+ input wire [ 3:0] op, // function code (op2 field of the ALU instruction format in risc.isa)
+ input wire special, // selects the single-operand variant of an op (presumably set by the CPU when rC is 0; confirm against the CPU module)
+ output reg [15:0] out);
+ // Results of the two datapaths; the case statement below picks one per op.
+ wire [15:0] saluOut;
+ wire [15:0] shftOut;
+ // Control signals produced by the decoder below.
+ reg [2:0] saluOp;
+ reg saluSub;
+ reg shftS;
+ reg shftA;
+
+ SubALU salu(.x(x), .y(y), .out(saluOut),
+ .op(saluOp), .sub(saluSub));
+ // In special mode the shift amount is forced to 5'b11000 (amt[4]=1, amt[3:0]=8).
+ Shifter shft(.val(x), .amt(special? 5'b11000 : y[4:0]),
+ .out(shftOut), .shift(shftS), .arith(shftA));
+ // Defaults are assigned first so that every branch drives all five regs.
+ always @* begin
+ out = 0;
+ saluOp = 0;
+ saluSub = 0;
+ shftS = 0;
+ shftA = 0;
+
+ case (op)
+ 4'b0000: if (special) begin out = {8'd0, x[7:0]}; end else
+ begin out = saluOut; saluOp = 3'b000; saluSub = 0; end // AND; special form: LSBL (low byte, zero-extended)
+ 4'b0001: if (special) begin out = {{8{x[7]}}, x[7:0]}; end else
+ begin out = saluOut; saluOp = 3'b001; saluSub = 0; end // OR; special form: LSBA (low byte, sign-extended)
+ 4'b0010: if (special) begin out = ~x; end else
+ begin out = saluOut; saluOp = 3'b010; saluSub = 0; end // XOR; special form: NOT
+ 4'b0011: begin out = shftOut; shftS = 1; shftA = 0; end // SHL (logical shift); special form: MSBL
+ 4'b0100: begin out = saluOut; saluOp = 3'b011; saluSub = 0; end // ADD
+ 4'b0101: begin out = saluOut; saluOp = 3'b011; saluSub = 1; end // SUB
+ 4'b0110: begin out = shftOut; shftS = 1; shftA = 1; end // SHA (arithmetic shift); special form: MSBA
+ 4'b0111: begin out = shftOut; shftS = 0; shftA = 0; end // ROL (rotate); special form rotates by 8, i.e. SWPB
+ 4'b1000: begin out = saluOut; saluOp = 3'b100; saluSub = 1; end // LT (signed), via subtraction
+ 4'b1001: begin out = saluOut; saluOp = 3'b101; saluSub = 1; end // LTU (unsigned)
+ 4'b1010: begin out = saluOut; saluOp = 3'b110; saluSub = 1; end // GE (signed)
+ 4'b1011: begin out = saluOut; saluOp = 3'b111; saluSub = 1; end // GEU (unsigned)
+ 4'b1100:; // 11xx codes are unassigned: out keeps its default of 0
+ 4'b1101:;
+ 4'b1110:;
+ 4'b1111:;
+ endcase
+ end
+endmodule
diff --git a/rtl/boot.mem b/rtl/boot.mem
new file mode 100644
index 0000000..b60f36e
--- /dev/null
+++ b/rtl/boot.mem
@@ -0,0 +1,52 @@
+// Minimal Loader
+// -----------------------------
+
+// Registers:
+// 1 char
+// 2 last char
+// 3 scratch
+// 4 char mask (0x000F)
+// 5 counter
+// 6 pointer
+// 7 word
+
+// Setup \ .org 0000
+2800 // 12000 \ addi 2,0,00
+300F // 1400F \ addi 4,0,0F
+381F // 1601F \ addi 6,0,@data
+
+// Loop start
+340F // 1500F \ addi 5,0,0F
+3C00 // 17000 \ addi 7,0,00
+
+// Read byte from serial
+647E // 3107E \ lw 1,0,@uart ; read
+C4FE // 6117E \ blz 1,-2 ; if busy, loop
+0CA2 // 03122 \ xor 3,1,2 ; get diff
+0D83 // 03303 \ msbl 3,3 ; get upper diff
+C9FB // 6237B \ bz 3,-5 ; if not new, loop
+2880 // 12100 \ mv 2,1 ; save new char id
+
+// If it's a space, start (jump to last word)
+0480 // 01100 \ lsbl 1,1
+2CE0 // 13160 \ addi 3,1,-20
+D581 // 65301 \ bnz 3,@decr
+A300 // 50600 \ jr 6
+
+// Decrement counter
+36FD // 1557D \ addi 5,5,-3
+
+// Shift into place
+04C0 // 01140 \ and 1,1,4
+CA82 // 62502 \ bz 5,@nosh
+04D3 // 01153 \ shl 1,1,5
+0494 // 01114 \ add 1,1,1
+1F94 // 07714 \ add 7,7,1
+
+// If unfinished, loop
+D6EF // 6556F \ bnz 5,@read
+
+// Store and loop
+3B01 // 16601 \ addi 6,6,1
+9F00 // 47600 \ sw 7,6,0
+DC6A // 6706A \ j @loop
diff --git a/rtl/bram.v b/rtl/bram.v
new file mode 100644
index 0000000..0e15911
--- /dev/null
+++ b/rtl/bram.v
@@ -0,0 +1,41 @@
+`default_nettype none
+
+module BlockRAM // Synchronous RAM with one or two independently clocked ports
+#(parameter DUAL_PORT = 1, // 0 omits the port B logic (data_out_b is then never driven)
+ parameter DATA_WIDTH = 16,
+ parameter ADDR_WIDTH = 16, // depth is 2**ADDR_WIDTH words
+ parameter INITIAL_FILE = "") // hex image loaded with $readmemh; "" skips initialisation
+ (input wire [(DATA_WIDTH-1):0] data_in_a, data_in_b, // explicit "wire": required by strict tools under `default_nettype none
+ input wire [(ADDR_WIDTH-1):0] addr_a, addr_b,
+ input wire clken_a, clken_b, clk_a, clk_b, we_a, we_b,
+ output reg [(DATA_WIDTH-1):0] data_out_a, data_out_b);
+
+ reg [DATA_WIDTH-1:0] ram [0:2**ADDR_WIDTH-1];
+ generate if (INITIAL_FILE != "")
+ initial $readmemh(INITIAL_FILE, ram);
+ endgenerate
+ // Port A: outputs are registered; a write also forwards the new word to data_out_a.
+ always @(posedge clk_a) begin
+ if (clken_a) begin
+ if (we_a) begin
+ ram[addr_a] <= data_in_a;
+ data_out_a <= data_in_a;
+ end else begin
+ data_out_a <= ram[addr_a];
+ end
+ end
+ end
+ // Port B: same behaviour as port A, generated only when DUAL_PORT is non-zero.
+ generate if (DUAL_PORT)
+ always @(posedge clk_b) begin
+ if (clken_b) begin
+ if (we_b) begin
+ ram[addr_b] <= data_in_b;
+ data_out_b <= data_in_b;
+ end else begin
+ data_out_b <= ram[addr_b];
+ end
+ end
+ end
+ endgenerate
+endmodule
diff --git a/rtl/cla.v b/rtl/cla.v
new file mode 100644
index 0000000..56055d7
--- /dev/null
+++ b/rtl/cla.v
@@ -0,0 +1,42 @@
+`default_nettype none
+
+module full_adder // One-bit full adder
+ (input wire i_bit1, // first addend bit
+ input wire i_bit2, // second addend bit
+ input wire i_carry, // carry in
+ output wire o_sum, // i_bit1 + i_bit2 + i_carry, low bit
+ output wire o_carry); // carry out
+ // Carry out: both addend bits set, or exactly one set together with the carry in.
+ assign o_sum = i_bit1 ^ i_bit2 ^ i_carry;
+ assign o_carry = ((i_bit1 ^ i_bit2) & i_carry) | (i_bit1 & i_bit2);
+endmodule
+
+module carry_lookahead_adder // Unsigned adder: o_result = i_add1 + i_add2, carry out in bit WIDTH
+#(parameter WIDTH = 16) // a default is mandatory in Verilog-2005; omitting it is SystemVerilog-only
+ (input wire [WIDTH-1:0] i_add1,
+ input wire [WIDTH-1:0] i_add2,
+ output wire [WIDTH :0] o_result);
+ // w_C[k] is the carry into bit k; w_G/w_P are the per-bit generate/propagate terms.
+ wire [WIDTH :0] w_C;
+ wire [WIDTH-1:0] w_G, w_P, w_SUM;
+ assign w_C[0] = 1'b0; // Carry in
+ // Sum bits come from full adders fed by w_C; their own carry outputs are left unconnected on purpose.
+ genvar ii;
+ generate for (ii=0; ii<WIDTH; ii=ii+1) begin
+ full_adder full_adder_inst(
+ .i_bit1(i_add1[ii]),
+ .i_bit2(i_add2[ii]),
+ .i_carry(w_C[ii]),
+ .o_sum(w_SUM[ii]),
+ .o_carry());
+ end endgenerate
+ // Carries are derived from generate/propagate instead of the full adders' carry outputs.
+ genvar jj;
+ generate for (jj=0; jj<WIDTH; jj=jj+1) begin
+ assign w_G[jj] = i_add1[jj] & i_add2[jj]; // Generate
+ assign w_P[jj] = i_add1[jj] | i_add2[jj]; // Propagate
+ assign w_C[jj+1] = w_G[jj] | (w_P[jj] & w_C[jj]); // Carry
+ end endgenerate
+
+ assign o_result = {w_C[WIDTH], w_SUM};
+endmodule
diff --git a/rtl/debouncer.v b/rtl/debouncer.v
new file mode 100644
index 0000000..57ae23f
--- /dev/null
+++ b/rtl/debouncer.v
@@ -0,0 +1,28 @@
+`default_nettype none
+
+module debouncer #(parameter CNT_WIDTH=16, ACT_HI=1) // ACT_HI=0 inverts sig (active-low button)
+ (input wire clk, // explicit "wire" on ports: required by strict tools under `default_nettype none
+ input wire sig, // Glitchy, async to clk pushbutton signal
+ output reg state, // Whether the button is currently pressed
+ output wire down, // Single cycle pulse on press
+ output wire up); // Single cycle pulse on release
+
+ // Synchronize the pushbutton signal to the clock domain
+ reg sync0; always @(posedge clk) sync0 <= ACT_HI? sig : ~sig;
+ reg sync1; always @(posedge clk) sync1 <= sync0;
+ // Counts consecutive cycles during which the synchronized input disagrees with state.
+ reg [CNT_WIDTH-1:0] cnt;
+
+ wire idle = (state == sync1);
+ wire trigger = &cnt; // Counter has reached max value, signal has stabilized
+ // Any agreement restarts the count; state flips only after 2**CNT_WIDTH-1 counted cycles of disagreement.
+ always @(posedge clk)
+ if (idle) cnt <= 0;
+ else begin
+ cnt <= cnt + 1;
+ if (trigger) state <= ~state;
+ end
+ // Pulses are asserted in the cycle just before state changes, so state still holds the old value here.
+ assign down = ~idle & trigger & ~state;
+ assign up = ~idle & trigger & state;
+endmodule
diff --git a/rtl/northbridge.v b/rtl/northbridge.v
new file mode 100644
index 0000000..643a6aa
--- /dev/null
+++ b/rtl/northbridge.v
@@ -0,0 +1,40 @@
+`default_nettype none
+
+// Splits the CPU memory bus between the on-chip BlockRAM and the south
+// bridge. An access goes to the south bridge when all address bits selected
+// by SB_MASK equal the corresponding bits of SB_ADDR; otherwise it goes to RAM.
+//
+// Parameters:
+//   DATA_WIDTH / ADDR_WIDTH - bus widths
+//   SB_ADDR / SB_MASK       - south bridge address window
+//   BOOT_FILE               - passed to the BlockRAM as INITIAL_FILE
+module NorthBridge
+#(parameter DATA_WIDTH = 16,
+  parameter ADDR_WIDTH = 16,
+  parameter SB_ADDR    = 16'hFFFF,
+  parameter SB_MASK    = 16'hFFFF,
+  parameter BOOT_FILE  = "")
+  (input  wire                  clk,
+   input  wire                  rst,        // Not used inside this module
+
+   input  wire                  cpu_we,
+   input  wire                  cpu_isData, // TODO
+   input  wire [ADDR_WIDTH-1:0] cpu_addr,
+   input  wire [DATA_WIDTH-1:0] from_cpu,
+   output wire [DATA_WIDTH-1:0] to_cpu,
+
+   output wire                  sb_we,
+   output wire [ADDR_WIDTH-1:0] sb_addr,
+   input  wire [DATA_WIDTH-1:0] from_sb,
+   output wire [DATA_WIDTH-1:0] to_sb);
+
+  // Address decode
+  wire sel_sb = ~|((cpu_addr ^ SB_ADDR) & SB_MASK);
+
+  // RAM side: writes are suppressed while the south bridge is selected
+  wire [DATA_WIDTH-1:0] ram_q;
+  wire                  we_ram = cpu_we & ~sel_sb;
+
+  BlockRAM #(.DUAL_PORT(0), .INITIAL_FILE(BOOT_FILE)) ram(
+    .clk_a(clk),
+    .clken_a(1'b1),
+    .we_a(we_ram),
+    .addr_a(cpu_addr),
+    .data_in_a(from_cpu),
+    .data_out_a(ram_q));
+
+  // South bridge side: only the address bits outside SB_MASK are forwarded
+  assign to_sb   = from_cpu;
+  assign sb_we   = cpu_we & sel_sb;
+  assign sb_addr = cpu_addr & ~SB_MASK;
+
+  // The read mux follows the decode of the previous cycle
+  reg sel_sb_q;
+  always @(posedge clk) begin
+    sel_sb_q <= sel_sb;
+  end
+
+  assign to_cpu = sel_sb_q ? from_sb : ram_q;
+
+endmodule
diff --git a/rtl/project.ys b/rtl/project.ys
new file mode 100644
index 0000000..50f2603
--- /dev/null
+++ b/rtl/project.ys
@@ -0,0 +1,2 @@
+# Yosys synthesis script for the design rooted at top.v.
+# top.v pulls in the remaining RTL files through its own include directives.
+read_verilog top.v
+# Synthesize for the Lattice ECP5 and write the resulting netlist to top.json.
+synth_ecp5 -json top.json
diff --git a/rtl/regfile.v b/rtl/regfile.v
new file mode 100644
index 0000000..c5c61ed
--- /dev/null
+++ b/rtl/regfile.v
@@ -0,0 +1,27 @@
+`default_nettype none
+
+// Register file with two combinational read ports and one clocked write port.
+//
+// Parameters:
+//   WIDTH - bits per register
+//   SIZE  - number of registers
+//
+// Ports:
+//   src1, src2 - read addresses; out1/out2 follow them without a clock
+//   tgt, tgtIn - write address and data, stored on posedge clk when `we` is set
+//
+// Every register starts at zero and writes to register 0 are discarded, so
+// register 0 always reads as zero.
+module RegFile
+#(parameter WIDTH = 16,
+  parameter SIZE  = 8)
+
+  (input  wire clk,
+   input  wire we,
+
+   input  wire [$clog2(SIZE)-1:0] src1,
+   input  wire [$clog2(SIZE)-1:0] src2,
+   input  wire [$clog2(SIZE)-1:0] tgt,
+
+   input  wire [WIDTH-1:0] tgtIn,
+   output wire [WIDTH-1:0] out1,
+   output wire [WIDTH-1:0] out2);
+
+  reg [WIDTH-1:0] data [0:SIZE-1];
+
+  // Plain Verilog step expression (i=i+1): `i++` is SystemVerilog-only
+  integer i;
+  initial for (i=0; i<SIZE; i=i+1) data[i] <= 0;
+
+  assign out1 = data[src1];
+  assign out2 = data[src2];
+
+  // `tgt` is used as a truth value: a write to register 0 is ignored
+  always @(posedge clk) if (we && tgt) data[tgt] <= tgtIn;
+endmodule
diff --git a/rtl/southbridge.v b/rtl/southbridge.v
new file mode 100644
index 0000000..2d3544a
--- /dev/null
+++ b/rtl/southbridge.v
@@ -0,0 +1,98 @@
+`default_nettype none
+
+// Peripheral hub behind the NorthBridge: LEDs, switches, buttons, the reset
+// button timer and the FTDI UART.
+//
+// Register map (only nb_addr[3:0] is decoded, for reads and writes alike):
+//   xxxF  BLINKEN    read : {sw[3:0], 5'd0, btn_db[6:0]}
+//                    write: led <= from_nb[7:0]
+//   xxxE  FTDI_UART  read : {ftx_busy, frx_cnt[6:0], frx_data[7:0]}
+//                    write: from_nb[7:0] is latched and a TX strobe is raised
+//                           in the following cycle
+//   other            read : 0
+//
+// to_nb is registered, so read data follows nb_addr by one clock.
+//
+// btn_db[0..5] are the debounced btn[1..6]; btn_db[6] is the debounced,
+// inverted btn[0]. Keeping btn_db[6] high for RST_SEC seconds raises rst_out,
+// which stays high until btn_db[6] drops again.
+module SouthBridge
+  (input  wire clk,
+   output wire rst_out,
+
+   input  wire        nb_we,
+   input  wire [15:0] nb_addr,
+   input  wire [15:0] from_nb,
+   output reg  [15:0] to_nb,
+
+   input  wire [3:0] sw,
+   input  wire [6:0] btn,
+   output reg  [7:0] led,
+
+   input  wire from_ftdi_uart,
+   output wire to_ftdi_uart);
+
+  localparam CLOCK_RATE_HZ = 25_000_000;
+  localparam RST_SEC       = 1;
+
+  // The X digits are wildcards for the casex read decode below
+  localparam ADDR_BLINKEN   = 16'hXXXF;
+  localparam ADDR_FTDI_UART = 16'hXXXE;
+
+  reg        just_wrote    = 0;
+  reg [15:0] just_wrote_to = 0;
+
+  // Write decode. `==` against a constant that contains X bits evaluates to X
+  // (never true) in simulation, so only the defined low nibble is compared.
+  // This matches what the casex read decode accepts.
+  wire wr_blinken    = nb_we && (nb_addr[3:0] == ADDR_BLINKEN[3:0]);
+  wire wr_ftdi_uart  = nb_we && (nb_addr[3:0] == ADDR_FTDI_UART[3:0]);
+  wire wrote_ftdi_uart =
+    just_wrote && (just_wrote_to[3:0] == ADDR_FTDI_UART[3:0]);
+
+  // ---------------------------------------------------------------------------
+  // Blinkenlights
+
+  wire [6:0] btn_db;
+  debouncer dbO(clk, btn[1], btn_db[0]);
+  debouncer dbX(clk, btn[2], btn_db[1]);
+  debouncer dbU(clk, btn[3], btn_db[2]);
+  debouncer dbD(clk, btn[4], btn_db[3]);
+  debouncer dbL(clk, btn[5], btn_db[4]);
+  debouncer dbR(clk, btn[6], btn_db[5]);
+
+  // btn[0] is active low
+  debouncer #(.ACT_HI(0)) dbP(clk, btn[0], btn_db[6]);
+
+  localparam RST_CLK       = CLOCK_RATE_HZ * RST_SEC;
+  localparam RST_BLINK_CLK = RST_CLK - (CLOCK_RATE_HZ / 20);
+
+  // TODO Software reset?
+  // Counts clocks while btn_db[6] is held and saturates at RST_CLK
+  reg [28:0] rst_cnt = 0;
+  assign rst_out = rst_cnt == RST_CLK;
+  always @(posedge clk) if (btn_db[6]) begin
+    if (~rst_out) rst_cnt <= rst_cnt + 1;
+  end else rst_cnt <= 0;
+
+  // Priority: reset clears the LEDs, the last 1/20 s before reset lights them
+  // all, otherwise a CPU write sets them
+  always @(posedge clk) if (rst_out) led <= 0;
+    else if (rst_cnt >= RST_BLINK_CLK) led <= 8'hFF;
+    else if (wr_blinken) led <= from_nb[7:0];
+
+  wire [15:0] blinken_read = {sw, 5'd0, btn_db};
+
+  // ---------------------------------------------------------------------------
+  // FTDI UART
+
+  wire       frx_avail; // RX data available (strobe, TODO use for interrupt)
+  wire [7:0] frx_data;  // Data received
+  reg  [7:0] ftx_data;  // Data to send
+  wire       ftx_stb;   // Strobe to send data
+  wire       ftx_busy;  // Sending
+
+  localparam FTDI_BAUD_RATE    = 115_200;
+  localparam FTDI_CLK_PER_BAUD = CLOCK_RATE_HZ / FTDI_BAUD_RATE;
+
+  UartTX #(FTDI_CLK_PER_BAUD[23:0]) ftx(clk, ftx_stb, ftx_data, to_ftdi_uart, ftx_busy);
+  UartRX #(FTDI_CLK_PER_BAUD[23:0]) frx(clk, rst_out, from_ftdi_uart, frx_avail, frx_data);
+
+  // The byte is latched in the write cycle; the strobe follows one cycle later
+  always @(posedge clk) if (wr_ftdi_uart) ftx_data <= from_nb[7:0];
+  assign ftx_stb = wrote_ftdi_uart;
+
+  // Count of received bytes (wraps at 7 bits). The two ifs are independent:
+  // if frx_avail and rst_out coincide, the increment is the one that applies.
+  reg [6:0] frx_cnt = 0;
+  always @(posedge clk) begin
+    if (rst_out) frx_cnt <= 0;
+    if (frx_avail) frx_cnt <= frx_cnt + 1;
+  end
+
+  wire [15:0] ftdi_uart_read = {ftx_busy, frx_cnt, frx_data};
+
+  // ---------------------------------------------------------------------------
+
+  // Registered read mux
+  always @(posedge clk) casex (nb_addr)
+    ADDR_BLINKEN:   to_nb <= blinken_read;
+    ADDR_FTDI_UART: to_nb <= ftdi_uart_read;
+    default:        to_nb <= 0;
+  endcase
+
+  // For TX strobes
+  always @(posedge clk)
+    if (nb_we) begin
+      just_wrote    <= 1;
+      just_wrote_to <= nb_addr;
+    end else just_wrote <= 0;
+
+endmodule
diff --git a/rtl/test/cpu_tb.v b/rtl/test/cpu_tb.v
new file mode 100644
index 0000000..fdf05a9
--- /dev/null
+++ b/rtl/test/cpu_tb.v
@@ -0,0 +1,29 @@
+`default_nettype none
+`timescale 1ns / 1ps
+
+// Test bench: an UnprivilegedCPU wired to a 256-word BlockRAM loaded from
+// prog.mem. Every change on the memory bus is printed and a VCD is written.
+module top;
+
+  // Clock toggles every time unit; reset starts asserted
+  reg clk = 0;
+  reg rst = 1;
+  always #1 clk = ~clk;
+
+  wire [15:0] mem_in;
+  wire [15:0] mem_out;
+  wire [15:0] mem_addr;
+  wire        mem_we;
+
+  // Only the low 8 address bits reach the RAM
+  BlockRAM #(
+    .DUAL_PORT(0),
+    .ADDR_WIDTH(8),
+    .INITIAL_FILE("prog.mem")
+  ) ram(
+    .clk_a(clk),
+    .clken_a(1'b1),
+    .we_a(mem_we),
+    .addr_a(mem_addr[7:0]),
+    .data_in_a(mem_out),
+    .data_out_a(mem_in));
+
+  UnprivilegedCPU dut(
+    .clk(clk),
+    .rst(rst),
+    .en(1'b1),
+    .mem_in(mem_in),
+    .mem_out(mem_out),
+    .mem_addr(mem_addr),
+    .mem_we(mem_we));
+
+  initial begin
+    $monitor("%4X: %4X <= %4X (%d)", mem_addr, mem_in, mem_out, mem_we);
+    $dumpfile("cpu_tb.vcd");
+    $dumpvars(4);
+
+    // Release reset after one time unit, stop 31 time units later
+    #1 rst <= 0;
+    #31 $finish;
+  end
+
+endmodule
diff --git a/rtl/test/prog.mem b/rtl/test/prog.mem
new file mode 100644
index 0000000..fb7c283
--- /dev/null
+++ b/rtl/test/prog.mem
@@ -0,0 +1,16 @@
+//2404 // 1,1,0,4 // addi 1,0,@fn
+//FC80 // 7,7,1,0 // jalr 7,1
+//8408 // 4,1,0,8 // sw 1,0,@dat
+//DC7F // 6,7,0,-1 // bra 0,-1
+//2402 // 1,1,0,2 // addi 1,0,2
+//E380 // 7,0,7,0 // jalr 0,7
+
+4404 // 2,1,0100 // 647E // 3,1,0,-2 // lw 1,0,@uart
+C4FE // 6,1,1,-2 // blz 1,-2
+0CA2 // 0,3,1,2,2 // xor 3,1,2
+0D83 // 0,3,3,0,3 // ubl 3,3
+C9FB // 6,2,3,-5 // bz 3,-5
+2880 // 1,2,1,0 // mv 2,1
+847F // 4,1,0,-1 // sw 1,0,@led
+847E // 4,1,0,-2 // sw 1,0,@uart
+DC77 // 6,7,0,-9 // j 0,-9
diff --git a/rtl/top.v b/rtl/top.v
new file mode 100644
index 0000000..cf174e7
--- /dev/null
+++ b/rtl/top.v
@@ -0,0 +1,90 @@
+`include "alu.v"
+`include "regfile.v"
+`include "unprivileged_cpu.v"
+
+`include "bram.v"
+`include "northbridge.v"
+
+`include "debouncer.v"
+`include "uart_rx.v"
+`include "uart_tx.v"
+//`include "i2c_controller.v"
+//`include "spi_controller.v"
+//`include "ps2_controller.v"
+`include "southbridge.v"
+
+// Board top level: connects the CPU, NorthBridge and SouthBridge and exposes
+// the switches, buttons, LEDs and FTDI serial pins. Ports for peripherals that
+// are not wired up yet are kept below as comments.
+module top
+ (input wire clk_25mhz,
+ input wire [3:0] sw,
+ input wire [6:0] btn,
+ output wire [7:0] led,
+
+ // Named from the FTDI chip's side: ftdi_txd is received by this design,
+ // ftdi_rxd is driven by it
+ input wire ftdi_txd,
+ output wire ftdi_rxd,
+
+ //output wire oled_csn,
+ //output wire oled_clk,
+ //output wire oled_mosi,
+ //output wire oled_dc,
+ //output wire oled_resn,
+ //output wire oled_bl,
+
+ //output wire flash_csn,
+ ////output wire flash_clk, // Special, see below
+ //output wire flash_mosi,
+ //input wire flash_miso,
+ //output wire flash_holdn,
+ //output wire flash_wpn,
+
+ //output wire sd_csn,
+ //output wire sd_clk,
+ //output wire sd_di,
+ //input wire sd_do,
+
+ //// Used as PS/2
+ //input wire usb_fpga_bd_dp,
+ //input wire usb_fpga_bd_dn,
+ //output wire usb_fpga_pu_dp,
+ //output wire usb_fpga_pu_dn,
+
+ //inout wire gpdi_scl,
+ //inout wire gpdi_sda,
+
+ output wire wifi_en);
+ // wifi_en is driven low permanently (presumably keeps the WiFi module
+ // disabled - confirm against the board documentation)
+ assign wifi_en = 0;
+
+ //wire flash_clk;
+ //wire flash_tri = 1'b0;
+ //USRMCLK u1 (.USRMCLKI(flash_clk), .USRMCLKTS(flash_tri));
+ //// Flash clock pin is special because it's used for FPGA config
+ //// This module lets us use it as a normal output pin
+
+ // -------------------------------------------------------------------
+
+ // rst is produced by the SouthBridge (rst_out) and fed to the CPU and
+ // NorthBridge
+ wire clk, rst;
+ //reg [22:0] clk_div = 0;
+ //always @(posedge clk_25mhz) clk_div <= clk_div + 1;
+ // As written, the block comment hides the divided-clock alternative, so
+ // clk is clk_25mhz directly
+ assign clk = /*/ clk_div[0]; // */ clk_25mhz;
+
+ // -------------------------------------------------------------------
+
+ wire [15:0] cpu_addr, sb_addr;
+ wire [15:0] nb_to_cpu, cpu_to_nb, nb_to_sb, sb_to_nb;
+ wire cpu_isData, cpu_we, sb_we;
+
+ UnprivilegedCPU cpu(.clk(clk), .rst(rst), .en(1'b1),
+ .mem_addr(cpu_addr), .mem_in(nb_to_cpu), .mem_out(cpu_to_nb),
+ .mem_isData(cpu_isData), .mem_we(cpu_we));
+
+ // Addresses FFF0-FFFF are routed to the SouthBridge; everything else goes
+ // to the RAM inside the NorthBridge, which is initialised from boot.mem
+ NorthBridge #(.SB_ADDR(16'hFFF0), .SB_MASK(16'hFFF0),
+ .BOOT_FILE("boot.mem")) nb(.clk(clk), .rst(rst), .cpu_isData(cpu_isData),
+ .cpu_addr(cpu_addr), .from_cpu(cpu_to_nb), .to_cpu(nb_to_cpu), .cpu_we(cpu_we),
+ .sb_addr(sb_addr), .from_sb(sb_to_nb), .to_sb(nb_to_sb), .sb_we(sb_we));
+
+ SouthBridge sb(.clk(clk), .rst_out(rst), .nb_we(sb_we),
+ .nb_addr(sb_addr), .from_nb(nb_to_sb), .to_nb(sb_to_nb),
+
+ .sw(sw), .btn(btn), .led(led), // Blinkenlights
+ .from_ftdi_uart(ftdi_txd), .to_ftdi_uart(ftdi_rxd));
+
+endmodule
diff --git a/rtl/uart_rx.v b/rtl/uart_rx.v
new file mode 100644
index 0000000..d8650f5
--- /dev/null
+++ b/rtl/uart_rx.v
@@ -0,0 +1,74 @@
+`default_nettype none
+
+// UART receiver: one start bit, 8 data bits (LSB first), no parity. The level
+// of the stop bit is not checked.
+//
+// Parameter:
+//   CLOCKS_PER_BAUD - clk cycles per bit
+//
+// Ports:
+//   rst     - clears `data` only; the state machine is not reset by it
+//   uart_rx - serial input, synchronized internally
+//   write   - single-cycle strobe when a new byte has been placed in `data`
+//   data    - most recently received byte
+//
+// All registers are declared before the always blocks that use them; the
+// logic itself is unchanged.
+module UartRX
+#(parameter [23:0] CLOCKS_PER_BAUD = 24'd1)
+  (input  wire       clk,
+   input  wire       rst,
+   input  wire       uart_rx,
+   output reg        write = 0,
+   output reg  [7:0] data  = 0);
+
+  localparam [23:0] HALF_BAUD = { 1'b0, CLOCKS_PER_BAUD[23:1] } - 1;
+
+  // States 0..7 each sample one data bit, RX_STOP samples the stop bit
+  localparam RX_START = 4'h0;
+  localparam RX_STOP  = 4'h8;
+  localparam RX_IDLE  = 4'hf;
+
+  reg [3:0]  state = RX_IDLE;
+
+  reg        rx_db1 = 0, rx_db2 = 0, rx_bit = 0;
+
+  reg        baud_edge = 0;
+  reg [23:0] baud_counter;
+
+  reg [23:0] chg_counter  = 0; // Time since last change
+  reg        baud_halfway = 0; // Used to detect middle of start bit
+
+  reg [7:0]  data_reg;
+
+  // --------------------------------------------------------------------------
+
+  // Bit timer: held at its reload value while idle, then a baud_edge pulse
+  // every CLOCKS_PER_BAUD cycles
+  always @(posedge clk) begin
+    if (baud_edge || (state == RX_IDLE))
+      baud_counter <= CLOCKS_PER_BAUD - 1;
+    else baud_counter <= baud_counter - 1;
+
+    // Avoid weird timing dependencies by calculating right *before* edge
+    baud_edge <= (state != RX_IDLE) && (baud_counter == 1);
+  end
+
+  // --------------------------------------------------------------------------
+
+  // Time since last change, used for break/idle detection
+  always @(posedge clk)
+    if (rx_db2 != rx_bit) chg_counter <= 0;
+    else chg_counter <= chg_counter + 1;
+
+  // Line has been low and unchanged for at least HALF_BAUD cycles
+  always @(posedge clk) baud_halfway <= ~rx_bit && (chg_counter >= HALF_BAUD);
+
+  always @(posedge clk) begin
+    if (state == RX_IDLE) begin
+      if (~rx_bit && baud_halfway) state <= RX_START;
+
+    end else if (baud_edge) begin
+      if (state < RX_STOP) state <= state + 1;
+      else state <= RX_IDLE;
+    end
+  end
+
+  // --------------------------------------------------------------------------
+
+  // Debounce input
+  always @(posedge clk) begin
+    rx_db1 <= uart_rx;
+    rx_db2 <= rx_db1;
+    rx_bit <= rx_db2;
+  end
+
+  // Shift register: a new bit enters at the top on every baud_edge. At the
+  // stop-bit edge `data` takes the value from before that final shift.
+  always @(posedge clk) begin
+    write <= 0;
+    if (rst) data <= 0;
+    else begin
+      if (baud_edge) data_reg <= { rx_bit, data_reg[7:1] };
+      if (baud_edge && (state == RX_STOP)) begin
+        data <= data_reg; write <= 1;
+      end
+    end
+  end
+
+endmodule
diff --git a/rtl/uart_tx.v b/rtl/uart_tx.v
new file mode 100644
index 0000000..2c23878
--- /dev/null
+++ b/rtl/uart_tx.v
@@ -0,0 +1,74 @@
+`default_nettype none
+
+// UART transmitter: start bit, 8 data bits (LSB first), then the line returns
+// high for the stop bit.
+//
+// Parameter:
+//   CLOCKS_PER_BAUD - clk cycles per bit
+//
+// Ports:
+//   write   - request to send `data`; honoured only while `busy` is low
+//   data    - byte to send, captured when the request is accepted
+//   uart_tx - serial output, high when idle
+//   busy    - high from power-up and during a transfer
+//
+// `state` is declared before the always blocks that use it; the logic itself
+// is unchanged.
+module UartTX
+#(parameter [23:0] CLOCKS_PER_BAUD = 24'd1)
+  (input  wire       clk,
+   input  wire       write,
+   input  wire [7:0] data,
+   output reg        uart_tx = 1,
+   output reg        busy    = 1);
+
+  localparam TX_START = 4'h0;
+  localparam TX_STOP  = 4'h8;
+  localparam TX_IDLE  = 4'hf;
+
+  reg [3:0]  state = TX_IDLE;
+
+  reg        baud_edge    = 0;
+  reg [23:0] baud_counter = 24'h5;
+
+  reg [7:0]  r_data = 8'hFF;
+
+  // --------------------------------------------------------------------------
+
+  // Bit timer. While idle baud_edge is forced high unless a write is being
+  // accepted, in which case the counter is loaded for a full bit period.
+  always @(posedge clk) begin
+
+    // Avoid weird timing dependencies by calculating right *before* edge
+    baud_edge <= baud_counter == 1;
+
+    if (state == TX_IDLE) begin
+      if (write && !busy) begin
+        baud_counter <= CLOCKS_PER_BAUD - 1;
+        baud_edge <= 0;
+
+      end else begin
+        baud_counter <= 0;
+        baud_edge <= 1;
+      end
+
+    end else if (!baud_edge)
+      baud_counter <= baud_counter - 1;
+    else baud_counter <= CLOCKS_PER_BAUD - 1;
+  end
+
+  // --------------------------------------------------------------------------
+
+  always @(posedge clk) begin
+    if (!baud_edge) busy <= 1;
+
+    else if (state == TX_IDLE) begin
+      if (write && !busy) begin
+        state <= TX_START;
+        busy <= 1;
+
+      end else busy <= 0;
+
+    end else begin
+      busy <= 1;
+
+      // start bit, 8 data bits, stop bit
+      if (state <= TX_STOP) state <= state + 1;
+      else state <= TX_IDLE;
+    end
+  end
+
+  // --------------------------------------------------------------------------
+
+  // Output shift register; ones are shifted in behind the data so the line
+  // ends up high once all 8 bits have been sent
+  always @(posedge clk)
+    if (write && !busy) begin
+      uart_tx <= 0; // Start bit
+      r_data <= data;
+
+    end else if (baud_edge) begin
+      uart_tx <= r_data[0];
+      r_data <= { 1'b1, r_data[7:1] };
+    end
+
+endmodule
diff --git a/rtl/ulx3s.lpf b/rtl/ulx3s.lpf
new file mode 100644
index 0000000..e27bbe4
--- /dev/null
+++ b/rtl/ulx3s.lpf
@@ -0,0 +1,617 @@
+BLOCK RESETPATHS;
+BLOCK ASYNCPATHS;
+## ULX3S v3.1.6 and v3.1.7
+
+# The clock "usb" and "gpdi" sheet
+LOCATE COMP "clk_25mhz" SITE "G2";
+IOBUF PORT "clk_25mhz" PULLMODE=NONE IO_TYPE=LVCMOS33;
+FREQUENCY PORT "clk_25mhz" 25 MHZ;
+
+# JTAG and SPI FLASH voltage 3.3V and options to boot from SPI flash
+# write to FLASH possible any time from JTAG:
+#SYSCONFIG CONFIG_IOVOLTAGE=3.3 COMPRESS_CONFIG=ON MCCLK_FREQ=62 SLAVE_SPI_PORT=DISABLE MASTER_SPI_PORT=ENABLE SLAVE_PARALLEL_PORT=DISABLE;
+# write to FLASH possible from user bitstream:
+SYSCONFIG CONFIG_IOVOLTAGE=3.3 COMPRESS_CONFIG=ON MCCLK_FREQ=62 SLAVE_SPI_PORT=DISABLE MASTER_SPI_PORT=DISABLE SLAVE_PARALLEL_PORT=DISABLE;
+
+## USBSERIAL FTDI-FPGA serial port "usb" sheet
+LOCATE COMP "ftdi_rxd" SITE "L4"; # FPGA transmits to ftdi
+LOCATE COMP "ftdi_txd" SITE "M1"; # FPGA receives from ftdi
+LOCATE COMP "ftdi_nrts" SITE "M3"; # FPGA receives
+LOCATE COMP "ftdi_ndtr" SITE "N1"; # FPGA receives
+LOCATE COMP "ftdi_txden" SITE "L3"; # FPGA receives
+LOCATE COMP "ftdi_nrxled" SITE "L5"; # FPGA receives
+IOBUF PORT "ftdi_rxd" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "ftdi_txd" PULLMODE=UP IO_TYPE=LVCMOS33;
+IOBUF PORT "ftdi_nrts" PULLMODE=UP IO_TYPE=LVCMOS33;
+IOBUF PORT "ftdi_ndtr" PULLMODE=UP IO_TYPE=LVCMOS33;
+IOBUF PORT "ftdi_txden" PULLMODE=UP IO_TYPE=LVCMOS33;
+IOBUF PORT "ftdi_nrxled" PULLMODE=UP IO_TYPE=LVCMOS33;
+
+## LED indicators "blinkey" and "gpio" sheet
+LOCATE COMP "led[7]" SITE "H3";
+LOCATE COMP "led[6]" SITE "E1";
+LOCATE COMP "led[5]" SITE "E2";
+LOCATE COMP "led[4]" SITE "D1";
+LOCATE COMP "led[3]" SITE "D2";
+LOCATE COMP "led[2]" SITE "C1";
+LOCATE COMP "led[1]" SITE "C2";
+LOCATE COMP "led[0]" SITE "B2";
+IOBUF PORT "led[0]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "led[1]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "led[2]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "led[3]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "led[4]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "led[5]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "led[6]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "led[7]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+
+## Pushbuttons "blinkey", "flash", "power", "gpdi" sheet
+LOCATE COMP "btn[0]" SITE "D6"; # BTN_PWRn (inverted logic)
+LOCATE COMP "btn[1]" SITE "R1"; # FIRE1
+LOCATE COMP "btn[2]" SITE "T1"; # FIRE2
+LOCATE COMP "btn[3]" SITE "R18"; # UP
+LOCATE COMP "btn[4]" SITE "V1"; # DOWN
+LOCATE COMP "btn[5]" SITE "U1"; # LEFT
+LOCATE COMP "btn[6]" SITE "H16"; # RIGHT
+IOBUF PORT "btn[0]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "btn[1]" PULLMODE=DOWN IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "btn[2]" PULLMODE=DOWN IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "btn[3]" PULLMODE=DOWN IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "btn[4]" PULLMODE=DOWN IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "btn[5]" PULLMODE=DOWN IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "btn[6]" PULLMODE=DOWN IO_TYPE=LVCMOS33 DRIVE=4;
+
+## DIP switch "blinkey", "gpio" sheet
+LOCATE COMP "sw[0]" SITE "E8"; # SW1
+LOCATE COMP "sw[1]" SITE "D8"; # SW2
+LOCATE COMP "sw[2]" SITE "D7"; # SW3
+LOCATE COMP "sw[3]" SITE "E7"; # SW4
+IOBUF PORT "sw[0]" PULLMODE=DOWN IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sw[1]" PULLMODE=DOWN IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sw[2]" PULLMODE=DOWN IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sw[3]" PULLMODE=DOWN IO_TYPE=LVCMOS33 DRIVE=4;
+
+## SPI OLED DISPLAY SSD1331 (Color) or SSD1306 (B/W) "blinkey", "usb" sheet
+LOCATE COMP "oled_clk" SITE "P4"; # serdes refclkn_d0
+LOCATE COMP "oled_mosi" SITE "P3"; # serdes refclkp_d0
+LOCATE COMP "oled_resn" SITE "P2"; # serdes hdrxn0_d0ch1
+LOCATE COMP "oled_dc" SITE "P1"; # serdes hdrxp0_d0ch1
+LOCATE COMP "oled_csn" SITE "N2"; # serdes hdrxn0_d0ch0
+LOCATE COMP "oled_bl" SITE "J4"; # serdes hdrxp0_d0ch0
+IOBUF PORT "oled_clk" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "oled_mosi" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "oled_resn" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "oled_dc" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "oled_csn" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "oled_bl" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+
+## SPI Flash chip "flash" sheet
+LOCATE COMP "flash_csn" SITE "R2";
+LOCATE COMP "flash_clk" SITE "U3";
+LOCATE COMP "flash_mosi" SITE "W2";
+LOCATE COMP "flash_miso" SITE "V2";
+LOCATE COMP "flash_holdn" SITE "W1";
+LOCATE COMP "flash_wpn" SITE "Y2";
+#LOCATE COMP "flash_csspin" SITE "AJ3";
+#LOCATE COMP "flash_initn" SITE "AG4";
+#LOCATE COMP "flash_done" SITE "AJ4";
+#LOCATE COMP "flash_programn" SITE "AH4";
+#LOCATE COMP "flash_cfg_select[0]" SITE "AM4";
+#LOCATE COMP "flash_cfg_select[1]" SITE "AL4";
+#LOCATE COMP "flash_cfg_select[2]" SITE "AK4";
+IOBUF PORT "flash_csn" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "flash_clk" PULLMODE=DOWN IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "flash_mosi" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "flash_miso" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "flash_holdn" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "flash_wpn" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+#IOBUF PORT "flash_csspin" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+#IOBUF PORT "flash_initn" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+#IOBUF PORT "flash_done" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+#IOBUF PORT "flash_programn" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+#IOBUF PORT "flash_cfg_select[0]" PULLMODE=DOWN IO_TYPE=LVCMOS33 DRIVE=4;
+#IOBUF PORT "flash_cfg_select[1]" PULLMODE=DOWN IO_TYPE=LVCMOS33 DRIVE=4;
+#IOBUF PORT "flash_cfg_select[2]" PULLMODE=DOWN IO_TYPE=LVCMOS33 DRIVE=4;
+
+## SD card "sdcard", "usb" sheet
+# wifi_gpio2,4,12,13,14,15 are shared with SD card.
+# If any of wifi_gpio2,4,12,13 is used in toplevel, don't use sd_d[].
+# If SD is used in 1-bit SPI mode, wifi_gpio4,12 = sd_d[1,2] are free,
+LOCATE COMP "sd_clk" SITE "H2"; # sd_clk WiFi_GPIO14
+LOCATE COMP "sd_cmd" SITE "J1"; # sd_cmd_di (MOSI) WiFi GPIO15
+LOCATE COMP "sd_d[0]" SITE "J3"; # sd_d0_do (MISO) WiFi GPIO2
+LOCATE COMP "sd_d[1]" SITE "H1"; # sd_d1_irq WiFi GPIO4
+LOCATE COMP "sd_d[2]" SITE "K1"; # sd_d2 WiFi_GPIO12
+LOCATE COMP "sd_d[3]" SITE "K2"; # sd_d3_csn WiFi_GPIO13
+LOCATE COMP "sd_wp" SITE "P5"; # not connected
+LOCATE COMP "sd_cdn" SITE "N5"; # not connected
+IOBUF PORT "sd_clk" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sd_cmd" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sd_d[0]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sd_d[1]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sd_d[2]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4; # WiFi GPIO12 pulldown bootstrapping without 3.3V efuse
+IOBUF PORT "sd_d[3]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sd_wp" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sd_cdn" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+
+## SD card SPI mode, don't use with conflicting pins from above
+LOCATE COMP "sd_di" SITE "J1"; # sd_cmd
+LOCATE COMP "sd_do" SITE "J3"; # sd_d0
+LOCATE COMP "sd_csn" SITE "K2"; # sd_d3
+IOBUF PORT "sd_di" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sd_do" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sd_csn" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+
+## ADC SPI (MAX11123) "analog", "ram" sheet
+# input lines shared with GP,GN14-17
+LOCATE COMP "adc_csn" SITE "R17";
+LOCATE COMP "adc_mosi" SITE "R16";
+LOCATE COMP "adc_miso" SITE "U16";
+LOCATE COMP "adc_sclk" SITE "P17";
+IOBUF PORT "adc_csn" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "adc_mosi" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "adc_miso" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "adc_sclk" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+
+## Audio 4-bit DAC "analog", "gpio" sheet
+# output impedance: 75 ohm
+# Stereo 16 ohm earphones, analog audio,
+# SPDIF digital audio and composite video.
+LOCATE COMP "audio_l[3]" SITE "B3"; # JACK TIP (left audio)
+LOCATE COMP "audio_l[2]" SITE "C3";
+LOCATE COMP "audio_l[1]" SITE "D3";
+LOCATE COMP "audio_l[0]" SITE "E4";
+LOCATE COMP "audio_r[3]" SITE "C5"; # JACK RING1 (right audio)
+LOCATE COMP "audio_r[2]" SITE "D5";
+LOCATE COMP "audio_r[1]" SITE "B5";
+LOCATE COMP "audio_r[0]" SITE "A3";
+LOCATE COMP "audio_v[3]" SITE "E5"; # JACK RING2 (video or digital audio)
+LOCATE COMP "audio_v[2]" SITE "F5";
+LOCATE COMP "audio_v[1]" SITE "F2";
+LOCATE COMP "audio_v[0]" SITE "H5";
+IOBUF PORT "audio_l[3]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=16;
+IOBUF PORT "audio_l[2]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=16;
+IOBUF PORT "audio_l[1]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=16;
+IOBUF PORT "audio_l[0]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=16;
+IOBUF PORT "audio_r[3]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=16;
+IOBUF PORT "audio_r[2]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=16;
+IOBUF PORT "audio_r[1]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=16;
+IOBUF PORT "audio_r[0]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=16;
+IOBUF PORT "audio_v[3]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=16;
+IOBUF PORT "audio_v[2]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=16;
+IOBUF PORT "audio_v[1]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=16;
+IOBUF PORT "audio_v[0]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=16;
+
+## WiFi ESP-32 "wifi", "usb", "flash" sheet
+# wifi_gpio2,4,12,13,14,15 are shared with SD card.
+# If any of wifi_gpio2,4,12,13 is used in toplevel, don't use sd_d[].
+# If SD is used in 1-bit SPI mode, wifi_gpio4,12 = sd_d[1,2] are free,
+LOCATE COMP "wifi_en" SITE "J5"; # enable/reset WiFi
+LOCATE COMP "wifi_rxd" SITE "K3"; # FPGA transmits to WiFi
+LOCATE COMP "wifi_txd" SITE "K4"; # FPGA receives from WiFi
+LOCATE COMP "wifi_gpio0" SITE "F1"; # wifi_en on v3.0.x
+LOCATE COMP "wifi_gpio19" SITE "N4"; # wifi_gpio5 on v3.0.x
+LOCATE COMP "wifi_gpio21" SITE "K5";
+LOCATE COMP "wifi_gpio22" SITE "L2"; # wifi_gpio0 on v3.0.x
+LOCATE COMP "wifi_gpio25" SITE "E9";
+LOCATE COMP "wifi_gpio26" SITE "L1"; # wifi_gpio16 on v3.0.x
+LOCATE COMP "wifi_gpio27" SITE "N3"; # wifi_gpio17 on v3.0.x
+LOCATE COMP "wifi_gpio35" SITE "E6"; # FPGA transmits to WiFi
+# wifi lines shared with SD card
+LOCATE COMP "wifi_gpio2" SITE "J3"; # sd_d0_do (MISO) WiFi GPIO2
+LOCATE COMP "wifi_gpio4" SITE "H1"; # sd_d1_irq WiFi GPIO4
+LOCATE COMP "wifi_gpio12" SITE "K1"; # sd_d2 WiFi_GPIO12
+LOCATE COMP "wifi_gpio13" SITE "K2"; # sd_d3_csn WiFi_GPIO13
+LOCATE COMP "wifi_gpio14" SITE "H2"; # sd_clk WiFi_GPIO14
+LOCATE COMP "wifi_gpio15" SITE "J1"; # sd_cmd_di (MOSI) WiFi GPIO15
+# wifi lines shared with JTAG
+# LOCATE COMP "wifi_gpio5" SITE "U5"; # JTAG TMS, blue LED inverted logic
+# LOCATE COMP "wifi_gpio18" SITE "T5"; # JTAG TCK
+# LOCATE COMP "wifi_gpio23" SITE "R5"; # JTAG TDI
+# LOCATE COMP "wifi_gpio34" SITE "V4"; # JTAG TDO
+IOBUF PORT "wifi_en" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "wifi_rxd" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "wifi_txd" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "wifi_gpio0" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "wifi_gpio19" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "wifi_gpio21" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "wifi_gpio22" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "wifi_gpio25" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "wifi_gpio26" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "wifi_gpio27" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "wifi_gpio35" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+# wifi lines shared with SD card
+IOBUF PORT "wifi_gpio2" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "wifi_gpio4" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "wifi_gpio12" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4; # WiFi GPIO12 pulldown bootstrapping without 3.3V efuse
+IOBUF PORT "wifi_gpio13" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "wifi_gpio14" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "wifi_gpio15" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+
+## PCB antenna 433 MHz (may be also used for FM) "usb" sheet
+LOCATE COMP "ant_433mhz" SITE "G1";
+IOBUF PORT "ant_433mhz" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+
+## Second USB port "US2" going directly into FPGA "usb", "ram" sheet
+LOCATE COMP "usb_fpga_dp" SITE "E16"; # single ended or differential input only
+LOCATE COMP "usb_fpga_dn" SITE "F16";
+IOBUF PORT "usb_fpga_dp" PULLMODE=NONE IO_TYPE=LVCMOS33D DRIVE=16;
+IOBUF PORT "usb_fpga_dn" PULLMODE=NONE IO_TYPE=LVCMOS33D DRIVE=16;
+LOCATE COMP "usb_fpga_bd_dp" SITE "D15"; # single-ended bidirectional
+LOCATE COMP "usb_fpga_bd_dn" SITE "E15";
+IOBUF PORT "usb_fpga_bd_dp" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "usb_fpga_bd_dn" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+LOCATE COMP "usb_fpga_pu_dp" SITE "B12"; # pull up/down control
+LOCATE COMP "usb_fpga_pu_dn" SITE "C12";
+IOBUF PORT "usb_fpga_pu_dp" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=16;
+IOBUF PORT "usb_fpga_pu_dn" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=16;
+
+## JTAG ESP-32 "usb" sheet
+# connected to FT231X and ESP-32
+# commented out because those are dedicated pins, not directly useable as GPIO
+# but could be used by some vendor-specific JTAG bridging (boundary scan) module
+#LOCATE COMP "jtag_tdi" SITE "R5"; # FTDI_nRI FPGA receives
+#LOCATE COMP "jtag_tdo" SITE "V4"; # FTDI_nCTS FPGA transmits
+#LOCATE COMP "jtag_tck" SITE "T5"; # FTDI_nDSR FPGA receives
+#LOCATE COMP "jtag_tms" SITE "U5"; # FTDI_nDCD FPGA receives
+#IOBUF PORT "jtag_tdi" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+#IOBUF PORT "jtag_tdo" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+#IOBUF PORT "jtag_tck" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+#IOBUF PORT "jtag_tms" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+
+## SDRAM "ram" sheet
+LOCATE COMP "sdram_clk" SITE "F19";
+LOCATE COMP "sdram_cke" SITE "F20";
+LOCATE COMP "sdram_csn" SITE "P20";
+LOCATE COMP "sdram_wen" SITE "T20";
+LOCATE COMP "sdram_rasn" SITE "R20";
+LOCATE COMP "sdram_casn" SITE "T19";
+LOCATE COMP "sdram_a[0]" SITE "M20";
+LOCATE COMP "sdram_a[1]" SITE "M19";
+LOCATE COMP "sdram_a[2]" SITE "L20";
+LOCATE COMP "sdram_a[3]" SITE "L19";
+LOCATE COMP "sdram_a[4]" SITE "K20";
+LOCATE COMP "sdram_a[5]" SITE "K19";
+LOCATE COMP "sdram_a[6]" SITE "K18";
+LOCATE COMP "sdram_a[7]" SITE "J20";
+LOCATE COMP "sdram_a[8]" SITE "J19";
+LOCATE COMP "sdram_a[9]" SITE "H20";
+LOCATE COMP "sdram_a[10]" SITE "N19";
+LOCATE COMP "sdram_a[11]" SITE "G20";
+LOCATE COMP "sdram_a[12]" SITE "G19";
+LOCATE COMP "sdram_ba[0]" SITE "P19";
+LOCATE COMP "sdram_ba[1]" SITE "N20";
+LOCATE COMP "sdram_dqm[0]" SITE "U19";
+LOCATE COMP "sdram_dqm[1]" SITE "E20";
+LOCATE COMP "sdram_d[0]" SITE "J16";
+LOCATE COMP "sdram_d[1]" SITE "L18";
+LOCATE COMP "sdram_d[2]" SITE "M18";
+LOCATE COMP "sdram_d[3]" SITE "N18";
+LOCATE COMP "sdram_d[4]" SITE "P18";
+LOCATE COMP "sdram_d[5]" SITE "T18";
+LOCATE COMP "sdram_d[6]" SITE "T17";
+LOCATE COMP "sdram_d[7]" SITE "U20";
+LOCATE COMP "sdram_d[8]" SITE "E19";
+LOCATE COMP "sdram_d[9]" SITE "D20";
+LOCATE COMP "sdram_d[10]" SITE "D19";
+LOCATE COMP "sdram_d[11]" SITE "C20";
+LOCATE COMP "sdram_d[12]" SITE "E18";
+LOCATE COMP "sdram_d[13]" SITE "F18";
+LOCATE COMP "sdram_d[14]" SITE "J18";
+LOCATE COMP "sdram_d[15]" SITE "J17";
+IOBUF PORT "sdram_clk" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_cke" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_csn" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_wen" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_rasn" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_casn" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_a[0]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_a[1]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_a[2]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_a[3]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_a[4]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_a[5]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_a[6]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_a[7]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_a[8]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_a[9]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_a[10]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_a[11]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_a[12]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_ba[0]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_ba[1]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_dqm[0]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_dqm[1]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_d[0]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_d[1]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_d[2]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_d[3]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_d[4]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_d[5]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_d[6]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_d[7]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_d[8]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_d[9]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_d[10]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_d[11]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_d[12]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_d[13]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_d[14]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "sdram_d[15]" PULLMODE=NONE IO_TYPE=LVCMOS33 DRIVE=4;
+
+# GPDI differential interface (Video) "gpdi" sheet
+LOCATE COMP "gpdi_dp[0]" SITE "A16"; # Blue +
+LOCATE COMP "gpdi_dn[0]" SITE "B16"; # Blue -
+LOCATE COMP "gpdi_dp[1]" SITE "A14"; # Green +
+LOCATE COMP "gpdi_dn[1]" SITE "C14"; # Green -
+LOCATE COMP "gpdi_dp[2]" SITE "A12"; # Red +
+LOCATE COMP "gpdi_dn[2]" SITE "A13"; # Red -
+LOCATE COMP "gpdi_dp[3]" SITE "A17"; # Clock +
+LOCATE COMP "gpdi_dn[3]" SITE "B18"; # Clock -
+LOCATE COMP "gpdi_util" SITE "A19"; # R55 10k
+LOCATE COMP "gpdi_hpd" SITE "B20"; # R67 549ohm
+LOCATE COMP "gpdi_cec" SITE "A18"; # R61 549ohm, D30 3.6V Zener
+LOCATE COMP "gpdi_sda" SITE "B19"; # I2C shared with RTC
+LOCATE COMP "gpdi_scl" SITE "E12"; # I2C shared with RTC
+IOBUF PORT "gpdi_dp[0]" IO_TYPE=LVCMOS33D DRIVE=4;
+IOBUF PORT "gpdi_dn[0]" IO_TYPE=LVCMOS33D DRIVE=4;
+IOBUF PORT "gpdi_dp[1]" IO_TYPE=LVCMOS33D DRIVE=4;
+IOBUF PORT "gpdi_dn[1]" IO_TYPE=LVCMOS33D DRIVE=4;
+IOBUF PORT "gpdi_dp[2]" IO_TYPE=LVCMOS33D DRIVE=4;
+IOBUF PORT "gpdi_dn[2]" IO_TYPE=LVCMOS33D DRIVE=4;
+IOBUF PORT "gpdi_dp[3]" IO_TYPE=LVCMOS33D DRIVE=4;
+IOBUF PORT "gpdi_dn[3]" IO_TYPE=LVCMOS33D DRIVE=4;
+IOBUF PORT "gpdi_util" IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gpdi_hpd" IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gpdi_cec" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gpdi_sda" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gpdi_scl" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+
+# GPIO (default single-ended) "gpio", "ram", "gpdi" sheet
+# Pins enumerated gp[0-27], gn[0-27].
+# With differential mode enabled on Lattice, only the
+# gp[] (+) pins are used; the gn[] (-) pins are not used in the
+# design, because they carry the inverted signal by default.
+# To enable differential, rename LVCMOS33->LVCMOS33D
+# FEMALE ANGLED (90 deg PMOD) on TOP or
+# MALE VERTICAL ( 0 deg pins) on BOTTOM and flat cable
+LOCATE COMP "gp[0]" SITE "B11"; # PCLK
+LOCATE COMP "gn[0]" SITE "C11"; # PCLK
+LOCATE COMP "gp[1]" SITE "A10"; # PCLK
+LOCATE COMP "gn[1]" SITE "A11"; # PCLK
+LOCATE COMP "gp[2]" SITE "A9"; # GR_PCLK
+LOCATE COMP "gn[2]" SITE "B10"; # GR_PCLK
+LOCATE COMP "gp[3]" SITE "B9";
+LOCATE COMP "gn[3]" SITE "C10";
+LOCATE COMP "gp[4]" SITE "A7";
+LOCATE COMP "gn[4]" SITE "A8";
+LOCATE COMP "gp[5]" SITE "C8";
+LOCATE COMP "gn[5]" SITE "B8";
+LOCATE COMP "gp[6]" SITE "C6";
+LOCATE COMP "gn[6]" SITE "C7";
+IOBUF PORT "gp[0]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[0]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[1]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[1]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[2]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[2]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[3]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[3]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[4]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[4]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[5]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[5]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[6]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[6]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+LOCATE COMP "gp[7]" SITE "A6";
+LOCATE COMP "gn[7]" SITE "B6";
+LOCATE COMP "gp[8]" SITE "A4"; # DIFF
+LOCATE COMP "gn[8]" SITE "A5"; # DIFF
+LOCATE COMP "gp[9]" SITE "A2"; # DIFF
+LOCATE COMP "gn[9]" SITE "B1"; # DIFF
+LOCATE COMP "gp[10]" SITE "C4"; # DIFF
+LOCATE COMP "gn[10]" SITE "B4"; # DIFF
+LOCATE COMP "gp[11]" SITE "F4"; # DIFF
+LOCATE COMP "gn[11]" SITE "E3"; # DIFF
+LOCATE COMP "gp[12]" SITE "G3"; # DIFF wifi_gpio33 PCLK
+LOCATE COMP "gn[12]" SITE "F3"; # DIFF wifi_gpio32 PCLK
+LOCATE COMP "gp[13]" SITE "H4"; # DIFF wifi_gpio36 / sensor_vp
+LOCATE COMP "gn[13]" SITE "G5"; # DIFF wifi_gpio39 / sensor_vn
+IOBUF PORT "gp[7]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[7]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[8]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[8]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[9]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[9]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[10]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[10]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[11]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[11]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[12]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[12]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[13]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[13]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+LOCATE COMP "gp[14]" SITE "U18"; # DIFF ADC AIN1
+LOCATE COMP "gn[14]" SITE "U17"; # DIFF ADC AIN0
+LOCATE COMP "gp[15]" SITE "N17"; # DIFF ADC AIN3
+LOCATE COMP "gn[15]" SITE "P16"; # DIFF ADC AIN2
+LOCATE COMP "gp[16]" SITE "N16"; # DIFF ADC AIN5
+LOCATE COMP "gn[16]" SITE "M17"; # DIFF ADC AIN4
+LOCATE COMP "gp[17]" SITE "L16"; # DIFF ADC AIN7 GR_PCLK
+LOCATE COMP "gn[17]" SITE "L17"; # DIFF ADC AIN6
+LOCATE COMP "gp[18]" SITE "H18"; # DIFF
+LOCATE COMP "gn[18]" SITE "H17"; # DIFF
+LOCATE COMP "gp[19]" SITE "F17"; # DIFF
+LOCATE COMP "gn[19]" SITE "G18"; # DIFF
+LOCATE COMP "gp[20]" SITE "D18"; # DIFF
+LOCATE COMP "gn[20]" SITE "E17"; # DIFF
+IOBUF PORT "gp[14]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[14]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[15]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[15]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[16]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[16]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[17]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[17]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[18]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[18]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[19]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[19]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[20]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[20]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+LOCATE COMP "gp[21]" SITE "C18"; # DIFF
+LOCATE COMP "gn[21]" SITE "D17"; # DIFF
+LOCATE COMP "gp[22]" SITE "B15";
+LOCATE COMP "gn[22]" SITE "C15";
+LOCATE COMP "gp[23]" SITE "B17";
+LOCATE COMP "gn[23]" SITE "C17";
+LOCATE COMP "gp[24]" SITE "C16";
+LOCATE COMP "gn[24]" SITE "D16";
+LOCATE COMP "gp[25]" SITE "D14";
+LOCATE COMP "gn[25]" SITE "E14";
+LOCATE COMP "gp[26]" SITE "B13";
+LOCATE COMP "gn[26]" SITE "C13";
+LOCATE COMP "gp[27]" SITE "D13";
+LOCATE COMP "gn[27]" SITE "E13";
+IOBUF PORT "gp[21]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[21]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[22]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[22]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[23]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[23]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[24]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[24]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[25]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[25]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[26]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[26]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp[27]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn[27]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+
+## GPIO repeated as individual signals (non-vector)
+# Allows mixed input, output, bidirectional, clock, differential
+# If any of individual gp is used, then don't use gp[] vector.
+# Same for gn and gn[].
+# FEMALE ANGLED (90 deg PMOD) on TOP or
+# MALE VERTICAL ( 0 deg pins) on BOTTOM and flat cable
+LOCATE COMP "gp0" SITE "B11"; # PCLK
+LOCATE COMP "gn0" SITE "C11"; # PCLK
+LOCATE COMP "gp1" SITE "A10"; # PCLK
+LOCATE COMP "gn1" SITE "A11"; # PCLK
+LOCATE COMP "gp2" SITE "A9"; # GR_PCLK
+LOCATE COMP "gn2" SITE "B10"; # GR_PCLK
+LOCATE COMP "gp3" SITE "B9";
+LOCATE COMP "gn3" SITE "C10";
+LOCATE COMP "gp4" SITE "A7";
+LOCATE COMP "gn4" SITE "A8";
+LOCATE COMP "gp5" SITE "C8";
+LOCATE COMP "gn5" SITE "B8";
+LOCATE COMP "gp6" SITE "C6";
+LOCATE COMP "gn6" SITE "C7";
+IOBUF PORT "gp0" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn0" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp1" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn1" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp2" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn2" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp3" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn3" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp4" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn4" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp5" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn5" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp6" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn6" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+LOCATE COMP "gp7" SITE "A6";
+LOCATE COMP "gn7" SITE "B6";
+LOCATE COMP "gp8" SITE "A4"; # DIFF
+LOCATE COMP "gn8" SITE "A5"; # DIFF
+LOCATE COMP "gp9" SITE "A2"; # DIFF
+LOCATE COMP "gn9" SITE "B1"; # DIFF
+LOCATE COMP "gp10" SITE "C4"; # DIFF
+LOCATE COMP "gn10" SITE "B4"; # DIFF
+LOCATE COMP "gp11" SITE "F4"; # DIFF
+LOCATE COMP "gn11" SITE "E3"; # DIFF
+LOCATE COMP "gp12" SITE "G3"; # DIFF wifi_gpio33
+LOCATE COMP "gn12" SITE "F3"; # DIFF wifi_gpio32
+LOCATE COMP "gp13" SITE "H4"; # DIFF wifi_gpio36 / sensor_vp
+LOCATE COMP "gn13" SITE "G5"; # DIFF wifi_gpio39 / sensor_vn
+IOBUF PORT "gp7" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn7" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp8" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn8" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp9" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn9" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp10" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn10" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp11" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn11" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp12" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn12" PULLMODE=NONE IO_TYPE=LVCMOS33;
+FREQUENCY PORT "gn12" 50 MHZ;
+IOBUF PORT "gp13" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn13" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+LOCATE COMP "gp14" SITE "U18"; # DIFF ADC AIN1
+LOCATE COMP "gn14" SITE "U17"; # DIFF ADC AIN0
+LOCATE COMP "gp15" SITE "N17"; # DIFF ADC AIN3
+LOCATE COMP "gn15" SITE "P16"; # DIFF ADC AIN2
+LOCATE COMP "gp16" SITE "N16"; # DIFF ADC AIN5
+LOCATE COMP "gn16" SITE "M17"; # DIFF ADC AIN4
+LOCATE COMP "gp17" SITE "L16"; # DIFF ADC AIN7 GR_PCLK
+LOCATE COMP "gn17" SITE "L17"; # DIFF ADC AIN6
+LOCATE COMP "gp18" SITE "H18"; # DIFF
+LOCATE COMP "gn18" SITE "H17"; # DIFF
+LOCATE COMP "gp19" SITE "F17"; # DIFF
+LOCATE COMP "gn19" SITE "G18"; # DIFF
+LOCATE COMP "gp20" SITE "D18"; # DIFF
+LOCATE COMP "gn20" SITE "E17"; # DIFF
+IOBUF PORT "gp14" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn14" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp15" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn15" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp16" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn16" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp17" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn17" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp18" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn18" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp19" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn19" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp20" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn20" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+LOCATE COMP "gp21" SITE "C18"; # DIFF
+LOCATE COMP "gn21" SITE "D17"; # DIFF
+LOCATE COMP "gp22" SITE "B15";
+LOCATE COMP "gn22" SITE "C15";
+LOCATE COMP "gp23" SITE "B17";
+LOCATE COMP "gn23" SITE "C17";
+LOCATE COMP "gp24" SITE "C16";
+LOCATE COMP "gn24" SITE "D16";
+LOCATE COMP "gp25" SITE "D14";
+LOCATE COMP "gn25" SITE "E14";
+LOCATE COMP "gp26" SITE "B13";
+LOCATE COMP "gn26" SITE "C13";
+LOCATE COMP "gp27" SITE "D13";
+LOCATE COMP "gn27" SITE "E13";
+IOBUF PORT "gp21" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn21" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp22" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn22" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp23" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn23" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp24" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn24" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp25" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn25" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp26" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn26" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gp27" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+IOBUF PORT "gn27" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+
+## PROGRAMN (reload bitstream from FLASH, exit from bootloader)
+LOCATE COMP "user_programn" SITE "M4";
+IOBUF PORT "user_programn" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
+
+## SHUTDOWN "power", "ram" sheet
+LOCATE COMP "shutdown" SITE "G16"; # FPGA receives
+IOBUF PORT "shutdown" PULLMODE=DOWN IO_TYPE=LVCMOS33 DRIVE=4;
diff --git a/rtl/unprivileged_cpu.v b/rtl/unprivileged_cpu.v
new file mode 100644
index 0000000..6b7c4e2
--- /dev/null
+++ b/rtl/unprivileged_cpu.v
@@ -0,0 +1,117 @@
+`default_nettype none
+// UnprivilegedCPU: 16-bit core; one enabled clock per instruction, two for LW/SW.
+module UnprivilegedCPU
+ (input wire clk,
+ input wire rst, // synchronous reset; only acted on while en is high
+ input wire en, // clock enable: state and register writes hold while low
+
+ input wire [15:0] mem_in, // word read from memory: instruction or load data
+ output wire [15:0] mem_out, // store data (register rA during SW)
+ output wire [15:0] mem_addr, // data address in phase 0 of LW/SW, else next PC
+ output wire mem_isData, // i.e. not instruction
+ output wire mem_we); // high in phase 0 of SW
+
+ localparam I_ALU = 0; // rA = alu(rB, rC), function selected by op2
+ localparam I_ADDI = 1; // rA = rB + sImmed
+ localparam I_LUI = 2; // rA = uImmed (or pc when uImmed is 0)
+ localparam I_LW = 3; // rA = mem[rB + sImmed]
+ localparam I_SW = 4; // mem[rB + sImmed] = rA
+ localparam I_JALR = 5; // rA = pc + 1, then jump to rB + sImmed
+ localparam I_BRA = 6; // jump to pc + 1 + sImmed if rB matches condition mask rA
+ localparam I_SYS = 7; // not handled below: no write, pc just advances
+
+ reg phase = 1; // 1 = second cycle of LW/SW; also the state after reset
+ reg [15:0] pc = -1; // current instruction address; -1 makes the first fetch 0
+ reg [15:0] savedInstr = 0; // LW/SW instruction kept for its second cycle
+ wire [15:0] instr = phase? savedInstr : mem_in; // phase 0 decodes mem_in directly
+
+ wire [ 2:0] op = instr[15:13]; // major opcode, one of I_* above
+ wire [ 2:0] rA = instr[12:10]; // target reg; store source (SW); condition mask (BRA)
+ wire [ 2:0] rB = instr[ 9: 7]; // first source: ALU x, address base, branch test
+ wire [ 2:0] rC = instr[ 6: 4]; // second source for ALU ops
+ wire [ 3:0] op2 = instr[ 3: 0]; // ALU function select
+ wire [15:0] sImmed = {{10{instr[6]}}, instr[5:0]}; // instr[6:0] sign-extended
+ wire [15:0] uImmed = {instr[9:0], 6'd0}; // instr[9:0] in the upper 10 bits
+
+ wire memAccess = (op == I_LW) | (op == I_SW);
+ assign mem_isData = memAccess && ~phase; // bus carries the data address this cycle
+
+ wire rC0 = rC == 0; // passed to the ALU as "special"
+ wire sI0 = sImmed == 0; // not referenced anywhere else in this module
+ wire uI0 = uImmed == 0; // selects the pc-reading form of LUI
+
+ // --- Register file and write-back value -------------------------------------
+
+ reg [15:0] regIn; // value written to register rA
+ always @* begin
+ regIn = 0; // default for ops that never write a register (SW, BRA, SYS)
+ case (op)
+ I_ALU: regIn = aluOut;
+ I_ADDI: regIn = r1Immed;
+ I_LUI: regIn = uI0? pc : uImmed; // TODO pc or incPC?
+ I_LW: regIn = mem_in; // only written in phase 1, see .we below
+ I_JALR: regIn = incPC; // link: address of the following instruction
+ endcase
+ end
+
+ wire [15:0] reg1, reg2; // register file read ports
+ wire [15:0] r1Immed = reg1 + sImmed; // ADDI result, LW/SW address, JALR target
+ RegFile registers(.clk(clk),
+ .src1(rB), .out1(reg1),
+ .src2((op == I_SW)? rA : rC), .out2(reg2), // SW reads its store data from rA
+ .tgt(rA), .tgtIn(regIn),
+ .we(en && ((phase && (op == I_LW)) | // LW writes in its second cycle
+ (op == I_ALU) | (op == I_ADDI) |
+ (op == I_LUI) | (op == I_JALR))));
+
+ // --- ALU --------------------------------------------------------------------
+
+ wire [15:0] aluOut;
+ FullALU alu(.x(reg1), .y(reg2), .op(op2), .special(rC0), .out(aluOut));
+
+ // --- Next PC, branch condition and memory interface -------------------------
+
+ wire [15:0] incPC = pc + 1;
+ wire [15:0] bDest = incPC + sImmed; // branches are relative to the next instruction
+
+ wire r1L = reg1[15]; // rB value is negative
+ wire r1Z = ~|reg1; // rB value is zero
+ wire r1G = ~(r1Z | r1L); // rB value is greater than zero
+ wire [2:0] conds = {r1G, r1Z, r1L};
+ wire branch = |(rA & conds); // rA masks {G, Z, L}; mask 7 always branches
+
+ reg [15:0] nextPC;
+ always @* begin
+ nextPC = incPC; // default: fall through
+ case (op)
+ I_JALR: nextPC = r1Immed;
+ I_BRA: if (branch) nextPC = bDest;
+ endcase
+ end
+
+ wire [15:0] rw_addr = r1Immed; // LW/SW effective address
+ assign mem_out = reg2;
+
+ assign mem_addr = (memAccess & ~phase)? rw_addr : nextPC; // else fetch next instr
+ assign mem_we = (op == I_SW) & ~phase;
+
+ // --- Sequencing: phase and pc updates ---------------------------------------
+
+ always @(posedge clk) if (en) begin
+ if (rst) begin // return to the power-on state
+ pc <= -1;
+ phase <= 1;
+ savedInstr <= 0; // decodes as an ALU op, so the first nextPC is 0
+
+ end else if (phase) begin // second cycle of LW/SW, or first cycle after reset
+ phase <= 0;
+ pc <= nextPC;
+
+ end else begin // phase 0: instruction is on mem_in
+ phase <= memAccess; // LW/SW take a second cycle
+ if (memAccess) savedInstr <= instr; // keep the instruction for phase 1
+ else pc <= nextPC;
+ end
+ end
+
+endmodule
diff --git a/src/echo.prog b/src/echo.prog
new file mode 100644
index 0000000..fe3f644
--- /dev/null
+++ b/src/echo.prog
@@ -0,0 +1,9 @@
+3107E // lw 1,0,@uart   r1 = UART word (@uart is immediate 7E, i.e. -2)
+6117E // blz 1,-2       negative: read again (presumably "nothing received" - confirm)
+03122 // xor 3,1,2      compare with r2, the last word echoed
+03303 // msbl 3,3
+6237B // bz 3,-5        zero: back to the lw at the top
+12100 // mv 2,1         remember this word in r2
+4107F // sw 1,0,@led
+4107E // sw 1,0,@uart
+67077 // j -9           back to the lw at the top
diff --git a/src/f1.f1 b/src/f1.f1
new file mode 100644
index 0000000..660e45a
--- /dev/null
+++ b/src/f1.f1
@@ -0,0 +1,311 @@
+// f1.f1
+// --------------------------------------------
+//
+// Allows backwards absolute calls to single
+// character labels. Each instruction must
+// start on a new line.
+//
+// Static variables:
+// 001F last char
+//
+// --------------------------------------------
+
+// 0020
+// addr table:
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+00000
+
+// --------------------------------------------
+
+:getchar // -> char
+3201F // lw 2,0,@lastChar
+3107E // lw 1,0,@uart
+6117E // blz 1,-2
+03122 // xor 3,1,2
+03303 // msbl 3,3
+6237B // bz 3,-5
+4101F // sw 1,0,@lastChar
+01100 // lsbl 1,1
+50700 // ret
+
+// --------------------------------------------
+
+:putchar // (char n)
+3207E // lw 2,0,@uart
+6127E // blz 2,-2
+4107E // sw 1,0,@uart
+50700 // ret
+
+// --------------------------------------------
+
+:octwrite // (char n)
+12007 // addi 2,0,07
+01120 // and 1,1,2
+11130 // addi 1,1,30
+67078 // j @putchar
+
+// --------------------------------------------
+
+:hexwrite // (char n)
+1200F // addi 2,0,0F
+01120 // and 1,1,2
+12177 // addi 2,1,-9
+64202 // jgz 2,2
+11130 // addi 1,1,30
+67072 // j @putchar
+11137 // addi 1,1,37
+67070 // j @putchar
+
+// --------------------------------------------
+
+:writeword // (int word)
+47600 // | push 7
+4567F // | push 5
+4467E // | push 4
+1667D // +-------
+
+14100 // mv 4,1
+15001 // addi 5,0,01
+
+03554 // add 3,5,5
+03354 // add 3,3,5
+01437 // rol 1,4,3
+@octwrite
+15501 // addi 5,5,01
+1357B // addi 3,5,-5
+61378 // blz 3,-8
+
+11400 // mv 1,4
+@hexwrite
+
+1100A // addi 1,0,0A
+@putchar
+
+16603 // +------
+3467E // | pop 4
+3567F // | pop 5
+37600 // | pop 7
+50700 // ret
+
+// --------------------------------------------
+
+:compilelabel // (char lbl)
+47600 // | push 7
+4467F // | push 4
+1667E // +-------
+
+34100 // lw 4,1,00
+
+1207A // addi 2,0,-6
+01423 // shl 1,4,2
+62103 // bz 1,@zero
+22270 // lui 2,5C00
+01121 // or 1,1,2
+67001 // j @write // TODO make this do single
+21170 // lui 1,3C00 // instruction calls instead
+@writeword
+
+1203F // addi 2,0,3F
+01420 // and 1,4,2
+2257E // lui 2,BF80
+01121 // or 1,1,2
+@writeword
+
+16602 // +------
+3467F // | pop 4
+37600 // | pop 7
+50700 // ret
+
+// --------------------------------------------
+
+:ishex // (char c) -> bool
+1207C // addi 2,0,-4
+02123 // shl 2,1,2
+1227D // addi 2,2,-3 // '0'
+6220A // bz 2,@numeric
+1227F // addi 2,2,-1 // '@'
+62202 // bz 2,@alpha
+11000 // mv 1,0
+50700 // ret
+
+// alpha (TODO currently only capital)
+1200F // addi 2,0,0F
+01120 // and 1,1,2
+1117F // addi 1,1,-1 // 'A'
+12006 // addi 2,0,06
+01129 // ltu 1,1,2
+50700 // ret
+
+// numeric
+1200F // addi 2,0,0F
+01120 // and 1,1,2
+1200A // addi 2,0,0A
+01128 // lt 1,1,2
+50700 // ret
+
+// --------------------------------------------
+
+:findnewword // (bool echo)
+47600 // | push 7
+4467F // | push 4
+1667E // +-------
+
+14100 // mv 4,1
+
+@getchar
+62402 // bz 4,2
+@putchar
+
+// TODO currently only works at start of line
+11176 // addi 1,1,-0A
+65179 // jnz 1,-7
+
+16602 // +------
+3467F // | pop 4
+37600 // | pop 7
+50700 // ret
+
+// --------------------------------------------
+
+:start
+2677F // lui 6,FFC0 // Set up stack pointer
+14020 // addi 4,0,20 // Set up address counter
+4001F // sw 0,0,@lastChar
+
+:loop
+@getchar
+
+// if it's a newline, loop
+1200A // addi 2,0,'\n'
+02125 // sub 2,1,2
+6227B // bz 2,-5
+
+// if it's a colon, savelabel
+1203A // addi 2,0,':'
+02125 // sub 2,1,2
+65204 // bnz 2,@next
+@getchar // Get next char (single char labels)
+44100 // sw 4,1,00 // Put current address in label
+67014 // j @nextLine
+
+// if it's an at sign, compilelabel
+22001 // lui 2,'@'
+02125 // sub 2,1,2
+65206 // bnz 2,@next
+@getchar
+@compilelabel
+14402 // addi 4,4,02
+6700B // j @nextLine
+
+// if it's 0-9A-F, increment
+15100 // mv 5,1
+@ishex
+62107 // bz 1,@nextLine // not a hex digit: skip the line without echo
+11500 // mv 1,5
+@putchar
+14401 // addi 4,4,01
+4407F // sw 4,0,@led
+11001 // addi 1,0,01
+67001 // j @nextLine+1 // skips the mv 1,0 so r1 (echo) stays 1
+
+11000 // mv 1,0
+@findnextword
+
+@loop
+
+@start
+6707D // j -3
diff --git a/src/f1.prog b/src/f1.prog
new file mode 100644
index 0000000..10f5ca2
--- /dev/null
+++ b/src/f1.prog
@@ -0,0 +1,258 @@
+// f1.prog
+// --------------------------------------------
+//
+// Allows backwards absolute calls to single
+// character labels. Each instruction must
+// start on a new line.
+//
+// Static variables:
+// 001F last char
+//
+// --------------------------------------------
+
+// 0020
+// addr table:
+00000 00000 00000 00000 00000 00000 00000 00000
+00000 00000 00000 00000 00000 00000 00000 00000
+00000 00000 00000 00000 00000 00000 00000 00000
+00000 00000 00000 00000 00000 00000 00000 00000
+00000 00000 00000 00000 00000 00000 00000 00000
+00000 00000 00000 00000 00000 00000 00000 00000
+00000 00000 00000 00000 00000 00000 00000 00000
+00000 00000 00000 00000 00000 00000 00000 00000
+00000 00000 00000 00000 00000 00000 00000 00000
+00000 00000 00000 00000 00000 00000 00000 00000
+00000 00000 00000 00000 00000 00000 00000 00000
+00000 00000 00000 00000 00000 00000 00000 00000
+
+
+// 0080
+//:getchar // -> char
+3201F // lw 2,0,@lastChar
+3107E // lw 1,0,@uart
+6117E // blz 1,-2
+03122 // xor 3,1,2
+03303 // msbl 3,3
+6237B // bz 3,-5
+4101F // sw 1,0,@lastChar
+01100 // lsbl 1,1
+50700 // ret
+
+
+// 0089
+//:putchar // (char n)
+3207E // lw 2,0,@uart
+6127E // blz 2,-2
+4107E // sw 1,0,@uart
+50700 // ret
+
+
+// 008D
+//:octwrite // (char n)
+12007 // addi 2,0,07
+01120 // and 1,1,2
+11130 // addi 1,1,30
+67078 // j @putchar
+
+
+// 0091
+//:hexwrite // (char n)
+1200F // addi 2,0,0F
+01120 // and 1,1,2
+12177 // addi 2,1,-9
+64202 // jgz 2,2
+11130 // addi 1,1,30
+67072 // j @putchar
+11137 // addi 1,1,37
+67070 // j @putchar
+
+
+// 0099
+//:writeword // (int word)
+47600 // | push 7
+4567F // | push 5
+4467E // | push 4
+1667D // +-------
+
+14100 // mv 4,1
+15001 // addi 5,0,01
+
+03554 // add 3,5,5
+03354 // add 3,3,5
+01437 // rol 1,4,3
+//@octwrite
+27002 // lui 7,0080
+5770D // jalr 7,7,0D
+15501 // addi 5,5,01
+1357B // addi 3,5,-5
+61378 // blz 3,-8
+
+11400 // mv 1,4
+//@hexwrite
+27002 // lui 7,0080
+57711 // jalr 7,7,11
+
+1100A // addi 1,0,0A
+//@putchar
+27002 // lui 7,0080
+57709 // jalr 7,7,09
+
+16603 // +------
+3467E // | pop 4
+3567F // | pop 5
+37600 // | pop 7
+50700 // ret
+
+
+// 00B2
+//:compilelabel // (char lbl)
+47600 // | push 7
+4467F // | push 4
+1667E // +-------
+
+34100 // lw 4,1,00
+
+1207A // addi 2,0,-6
+01423 // shl 1,4,2
+62103 // bz 1,@zero
+22270 // lui 2,5C00
+01121 // or 1,1,2
+67001 // j @write // TODO make this do single
+21170 // lui 1,3C00 // instruction calls instead
+//@writeword
+27002 // lui 7,0080
+57719 // jalr 7,7,19
+
+1203F // addi 2,0,3F
+01420 // and 1,4,2
+2257E // lui 2,BF80
+01121 // or 1,1,2
+//@writeword
+27002 // lui 7,0080
+57719 // jalr 7,7,19
+
+16602 // +------
+3467F // | pop 4
+37600 // | pop 7
+50700 // ret
+
+
+// 00C9
+//:ishex // (char c) -> bool
+1207C // addi 2,0,-4
+02123 // shl 2,1,2
+1227D // addi 2,2,-3 // '0'
+6220A // bz 2,@numeric
+1227F // addi 2,2,-1 // '@'
+62202 // bz 2,@alpha
+11000 // mv 1,0
+50700 // ret
+
+// alpha (TODO currently only capital)
+1200F // addi 2,0,0F
+01120 // and 1,1,2
+1117F // addi 1,1,-1 // 'A'
+12006 // addi 2,0,06
+01129 // ltu 1,1,2
+50700 // ret
+
+// numeric
+1200F // addi 2,0,0F
+01120 // and 1,1,2
+1200A // addi 2,0,0A
+01128 // lt 1,1,2
+50700 // ret
+
+
+// 00DC
+//:findnewword // (bool echo)
+47600 // | push 7
+4467F // | push 4
+1667E // +-------
+
+14100 // mv 4,1
+
+//@getchar
+27002 // lui 7,0080
+57700 // jalr 7,7,00
+62402 // bz 4,2
+//@putchar
+27002 // lui 7,0080
+57709 // jalr 7,7,09
+
+// TODO currently only works at start of line
+11176 // addi 1,1,-0A
+65179 // jnz 1,-7
+
+16602 // +------
+3467F // | pop 4
+37600 // | pop 7
+50700 // ret
+
+
+//:start
+2677F // lui 6,FFC0 // Set up stack pointer
+14020 // addi 4,0,20 // Set up address counter
+4001F // sw 0,0,@lastChar
+
+//:loop
+//@getchar
+27002 // lui 7,0080
+57700 // jalr 7,7,00
+
+// if it's a newline, loop
+1200A // addi 2,0,'\n'
+02125 // sub 2,1,2
+6227B // bz 2,-5
+
+// if it's a colon, savelabel
+1203A // addi 2,0,':'
+02125 // sub 2,1,2
+65204 // bnz 2,@next
+//@getchar
+27002 // lui 7,0080 // Get next char (single char labels)
+57700 // jalr 7,7,00
+44100 // sw 4,1,00 // Put current address in label
+67014 // j @nextLine
+
+// if it's an at sign, compilelabel
+22001 // lui 2,'@'
+02125 // sub 2,1,2
+65206 // bnz 2,@next
+//@getchar
+27002 // lui 7,0080
+57700 // jalr 7,7,00
+//@compilelabel
+27002 // lui 7,0080
+57732 // jalr 7,7,32
+14402 // addi 4,4,02
+6700B // j @nextLine
+
+// if it's 0-9A-F, increment
+15100 // mv 5,1
+//@ishex
+27003 // lui 7,00C0
+57709 // jalr 7,7,09
+62107 // bz 1,@nextLine // not a hex digit: skip the line without echo
+11500 // mv 1,5
+//@putchar
+27002 // lui 7,0080
+57709 // jalr 7,7,09
+14401 // addi 4,4,01
+4407F // sw 4,0,@led
+11001 // addi 1,0,01
+67001 // j @nextLine+1 // skips the mv 1,0 so r1 (echo) stays 1
+
+11000 // mv 1,0
+//@findnextword
+27003 // lui 7,00C0
+5771C // jalr 7,7,1C
+
+//@loop
+27003 // lui 7,00C0
+5772E // jalr 7,7,2E
+
+//@start
+27003 // lui 7,00C0
+5772B // jalr 7,7,2B
+6707D // j -3
diff --git a/src/led.prog b/src/led.prog
new file mode 100644
index 0000000..90dfb79
--- /dev/null
+++ b/src/led.prog
@@ -0,0 +1,10 @@
+21529 // lui 1,AA40
+11115 // addi 1,1,15    r1 = AA55
+
+00000 // nop
+//01107 // swpb 1,1
+
+4107F // sw 1,0,@led
+6707F // j -1 // Halt   (branches to itself)
+
+6707A // j -6 // Start  (last word; jumps back to the first instruction)
diff --git a/src/print.prog b/src/print.prog
new file mode 100644
index 0000000..ace5e50
--- /dev/null
+++ b/src/print.prog
@@ -0,0 +1,30 @@
+//-----------------------------------
+// FTDI UART
+12031 // addi 2,0,@str    r2 = address of the string data further down
+
+31200 // lw 1,2,0         next character of the string
+62105 // bz 1,@cont       0000 terminator: continue with the echo loop below
+12201 // addi 2,2,1
+4107E // sw 1,0,@uart
+3107E // lw 1,0,@uart
+6117E // blz 1,-2         read again while the UART word is negative
+67079 // j -7             back to the lw 1,2,0
+
+3107E // lw 1,0,@uart
+6117E // blz 1,-2
+03122 // xor 3,1,2
+03303 // msbl 3,3
+6237B // bz 3,-5
+12100 // mv 2,1
+4107F // sw 1,0,@led
+4107E // sw 1,0,@uart
+67077 // j -9
+
+// "Hello, RISC-16! :)\r\n"
+00048 00065 0006C 0006C 0006F 0002C 00020
+00052 00049 00053 00043 0002D 00031 00036
+00021 00020 0003A 00029 0000D 0000A 00000
+
+17020 // addi 7,0,20
+50700 // jr 7
+6707D // j -3
diff --git a/src/sendprog b/src/sendprog
new file mode 100755
index 0000000..bf7f7f8
--- /dev/null
+++ b/src/sendprog
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+PROGRATE=500
+FILERATE=50
+UART="/dev/ttyUSB0"
+stty 115200 -F "$UART"
+
+TTYAFTER=""
+OUTFILE=""
+if [[ "$1" = "-t" ]]; then
+ TTYAFTER="yes"
+ shift
+fi
+if [[ "$1" = "-o" ]]; then
+ shift
+ OUTFILE="-l $1"
+ shift
+fi
+
+PROG="$(cat $1 | sed -e 's_//.*__g' | tr "A-F" "J-O" | tr -d " \n")"
+LEN="$(echo -n "$PROG" | wc -c)"
+echo -n "$PROG" | pv -s "$LEN" --rate-limit $PROGRATE >"$UART"
+echo -n " " >"$UART"
+
+shift
+if [[ "$#" -gt 0 ]]; then
+ if [[ "$TTYAFTER" ]]; then
+ (sleep 1; cat $@ | pv --quiet --rate-limit $FILERATE >"$UART") &
+ else
+ cat $@ | pv --quiet --rate-limit $FILERATE >"$UART"
+ fi
+fi
+
+if [[ "$TTYAFTER" ]]; then
+ tio $OUTFILE -m INLCRNL,OCRNL "$UART"
+fi