From 8d479202b0c9dbe13bb95ad572a060b69642ec26 Mon Sep 17 00:00:00 2001
From: krolxon
Date: Mon, 5 Jan 2026 12:12:57 +0530
Subject: add labels, improve documentation, add step debug

---
 src/assembler.rs | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

(limited to 'src/assembler.rs')

Review notes (not part of the original commit message; text in this
area is free-form commentary and is not part of any hunk):

  What the patch does
    * Adds `instr_size`, which maps a tokenized line to its encoded
      size: 3 bytes for mov/add/sub/jmp/jz/jnz, 1 byte for hlt, and a
      panic for any other mnemonic.
    * Adds `first_pass`, which walks the source once, skips blank lines
      and lines starting with ";", records every line ending in ":" as
      a label at the current byte address, and advances the address by
      `instr_size` for everything else.
    * Makes `assembler` run `first_pass` first, skip label lines, and
      emit jmp/jz/jnz as opcode + 16-bit label address, low byte first.

  Layout of this file
    * Line breaks and indentation were reconstructed from a
      whitespace-collapsed copy; indentation assumes rustfmt defaults,
      so apply with whitespace tolerance if context lines do not match.
    * The generic parameters on `Vec<String>`, `HashMap<String, u16>`
      and `Vec<u8>` were missing from the collapsed copy and are
      restored from how the values are used in the hunks below.

  Issues worth a follow-up commit (code below is left as committed)
    * `expect("Uknown label")` misspells "Unknown".
    * `&tokens[1]` in the jump arm is unchecked indexing: a jump with
      no operand panics with an out-of-bounds index instead of a
      diagnostic naming the source line.
    * `addr += instr_size(&tokens)` accumulates in a `u16` without an
      overflow check.
    * `symbols.insert(label, addr)` replaces an earlier entry, so a
      label defined twice is accepted silently and the last one wins.
    * A label is only recognized when the trimmed line ends with ":".
      A label followed by a trailing comment on the same line is
      presumably treated as an instruction and would hit the
      "Unknown instruction" panic -- TODO confirm against the rest of
      `tokenize`, which is not visible here.

diff --git a/src/assembler.rs b/src/assembler.rs
index 3cd2bf8..3eeec64 100644
--- a/src/assembler.rs
+++ b/src/assembler.rs
@@ -1,4 +1,5 @@
 use crate::instructions::Instruction;
+use std::collections::HashMap;
 
 fn tokenize(line: &str) -> Vec<String> {
     line.split(|c| c == ' ' || c == ',' || c == '\t')
@@ -17,14 +18,51 @@ fn parse_reg(s: &str) -> u8 {
     }
 }
 
+fn instr_size(tokens: &[String]) -> u16 {
+    match tokens[0].as_str() {
+        "mov" | "add" | "sub" | "jmp" | "jz" | "jnz" => 3,
+        "hlt" => 1,
+        _ => panic!("Unknown instruction {}", tokens[0])
+
+    }
+}
+
+
+fn first_pass(source: &str) -> HashMap<String, u16> {
+    let mut symbols = HashMap::new();
+    let mut addr: u16 = 0;
+
+    for line in source.lines() {
+        let line = line.trim();
+
+        // Ignoring comments and empty lines
+        if line.is_empty() || line.starts_with(";") {
+            continue;
+        }
+
+        // Labels (ends with ":")
+        if line.ends_with(":") {
+            let label = line.trim_end_matches(":").to_string();
+            symbols.insert(label, addr);
+            continue;
+        }
+
+        let tokens = tokenize(line);
+        addr += instr_size(&tokens);
+    }
+
+    symbols
+}
+
 pub fn assembler(source: &str) -> Vec<u8> {
+    let symbols = first_pass(source);
     let mut bytes = Vec::new();
 
     for (line_no, line) in source.lines().enumerate() {
         let line = line.trim();
 
         // Comments in assembly start with ";"
-        if line.is_empty() || line.starts_with(';') {
+        if line.is_empty() || line.starts_with(';') || line.ends_with(":") {
             continue;
         }
 
@@ -61,6 +99,22 @@ pub fn assembler(source: &str) -> Vec<u8> {
                 bytes.push(r2);
             }
 
+            "jmp" | "jz" | "jnz" => {
+                let opcode = match tokens[0].as_str() {
+                    "jmp" => Instruction::JMP,
+                    "jz" => Instruction::JZ,
+                    "jnz" => Instruction::JNZ,
+                    _ => unreachable!()
+                };
+
+                let label = &tokens[1];
+                let addr = *symbols.get(label).expect("Uknown label");
+
+                bytes.push(opcode as u8);
+                bytes.push((addr & 0xFF) as u8); // low
+                bytes.push((addr >> 8) as u8); // high
+            }
+
             "hlt" => {
                 bytes.push(Instruction::HLT as u8);
             }
-- 
cgit v1.2.3