aboutsummaryrefslogtreecommitdiff
path: root/src/assembler.rs
blob: 3eeec64df021d4b644bee8626dcf09bbc8b50dba (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
use crate::instructions::Instruction;
use std::collections::HashMap;

/// Breaks one source line into lowercase tokens.
///
/// Spaces, commas and tabs all act as separators. Runs of consecutive
/// separators produce no empty tokens, so a line made only of separators
/// yields an empty vector.
fn tokenize(line: &str) -> Vec<String> {
    let is_separator = |c: char| matches!(c, ' ' | ',' | '\t');

    let mut tokens = Vec::new();
    for piece in line.split(is_separator) {
        // Adjacent separators (e.g. ", ") leave empty pieces behind; drop them.
        if piece.is_empty() {
            continue;
        }
        tokens.push(piece.to_lowercase());
    }
    tokens
}

/// Translates a register name into its numeric encoding.
///
/// The recognised names are `a`, `b`, `c` and `d`, encoded as 0 through 3.
/// Matching is exact, so the name must already be lowercase.
///
/// # Panics
///
/// Panics if `s` is not one of the four register names.
fn parse_reg(s: &str) -> u8 {
    const REGISTERS: [(&str, u8); 4] = [("a", 0), ("b", 1), ("c", 2), ("d", 3)];

    for &(name, code) in REGISTERS.iter() {
        if name == s {
            return code;
        }
    }
    panic!("Unknown register {}", s)
}

/// Reports how many bytes the instruction named by `tokens[0]` encodes to.
///
/// `hlt` takes a single byte; `mov`, `add`, `sub`, `jmp`, `jz` and `jnz`
/// take three. Only the first token is inspected.
///
/// # Panics
///
/// Panics if `tokens` is empty or its first token is not a known mnemonic.
fn instr_size(tokens: &[String]) -> u16 {
    let mnemonic = tokens[0].as_str();

    if mnemonic == "hlt" {
        return 1;
    }
    if matches!(mnemonic, "mov" | "add" | "sub" | "jmp" | "jz" | "jnz") {
        return 3;
    }
    panic!("Unknown instruction {}", tokens[0])
}


/// Scans `source` once and records the byte address of every label.
///
/// Blank lines and lines starting with `;` are skipped. A line ending in `:`
/// defines a label at the current address; every other line is tokenized and
/// advances the address by the size of its instruction.
///
/// Label names are stored lowercase, with whitespace before the `:` removed,
/// because label references are lowercased by `tokenize` before lookup.
/// If the same name is defined twice, the later definition replaces the
/// earlier one.
///
/// # Panics
///
/// Panics if a label falls at an address that does not fit in 16 bits, or if
/// `instr_size` rejects a line.
fn first_pass(source: &str) -> HashMap<String, u16> {
    let mut symbols = HashMap::new();
    // Tracked wider than `u16` so an oversized program is detected instead of
    // silently wrapping around to a wrong label address.
    let mut addr: usize = 0;

    for line in source.lines() {
        let line = line.trim();

        // Ignore comments and empty lines.
        if line.is_empty() || line.starts_with(';') {
            continue;
        }

        // Label definition (line ends with ":").
        if line.ends_with(':') {
            let label = line.trim_end_matches(':').trim_end().to_lowercase();
            if addr > usize::from(u16::MAX) {
                panic!(
                    "Label {} is at address {}, beyond the 16-bit address space",
                    label, addr
                );
            }
            // Lossless: the check above guarantees `addr` fits in a u16.
            symbols.insert(label, addr as u16);
            continue;
        }

        let tokens = tokenize(line);
        addr += usize::from(instr_size(&tokens));
    }

    symbols
}

/// Assembles `source` into machine code.
///
/// Runs `first_pass` to resolve label addresses, then encodes each
/// instruction line in order. Blank lines, lines starting with `;` and label
/// definitions (lines ending in `:`) emit nothing.
///
/// Encodings, all starting with the opcode byte:
/// - `mov reg, imm` — register index, then an immediate parsed as a decimal `u8`
/// - `add r1, r2` / `sub r1, r2` — two register indices
/// - `jmp label` / `jz label` / `jnz label` — 16-bit address, low byte first
/// - `hlt` — opcode only
///
/// # Panics
///
/// Panics on malformed input: an unknown instruction, register or label, a
/// missing operand, or an immediate that is not a valid `u8`. Messages
/// produced by this function include the 1-based source line number.
pub fn assembler(source: &str) -> Vec<u8> {
    let symbols = first_pass(source);
    let mut bytes: Vec<u8> = Vec::new();

    for (index, raw) in source.lines().enumerate() {
        let line_no = index + 1;
        let line = raw.trim();

        // Comments start with ";"; labels were already handled by the first pass.
        if line.is_empty() || line.starts_with(';') || line.ends_with(':') {
            continue;
        }

        let tokens = tokenize(line);
        let mnemonic = match tokens.first() {
            Some(token) => token.as_str(),
            // A line made only of separators tokenizes to nothing.
            None => panic!("Line {}: unknown instruction", line_no),
        };

        match mnemonic {
            "mov" => {
                // mov reg, imm
                let reg = parse_reg(operand(&tokens, 1, line_no));
                let text = operand(&tokens, 2, line_no);
                let imm: u8 = text
                    .parse()
                    .unwrap_or_else(|_| panic!("Line {}: invalid immediate {}", line_no, text));

                bytes.extend_from_slice(&[Instruction::MOV as u8, reg, imm]);
            }

            "add" | "sub" => {
                // add a, b / sub a, b
                let opcode = match mnemonic {
                    "add" => Instruction::ADD,
                    _ => Instruction::SUB,
                };
                let r1 = parse_reg(operand(&tokens, 1, line_no));
                let r2 = parse_reg(operand(&tokens, 2, line_no));

                bytes.extend_from_slice(&[opcode as u8, r1, r2]);
            }

            "jmp" | "jz" | "jnz" => {
                let opcode = match mnemonic {
                    "jmp" => Instruction::JMP,
                    "jz" => Instruction::JZ,
                    _ => Instruction::JNZ,
                };

                let label = operand(&tokens, 1, line_no);
                let addr = match symbols.get(label) {
                    Some(addr) => *addr,
                    None => panic!("Line {}: unknown label {}", line_no, label),
                };

                bytes.push(opcode as u8);
                // Little-endian: low byte first, then high byte.
                bytes.extend_from_slice(&addr.to_le_bytes());
            }

            "hlt" => {
                bytes.push(Instruction::HLT as u8);
            }

            _ => panic!("Line {}: unknown instruction", line_no),
        }
    }

    bytes
}

/// Returns the token at `index`, panicking with the source line number when
/// the line has too few tokens.
fn operand(tokens: &[String], index: usize, line_no: usize) -> &str {
    match tokens.get(index) {
        Some(token) => token.as_str(),
        None => panic!("Line {}: missing operand {}", line_no, index),
    }
}