diff --git a/src/kyupy/sim.py b/src/kyupy/sim.py index 34924db..6ed703b 100644 --- a/src/kyupy/sim.py +++ b/src/kyupy/sim.py @@ -9,6 +9,9 @@ from .circuit import Circuit BUF1 = np.uint16(0b1010_1010_1010_1010) INV1 = ~BUF1 +__const0__ = BUF1 +__const1__ = INV1 + AND2 = np.uint16(0b1000_1000_1000_1000) AND3 = np.uint16(0b1000_0000_1000_0000) AND4 = np.uint16(0b1000_0000_0000_0000) @@ -41,7 +44,10 @@ AOI211, OAI211 = ~AO211, ~OA211 MUX21 = np.uint16(0b1100_1010_1100_1010) # z = i1 if i2 else i0 (i2 is select) -names = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16)]) +names = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16) and '__' not in k]) + +prim2name = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16) and '__' not in k]) +name2prim = dict([(k, v) for k, v in globals().items() if isinstance(v, np.uint16)]) kind_prefixes = { 'nand': (NAND4, NAND3, NAND2), @@ -177,84 +183,75 @@ class SimOps: self.ppo_offset = self.ppi_offset + self.s_len self.c_locs_len = self.ppo_offset + self.s_len - # translate circuit structure into self.ops - ops = [] - interface_dict = dict((n, i) for i, n in enumerate(circuit.s_nodes)) - for n in circuit.topological_order(): - if n in interface_dict: - inp_idx = self.ppi_offset + interface_dict[n] - if len(n.outs) > 0 and n.outs[0] is not None: # first output of a PI/PPI - ops.append((BUF1, n.outs[0].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[n.outs[0]])) - if 'dff' in n.kind.lower(): # second output of DFF is inverted - if len(n.outs) > 1 and n.outs[1] is not None: - ops.append((INV1, n.outs[1].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[n.outs[1]])) - else: # if not DFF, no output is inverted. 
- for o_line in n.outs[1:]: - if o_line is not None: - ops.append((BUF1, o_line.index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[o_line])) - continue - # regular node, not PI/PPI or PO/PPO - o0_idx = n.outs[0].index if len(n.outs) > 0 and n.outs[0] is not None else self.tmp_idx - i0_idx = n.ins[0].index if len(n.ins) > 0 and n.ins[0] is not None else self.zero_idx - i1_idx = n.ins[1].index if len(n.ins) > 1 and n.ins[1] is not None else self.zero_idx - i2_idx = n.ins[2].index if len(n.ins) > 2 and n.ins[2] is not None else self.zero_idx - i3_idx = n.ins[3].index if len(n.ins) > 3 and n.ins[3] is not None else self.zero_idx - kind = n.kind.lower() - if kind == '__fork__': - if not strip_forks: - for o_line in n.outs: - if o_line is not None: - ops.append((BUF1, o_line.index, i0_idx, i1_idx, i2_idx, i3_idx, *a_ctrl[o_line])) - continue - sp = None - for prefix, prims in kind_prefixes.items(): - if kind.startswith(prefix): - sp = prims[0] - if i3_idx == self.zero_idx: - sp = prims[1] - if i2_idx == self.zero_idx: - sp = prims[2] - break - if sp is None: - print('unknown cell type', kind) - else: - ops.append((sp, o0_idx, i0_idx, i1_idx, i2_idx, i3_idx, *a_ctrl[o0_idx])) - - self.ops = np.asarray(ops, dtype='int32') + # ALAP-toposort the circuit into self.ops + levels = [] + + ppio2idx = dict((n, i) for i, n in enumerate(circuit.s_nodes)) + pis = set([n for n in circuit.s_nodes if len(n.ins) == 0]) + ppos = set([n for n in circuit.s_nodes if len(n.ins) > 0]) + readers = np.array([1 if l.reader in ppos else len(l.reader.outs) for l in circuit.lines], dtype=np.int32) # for ref-counting forks + + level_lines = [n.ins[0] for n in ppos] # start from PPOs + # FIXME: Should probably instantiate buffers for PPOs and attach DFF clocks + + while len(level_lines) > 0: # traverse the circuit level-wise back towards (P)PIs + level_ops = [] + prev_level_lines = [] + + for l in level_lines: + n = l.driver + in_idxs = [n.ins[x].index if len(n.ins) > x and n.ins[x] 
is not None else self.zero_idx for x in [0,1,2,3]] + if n in ppio2idx: + in_idxs[0] = self.ppi_offset + ppio2idx[n] + if l.driver_pin == 1 and 'dff' in n.kind.lower(): # second output of DFF is inverted + level_ops.append((INV1, l.index, *in_idxs, *a_ctrl[l])) + else: + level_ops.append((BUF1, l.index, *in_idxs, *a_ctrl[l])) + elif n.kind == '__fork__': + readers[n.ins[0]] -= 1 + if readers[n.ins[0]] == 0: prev_level_lines.append(n.ins[0]) + if not strip_forks: level_ops.append((BUF1, l.index, *in_idxs, *a_ctrl[l])) + else: + prev_level_lines += n.ins + sp = None + kind = n.kind.lower() + for prefix, prims in kind_prefixes.items(): + if kind.startswith(prefix): + sp = prims[0] + if in_idxs[3] == self.zero_idx: + sp = prims[1] + if in_idxs[2] == self.zero_idx: + sp = prims[2] + break + if sp is None: + print('unknown cell type', kind) + else: + level_ops.append((sp, l.index, *in_idxs, *a_ctrl[l])) + + if len(level_ops) > 0: levels.append(level_ops) + level_lines = prev_level_lines + + self.levels = [np.asarray(lv, dtype=np.int32) for lv in levels[::-1]] + level_sums = np.cumsum([0]+[len(lv) for lv in self.levels], dtype=np.int32) + self.level_starts, self.level_stops = level_sums[:-1], level_sums[1:] + self.ops = np.vstack(self.levels) # create a map from fanout lines to stem lines for fork stripping - stems = np.zeros(self.c_locs_len, dtype='int32') - 1 # default to -1: 'no fanout line' + stems = np.full(self.c_locs_len, -1, dtype=np.int32) # default to -1: 'no fanout line' if strip_forks: for f in circuit.forks.values(): prev_line = f.ins[0] while prev_line.driver.kind == '__fork__': prev_line = prev_line.driver.ins[0] - stem_idx = prev_line.index for ol in f.outs: if ol is not None: - stems[ol] = stem_idx - - # calculate level (distance from PI/PPI) and reference count for each line - levels = np.zeros(self.c_locs_len, dtype='int32') - ref_count = np.zeros(self.c_locs_len, dtype='int32') - level_starts = [0] - current_level = 1 - for i, op in enumerate(self.ops): 
- # if we fork-strip, always take the stems for determining fan-in level - i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2] - i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3] - i2_idx = stems[op[4]] if stems[op[4]] >= 0 else op[4] - i3_idx = stems[op[5]] if stems[op[5]] >= 0 else op[5] - if levels[i0_idx] >= current_level or levels[i1_idx] >= current_level or levels[i2_idx] >= current_level or levels[i3_idx] >= current_level: - current_level += 1 - level_starts.append(i) - levels[op[1]] = current_level # set level of the output line - ref_count[i0_idx] += 1 - ref_count[i1_idx] += 1 - ref_count[i2_idx] += 1 - ref_count[i3_idx] += 1 - self.level_starts = np.asarray(level_starts, dtype='int32') - self.level_stops = np.asarray(level_starts[1:] + [len(self.ops)], dtype='int32') + stems[ol] = prev_line.index + + ref_count = np.zeros(self.c_locs_len, dtype=np.int32) + + for op in self.ops: + for x in [2, 3, 4, 5]: + ref_count[stems[op[x]] if stems[op[x]] >= 0 else op[x]] += 1 # combinational signal allocation table. maps line and interface indices to self.c memory locations self.c_locs = np.full((self.c_locs_len,), -1, dtype=np.int32) @@ -280,9 +277,9 @@ class SimOps: ref_count[i0_idx] += 1 # allocate memory for the rest of the circuit - for op_start, op_stop in zip(self.level_starts, self.level_stops): + for ops in self.levels: free_set = set() - for op in self.ops[op_start:op_stop]: + for op in ops: # if we fork-strip, always take the stems i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2] i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3] @@ -301,7 +298,8 @@ class SimOps: self.c_locs[o_idx], self.c_caps[o_idx] = h.alloc(cap), cap if c_reuse: for loc in free_set: - h.free(loc) + if loc >= 0: # DFF clocks are not allocated. Ignore for now. 
+ h.free(loc) # copy memory location and capacity from stems to fanout lines for lidx, stem in enumerate(stems): diff --git a/tests/b15_4ig.sa_rf.stil.gz b/tests/b15_4ig.sa_rf.stil.gz new file mode 100644 index 0000000..437a3e0 Binary files /dev/null and b/tests/b15_4ig.sa_rf.stil.gz differ diff --git a/tests/conftest.py b/tests/conftest.py index c73bdc5..a78f6ce 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,13 @@ def b15_2ig_circuit(mydir): from kyupy.techlib import SAED32 return verilog.load(mydir / 'b15_2ig.v.gz', branchforks=True, tlib=SAED32) +@pytest.fixture(scope='session') +def b15_2ig_circuit_resolved(b15_2ig_circuit): + from kyupy.techlib import SAED32 + cr = b15_2ig_circuit.copy() + cr.resolve_tlib_cells(SAED32) + return cr + @pytest.fixture(scope='session') def b15_2ig_delays(mydir, b15_2ig_circuit): from kyupy import sdf diff --git a/tests/test_logic_sim.py b/tests/test_logic_sim.py index 5849310..8086c3e 100644 --- a/tests/test_logic_sim.py +++ b/tests/test_logic_sim.py @@ -173,3 +173,50 @@ def test_b01(mydir): s.c_prop() s.c_to_s() bp_to_mv(s.s[1]) + + +def sim_and_compare(v_file, stil_file, m=8): + from kyupy import verilog, stil + from kyupy.techlib import SAED32 + c = verilog.load(v_file, branchforks=True, tlib=SAED32) + c.resolve_tlib_cells(SAED32) + s = stil.load(stil_file) + tests = s.tests(c)[:,1:] + resp = s.responses(c)[:,1:] + lsim = LogicSim(c, m=m, sims=tests.shape[1]) + lsim.s[0] = logic.mv_to_bp(tests) + lsim.s_to_c() + lsim.c_prop() + lsim.c_to_s() + resp_sim = logic.bp_to_mv(lsim.s[1])[:,:tests.shape[1]] + idxs, pats = np.nonzero(((resp == logic.ONE) & (resp_sim != logic.ONE)) | ((resp == logic.ZERO) & (resp_sim != logic.ZERO))) + for i, (idx, pat) in enumerate(zip(idxs, pats)): + if i >= 10: + print(f'...') + break + print(f'mismatch pattern:{pat} ppio:{idx} exp:{logic.mv_str(resp[idx,pat])} act:{logic.mv_str(resp_sim[idx,pat])}') + assert len(idxs) == 0 + + +def test_b15_2ig_sa_2v(mydir): + 
sim_and_compare(mydir / 'b15_2ig.v.gz', mydir / 'b15_2ig.sa_nf.stil.gz', m=2) + + +def test_b15_2ig_sa_4v(mydir): + sim_and_compare(mydir / 'b15_2ig.v.gz', mydir / 'b15_2ig.sa_nf.stil.gz', m=4) + + +def test_b15_2ig_sa_8v(mydir): + sim_and_compare(mydir / 'b15_2ig.v.gz', mydir / 'b15_2ig.sa_nf.stil.gz', m=8) + + +def test_b15_4ig_sa_2v(mydir): + sim_and_compare(mydir / 'b15_4ig.v.gz', mydir / 'b15_4ig.sa_rf.stil.gz', m=2) + + +def test_b15_4ig_sa_4v(mydir): + sim_and_compare(mydir / 'b15_4ig.v.gz', mydir / 'b15_4ig.sa_rf.stil.gz', m=4) + + +def test_b15_4ig_sa_8v(mydir): + sim_and_compare(mydir / 'b15_4ig.v.gz', mydir / 'b15_4ig.sa_rf.stil.gz', m=8) diff --git a/tests/test_wave_sim.py b/tests/test_wave_sim.py index 1e2cc67..d09cfc1 100644 --- a/tests/test_wave_sim.py +++ b/tests/test_wave_sim.py @@ -177,7 +177,7 @@ def compare_to_logic_sim(wsim: WaveSim): lsim.s_to_c() lsim.c_prop() lsim.c_to_s() - exp = logic.bp_to_mv(lsim.s[1]) + exp = logic.bp_to_mv(lsim.s[1])[:,:tests.shape[-1]] resp[resp == logic.PPULSE] = logic.ZERO resp[resp == logic.NPULSE] = logic.ONE @@ -188,13 +188,13 @@ def compare_to_logic_sim(wsim: WaveSim): np.testing.assert_allclose(resp, exp) -def test_b15(b15_2ig_circuit, b15_2ig_delays): - compare_to_logic_sim(WaveSim(b15_2ig_circuit, b15_2ig_delays, 8)) +def test_b15(b15_2ig_circuit_resolved, b15_2ig_delays): + compare_to_logic_sim(WaveSim(b15_2ig_circuit_resolved, b15_2ig_delays, 8)) -def test_b15_strip_forks(b15_2ig_circuit, b15_2ig_delays): - compare_to_logic_sim(WaveSim(b15_2ig_circuit, b15_2ig_delays, 8, strip_forks=True)) +def test_b15_strip_forks(b15_2ig_circuit_resolved, b15_2ig_delays): + compare_to_logic_sim(WaveSim(b15_2ig_circuit_resolved, b15_2ig_delays, 8, strip_forks=True)) -def test_b15_cuda(b15_2ig_circuit, b15_2ig_delays): - compare_to_logic_sim(WaveSimCuda(b15_2ig_circuit, b15_2ig_delays, 8, strip_forks=True)) +def test_b15_cuda(b15_2ig_circuit_resolved, b15_2ig_delays): + 
compare_to_logic_sim(WaveSimCuda(b15_2ig_circuit_resolved, b15_2ig_delays, 8, strip_forks=True))