Compare commits

..

3 Commits

Author SHA1 Message Date
Stefan Holst a6d1e4099c alap toposort, improve tests 1 year ago
Stefan Holst 1654915ed6 support for partial re-sim 1 year ago
Stefan Holst d2a2484efa fix fault injection 1 year ago
  1. 5
      src/kyupy/logic_sim.py
  2. 120
      src/kyupy/sim.py
  3. 4
      src/kyupy/wave_sim.py
  4. BIN
      tests/b15_4ig.sa_rf.stil.gz
  5. 7
      tests/conftest.py
  6. 47
      tests/test_logic_sim.py
  7. 14
      tests/test_wave_sim.py

5
src/kyupy/logic_sim.py

@ -105,7 +105,7 @@ class LogicSim(sim.SimOps):
elif op == sim.OAI211:self.c[o0] = ~((self.c[i0] | self.c[i1]) & self.c[i2] & self.c[i3]) elif op == sim.OAI211:self.c[o0] = ~((self.c[i0] | self.c[i1]) & self.c[i2] & self.c[i3])
elif op == sim.MUX21: self.c[o0] = (self.c[i0] & ~self.c[i2]) | (self.c[i1] & self.c[i2]) elif op == sim.MUX21: self.c[o0] = (self.c[i0] & ~self.c[i2]) | (self.c[i1] & self.c[i2])
else: print(f'unknown op {op}') else: print(f'unknown op {op}')
inject_cb(o0, self.s[o0]) inject_cb(o0, self.c[o0])
elif self.m == 4: elif self.m == 4:
for op, o0, i0, i1, i2, i3 in self.ops[:,:6]: for op, o0, i0, i1, i2, i3 in self.ops[:,:6]:
o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)] o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)]
@ -181,6 +181,7 @@ class LogicSim(sim.SimOps):
logic.bp4v_and(self.c[t1], self.c[i1], self.c[i2]) logic.bp4v_and(self.c[t1], self.c[i1], self.c[i2])
logic.bp4v_or(self.c[o0], self.c[t0], self.c[t1]) logic.bp4v_or(self.c[o0], self.c[t0], self.c[t1])
else: print(f'unknown op {op}') else: print(f'unknown op {op}')
if inject_cb is not None: inject_cb(o0, self.c[o0])
else: else:
for op, o0, i0, i1, i2, i3 in self.ops[:,:6]: for op, o0, i0, i1, i2, i3 in self.ops[:,:6]:
o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)] o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)]
@ -256,7 +257,7 @@ class LogicSim(sim.SimOps):
logic.bp8v_and(self.c[t1], self.c[i1], self.c[i2]) logic.bp8v_and(self.c[t1], self.c[i1], self.c[i2])
logic.bp8v_or(self.c[o0], self.c[t0], self.c[t1]) logic.bp8v_or(self.c[o0], self.c[t0], self.c[t1])
else: print(f'unknown op {op}') else: print(f'unknown op {op}')
if inject_cb is not None: inject_cb(o0, self.s[o0]) if inject_cb is not None: inject_cb(o0, self.c[o0])
def c_to_s(self): def c_to_s(self):
"""Copies (captures) the results of the combinational portion to ``s[1]``. """Copies (captures) the results of the combinational portion to ``s[1]``.

120
src/kyupy/sim.py

@ -9,6 +9,9 @@ from .circuit import Circuit
BUF1 = np.uint16(0b1010_1010_1010_1010) BUF1 = np.uint16(0b1010_1010_1010_1010)
INV1 = ~BUF1 INV1 = ~BUF1
__const0__ = BUF1
__const1__ = INV1
AND2 = np.uint16(0b1000_1000_1000_1000) AND2 = np.uint16(0b1000_1000_1000_1000)
AND3 = np.uint16(0b1000_0000_1000_0000) AND3 = np.uint16(0b1000_0000_1000_0000)
AND4 = np.uint16(0b1000_0000_0000_0000) AND4 = np.uint16(0b1000_0000_0000_0000)
@ -41,7 +44,10 @@ AOI211, OAI211 = ~AO211, ~OA211
MUX21 = np.uint16(0b1100_1010_1100_1010) # z = i1 if i2 else i0 (i2 is select) MUX21 = np.uint16(0b1100_1010_1100_1010) # z = i1 if i2 else i0 (i2 is select)
names = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16)]) names = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16) and '__' not in k])
prim2name = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16) and '__' not in k])
name2prim = dict([(k, v) for k, v in globals().items() if isinstance(v, np.uint16)])
kind_prefixes = { kind_prefixes = {
'nand': (NAND4, NAND3, NAND2), 'nand': (NAND4, NAND3, NAND2),
@ -177,84 +183,75 @@ class SimOps:
self.ppo_offset = self.ppi_offset + self.s_len self.ppo_offset = self.ppi_offset + self.s_len
self.c_locs_len = self.ppo_offset + self.s_len self.c_locs_len = self.ppo_offset + self.s_len
# translate circuit structure into self.ops # ALAP-toposort the circuit into self.ops
ops = [] levels = []
interface_dict = dict((n, i) for i, n in enumerate(circuit.s_nodes))
for n in circuit.topological_order(): ppio2idx = dict((n, i) for i, n in enumerate(circuit.s_nodes))
if n in interface_dict: pis = set([n for n in circuit.s_nodes if len(n.ins) == 0])
inp_idx = self.ppi_offset + interface_dict[n] ppos = set([n for n in circuit.s_nodes if len(n.ins) > 0])
if len(n.outs) > 0 and n.outs[0] is not None: # first output of a PI/PPI readers = np.array([1 if l.reader in ppos else len(l.reader.outs) for l in circuit.lines], dtype=np.int32) # for ref-counting forks
ops.append((BUF1, n.outs[0].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[n.outs[0]]))
if 'dff' in n.kind.lower(): # second output of DFF is inverted level_lines = [n.ins[0] for n in ppos] # start from PPOs
if len(n.outs) > 1 and n.outs[1] is not None: # FIXME: Should probably instanciate buffers for PPOs and attach DFF clocks
ops.append((INV1, n.outs[1].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[n.outs[1]]))
else: # if not DFF, no output is inverted. while len(level_lines) > 0: # traverse the circuit level-wise back towards (P)PIs
for o_line in n.outs[1:]: level_ops = []
if o_line is not None: prev_level_lines = []
ops.append((BUF1, o_line.index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[o_line]))
continue for l in level_lines:
# regular node, not PI/PPI or PO/PPO n = l.driver
o0_idx = n.outs[0].index if len(n.outs) > 0 and n.outs[0] is not None else self.tmp_idx in_idxs = [n.ins[x].index if len(n.ins) > x and n.ins[x] is not None else self.zero_idx for x in [0,1,2,3]]
i0_idx = n.ins[0].index if len(n.ins) > 0 and n.ins[0] is not None else self.zero_idx if n in ppio2idx:
i1_idx = n.ins[1].index if len(n.ins) > 1 and n.ins[1] is not None else self.zero_idx in_idxs[0] = self.ppi_offset + ppio2idx[n]
i2_idx = n.ins[2].index if len(n.ins) > 2 and n.ins[2] is not None else self.zero_idx if l.driver_pin == 1 and 'dff' in n.kind.lower(): # second output of DFF is inverted
i3_idx = n.ins[3].index if len(n.ins) > 3 and n.ins[3] is not None else self.zero_idx level_ops.append((INV1, l.index, *in_idxs, *a_ctrl[l]))
kind = n.kind.lower() else:
if kind == '__fork__': level_ops.append((BUF1, l.index, *in_idxs, *a_ctrl[l]))
if not strip_forks: elif n.kind == '__fork__':
for o_line in n.outs: readers[n.ins[0]] -= 1
if o_line is not None: if readers[n.ins[0]] == 0: prev_level_lines.append(n.ins[0])
ops.append((BUF1, o_line.index, i0_idx, i1_idx, i2_idx, i3_idx, *a_ctrl[o_line])) if not strip_forks: level_ops.append((BUF1, l.index, *in_idxs, *a_ctrl[l]))
continue else:
prev_level_lines += n.ins
sp = None sp = None
kind = n.kind.lower()
for prefix, prims in kind_prefixes.items(): for prefix, prims in kind_prefixes.items():
if kind.startswith(prefix): if kind.startswith(prefix):
sp = prims[0] sp = prims[0]
if i3_idx == self.zero_idx: if in_idxs[3] == self.zero_idx:
sp = prims[1] sp = prims[1]
if i2_idx == self.zero_idx: if in_idxs[2] == self.zero_idx:
sp = prims[2] sp = prims[2]
break break
if sp is None: if sp is None:
print('unknown cell type', kind) print('unknown cell type', kind)
else: else:
ops.append((sp, o0_idx, i0_idx, i1_idx, i2_idx, i3_idx, *a_ctrl[o0_idx])) level_ops.append((sp, l.index, *in_idxs, *a_ctrl[l]))
if len(level_ops) > 0: levels.append(level_ops)
level_lines = prev_level_lines
self.ops = np.asarray(ops, dtype='int32') self.levels = [np.asarray(lv, dtype=np.int32) for lv in levels[::-1]]
level_sums = np.cumsum([0]+[len(lv) for lv in self.levels], dtype=np.int32)
self.level_starts, self.level_stops = level_sums[:-1], level_sums[1:]
self.ops = np.vstack(self.levels)
# create a map from fanout lines to stem lines for fork stripping # create a map from fanout lines to stem lines for fork stripping
stems = np.zeros(self.c_locs_len, dtype='int32') - 1 # default to -1: 'no fanout line' stems = np.full(self.c_locs_len, -1, dtype=np.int32) # default to -1: 'no fanout line'
if strip_forks: if strip_forks:
for f in circuit.forks.values(): for f in circuit.forks.values():
prev_line = f.ins[0] prev_line = f.ins[0]
while prev_line.driver.kind == '__fork__': while prev_line.driver.kind == '__fork__':
prev_line = prev_line.driver.ins[0] prev_line = prev_line.driver.ins[0]
stem_idx = prev_line.index
for ol in f.outs: for ol in f.outs:
if ol is not None: if ol is not None:
stems[ol] = stem_idx stems[ol] = prev_line.index
# calculate level (distance from PI/PPI) and reference count for each line ref_count = np.zeros(self.c_locs_len, dtype=np.int32)
levels = np.zeros(self.c_locs_len, dtype='int32')
ref_count = np.zeros(self.c_locs_len, dtype='int32') for op in self.ops:
level_starts = [0] for x in [2, 3, 4, 5]:
current_level = 1 ref_count[stems[op[x]] if stems[op[x]] >= 0 else op[x]] += 1
for i, op in enumerate(self.ops):
# if we fork-strip, always take the stems for determining fan-in level
i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2]
i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3]
i2_idx = stems[op[4]] if stems[op[4]] >= 0 else op[4]
i3_idx = stems[op[5]] if stems[op[5]] >= 0 else op[5]
if levels[i0_idx] >= current_level or levels[i1_idx] >= current_level or levels[i2_idx] >= current_level or levels[i3_idx] >= current_level:
current_level += 1
level_starts.append(i)
levels[op[1]] = current_level # set level of the output line
ref_count[i0_idx] += 1
ref_count[i1_idx] += 1
ref_count[i2_idx] += 1
ref_count[i3_idx] += 1
self.level_starts = np.asarray(level_starts, dtype='int32')
self.level_stops = np.asarray(level_starts[1:] + [len(self.ops)], dtype='int32')
# combinational signal allocation table. maps line and interface indices to self.c memory locations # combinational signal allocation table. maps line and interface indices to self.c memory locations
self.c_locs = np.full((self.c_locs_len,), -1, dtype=np.int32) self.c_locs = np.full((self.c_locs_len,), -1, dtype=np.int32)
@ -280,9 +277,9 @@ class SimOps:
ref_count[i0_idx] += 1 ref_count[i0_idx] += 1
# allocate memory for the rest of the circuit # allocate memory for the rest of the circuit
for op_start, op_stop in zip(self.level_starts, self.level_stops): for ops in self.levels:
free_set = set() free_set = set()
for op in self.ops[op_start:op_stop]: for op in ops:
# if we fork-strip, always take the stems # if we fork-strip, always take the stems
i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2] i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2]
i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3] i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3]
@ -301,6 +298,7 @@ class SimOps:
self.c_locs[o_idx], self.c_caps[o_idx] = h.alloc(cap), cap self.c_locs[o_idx], self.c_caps[o_idx] = h.alloc(cap), cap
if c_reuse: if c_reuse:
for loc in free_set: for loc in free_set:
if loc >= 0: # DFF clocks are not allocated. Ignore for now.
h.free(loc) h.free(loc)
# copy memory location and capacity from stems to fanout lines # copy memory location and capacity from stems to fanout lines

4
src/kyupy/wave_sim.py

@ -377,9 +377,11 @@ class WaveSimCuda(WaveSim):
def _grid_dim(self, x, y): return cdiv(x, self._block_dim[0]), cdiv(y, self._block_dim[1]) def _grid_dim(self, x, y): return cdiv(x, self._block_dim[0]), cdiv(y, self._block_dim[1])
def c_prop(self, sims=None, seed=1): def c_prop(self, sims=None, seed=1, op_from=0, op_to=None):
sims = min(sims or self.sims, self.sims) sims = min(sims or self.sims, self.sims)
for op_start, op_stop in zip(self.level_starts, self.level_stops): for op_start, op_stop in zip(self.level_starts, self.level_stops):
if op_from > op_start: continue
if op_to is not None and op_to <= op_start: break
grid_dim = self._grid_dim(sims, op_stop - op_start) grid_dim = self._grid_dim(sims, op_stop - op_start)
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.abuf, int(0), wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.abuf, int(0),
sims, self.delays, self.simctl_int, seed) sims, self.delays, self.simctl_int, seed)

BIN
tests/b15_4ig.sa_rf.stil.gz

Binary file not shown.

7
tests/conftest.py

@ -13,6 +13,13 @@ def b15_2ig_circuit(mydir):
from kyupy.techlib import SAED32 from kyupy.techlib import SAED32
return verilog.load(mydir / 'b15_2ig.v.gz', branchforks=True, tlib=SAED32) return verilog.load(mydir / 'b15_2ig.v.gz', branchforks=True, tlib=SAED32)
@pytest.fixture(scope='session')
def b15_2ig_circuit_resolved(b15_2ig_circuit):
    """Session-scoped copy of ``b15_2ig_circuit`` with SAED32 cells resolved.

    The incoming fixture is copied before ``resolve_tlib_cells`` is called,
    so the resolution is applied to the copy.
    """
    from kyupy.techlib import SAED32

    resolved = b15_2ig_circuit.copy()
    resolved.resolve_tlib_cells(SAED32)
    return resolved
@pytest.fixture(scope='session') @pytest.fixture(scope='session')
def b15_2ig_delays(mydir, b15_2ig_circuit): def b15_2ig_delays(mydir, b15_2ig_circuit):
from kyupy import sdf from kyupy import sdf

47
tests/test_logic_sim.py

@ -173,3 +173,50 @@ def test_b01(mydir):
s.c_prop() s.c_prop()
s.c_to_s() s.c_to_s()
bp_to_mv(s.s[1]) bp_to_mv(s.s[1])
def sim_and_compare(v_file, stil_file, m=8):
    """Logic-simulate the patterns of a STIL file and check the responses.

    The netlist ``v_file`` is loaded and its SAED32 cells are resolved. The
    tests from ``stil_file`` are then run through a ``LogicSim`` created with
    the given ``m``. Only response positions that the STIL file gives as
    ``logic.ONE`` or ``logic.ZERO`` are compared. At most ten mismatches are
    printed (followed by ``...`` if there are more) before the final assert.
    """
    from kyupy import verilog, stil
    from kyupy.techlib import SAED32

    circuit = verilog.load(v_file, branchforks=True, tlib=SAED32)
    circuit.resolve_tlib_cells(SAED32)

    stil_data = stil.load(stil_file)
    # Column 0 of both tests and responses is dropped.
    tests = stil_data.tests(circuit)[:, 1:]
    resp = stil_data.responses(circuit)[:, 1:]
    n_pats = tests.shape[1]

    sim = LogicSim(circuit, m=m, sims=n_pats)
    sim.s[0] = logic.mv_to_bp(tests)
    sim.s_to_c()
    sim.c_prop()
    sim.c_to_s()
    # Keep only the first n_pats columns of the unpacked simulation result.
    resp_sim = logic.bp_to_mv(sim.s[1])[:, :n_pats]

    # A mismatch is an expected 1 or 0 for which the simulation shows anything else.
    expected_one = resp == logic.ONE
    expected_zero = resp == logic.ZERO
    mismatch = (expected_one & (resp_sim != logic.ONE)) | (expected_zero & (resp_sim != logic.ZERO))
    idxs, pats = np.nonzero(mismatch)

    for idx, pat in zip(idxs[:10], pats[:10]):
        print(f'mismatch pattern:{pat} ppio:{idx} exp:{logic.mv_str(resp[idx,pat])} act:{logic.mv_str(resp_sim[idx,pat])}')
    if len(idxs) > 10:
        print(f'...')
    assert len(idxs) == 0
def test_b15_2ig_sa_2v(mydir):
    """Run ``sim_and_compare`` on b15_2ig and its sa_nf STIL patterns with m=2."""
    netlist = mydir / 'b15_2ig.v.gz'
    patterns = mydir / 'b15_2ig.sa_nf.stil.gz'
    sim_and_compare(netlist, patterns, m=2)
def test_b15_2ig_sa_4v(mydir):
    """Run ``sim_and_compare`` on b15_2ig and its sa_nf STIL patterns with m=4."""
    netlist = mydir / 'b15_2ig.v.gz'
    patterns = mydir / 'b15_2ig.sa_nf.stil.gz'
    sim_and_compare(netlist, patterns, m=4)
def test_b15_2ig_sa_8v(mydir):
    """Run ``sim_and_compare`` on b15_2ig and its sa_nf STIL patterns with m=8."""
    netlist = mydir / 'b15_2ig.v.gz'
    patterns = mydir / 'b15_2ig.sa_nf.stil.gz'
    sim_and_compare(netlist, patterns, m=8)
def test_b15_4ig_sa_2v(mydir):
    """Run ``sim_and_compare`` on b15_4ig and its sa_rf STIL patterns with m=2."""
    netlist = mydir / 'b15_4ig.v.gz'
    patterns = mydir / 'b15_4ig.sa_rf.stil.gz'
    sim_and_compare(netlist, patterns, m=2)
def test_b15_4ig_sa_4v(mydir):
    """Run ``sim_and_compare`` on b15_4ig and its sa_rf STIL patterns with m=4."""
    netlist = mydir / 'b15_4ig.v.gz'
    patterns = mydir / 'b15_4ig.sa_rf.stil.gz'
    sim_and_compare(netlist, patterns, m=4)
def test_b15_4ig_sa_8v(mydir):
    """Run ``sim_and_compare`` on b15_4ig and its sa_rf STIL patterns with m=8."""
    netlist = mydir / 'b15_4ig.v.gz'
    patterns = mydir / 'b15_4ig.sa_rf.stil.gz'
    sim_and_compare(netlist, patterns, m=8)

14
tests/test_wave_sim.py

@ -177,7 +177,7 @@ def compare_to_logic_sim(wsim: WaveSim):
lsim.s_to_c() lsim.s_to_c()
lsim.c_prop() lsim.c_prop()
lsim.c_to_s() lsim.c_to_s()
exp = logic.bp_to_mv(lsim.s[1]) exp = logic.bp_to_mv(lsim.s[1])[:,:tests.shape[-1]]
resp[resp == logic.PPULSE] = logic.ZERO resp[resp == logic.PPULSE] = logic.ZERO
resp[resp == logic.NPULSE] = logic.ONE resp[resp == logic.NPULSE] = logic.ONE
@ -188,13 +188,13 @@ def compare_to_logic_sim(wsim: WaveSim):
np.testing.assert_allclose(resp, exp) np.testing.assert_allclose(resp, exp)
def test_b15(b15_2ig_circuit, b15_2ig_delays): def test_b15(b15_2ig_circuit_resolved, b15_2ig_delays):
compare_to_logic_sim(WaveSim(b15_2ig_circuit, b15_2ig_delays, 8)) compare_to_logic_sim(WaveSim(b15_2ig_circuit_resolved, b15_2ig_delays, 8))
def test_b15_strip_forks(b15_2ig_circuit, b15_2ig_delays): def test_b15_strip_forks(b15_2ig_circuit_resolved, b15_2ig_delays):
compare_to_logic_sim(WaveSim(b15_2ig_circuit, b15_2ig_delays, 8, strip_forks=True)) compare_to_logic_sim(WaveSim(b15_2ig_circuit_resolved, b15_2ig_delays, 8, strip_forks=True))
def test_b15_cuda(b15_2ig_circuit, b15_2ig_delays): def test_b15_cuda(b15_2ig_circuit_resolved, b15_2ig_delays):
compare_to_logic_sim(WaveSimCuda(b15_2ig_circuit, b15_2ig_delays, 8, strip_forks=True)) compare_to_logic_sim(WaveSimCuda(b15_2ig_circuit_resolved, b15_2ig_delays, 8, strip_forks=True))

Loading…
Cancel
Save