diff --git a/src/kyupy/__init__.py b/src/kyupy/__init__.py index 9be6485..ea967b4 100644 --- a/src/kyupy/__init__.py +++ b/src/kyupy/__init__.py @@ -17,6 +17,10 @@ import numpy as np _pop_count_lut = np.asarray([bin(x).count('1') for x in range(256)]) +def cdiv(x, y): + return -(x // -y) + + def popcount(a): """Returns the number of 1-bits in a given packed numpy array.""" return np.sum(_pop_count_lut[a]) diff --git a/src/kyupy/circuit.py b/src/kyupy/circuit.py index 8802cec..5342a1a 100644 --- a/src/kyupy/circuit.py +++ b/src/kyupy/circuit.py @@ -228,10 +228,10 @@ class Circuit: Usually, nodes in the io_nodes list without any lines in their :py:attr:`Node.ins` list are primary inputs, and nodes without any lines in their :py:attr:`Node.outs` list are regarded as primary outputs. """ - self.cells = {} + self.cells : dict[str, Node] = {} """A dictionary to access cells by name. """ - self.forks = {} + self.forks : dict[str, Node] = {} """A dictionary to access forks by name. """ diff --git a/src/kyupy/sdf.py b/src/kyupy/sdf.py index c1e3ebf..e2e9e0b 100644 --- a/src/kyupy/sdf.py +++ b/src/kyupy/sdf.py @@ -15,6 +15,7 @@ import numpy as np from lark import Lark, Transformer from . import log, readtext +from .circuit import Circuit from .techlib import TechLib @@ -27,17 +28,48 @@ class DelayFile: """ def __init__(self, name, cells): self.name = name - if None in cells: - self.interconnects = cells[None] - else: - self.interconnects = None + self.interconnects = cells.get(None, None) self.cells = dict((n, l) for n, l in cells.items() if n) def __repr__(self): return '\n'.join(f'{n}: {l}' for n, l in self.cells.items()) + '\n' + \ '\n'.join(str(i) for i in self.interconnects) - def annotation(self, circuit, tlib=TechLib(), dataset=1, interconnect=True, ffdelays=True): + def iopaths(self, circuit:Circuit, tlib=TechLib()): + """Constructs an ndarray containing all IOPATH delays. + + All IOPATH delays for a node `n` are annotated to the line connected to the input pin specified in the IOPATH. + + Axis 0: dataset (usually 3 datasets per SDF-file) + Axis 1: line index (e.g. `n.ins[0]`, `n.ins[1]`) + Axis 2: polarity of the transition at the IOPATH-input (e.g. at `n.ins[0]` or `n.ins[1]`), 0='rising/posedge', 1='falling/negedge' + Axis 3: polarity of the transition at the IOPATH-output (at `n.outs[0]`), 0='rising/posedge', 1='falling/negedge' + """ + + def find_cell(name:str): + if name not in circuit.cells: name = name.replace('\\', '') + if name not in circuit.cells: name = name.replace('[', '_').replace(']', '_') + return circuit.cells.get(name, None) + + delays = np.zeros((len(circuit.lines), 2, 2, 3)) # dataset last during construction. + + for name, iopaths in self.cells.items(): + if cell := find_cell(name): + for i_pin_spec, o_pin_spec, *dels in iopaths: + if i_pin_spec.startswith('(posedge '): i_pol_idxs = [0] + elif i_pin_spec.startswith('(negedge '): i_pol_idxs = [1] + else: i_pol_idxs = [0, 1] + i_pin_spec = re.sub(r'\((neg|pos)edge ([^)]+)\)', r'\2', i_pin_spec) + if line := cell.ins[tlib.pin_index(cell.kind, i_pin_spec)]: + delays[line, i_pol_idxs] = [d if len(d) > 0 else [0, 0, 0] for d in dels] + else: + log.warn(f'No line to annotate in circuit: {i_pin_spec} for {cell}') + else: + log.warn(f'Name from SDF not found in circuit: {name}') + + return np.moveaxis(delays, -1, 0) + + def annotation(self, circuit:Circuit, tlib=TechLib(), dataset=1, interconnect=True, ffdelays=True): """Constructs an 3-dimensional ndarray with timing data for each line in ``circuit``. An IOPATH delay for a node is annotated to the line connected to the input pin specified in the IOPATH. @@ -75,11 +107,9 @@ class DelayFile: return sum(_delvals[idx][d] for d in dataset) / len(dataset) return _delvals[idx][dataset] - def find_cell(name): - if name not in circuit.cells: - name = name.replace('\\', '') - if name not in circuit.cells: - name = name.replace('[', '_').replace(']', '_') + def find_cell(name:str): + if name not in circuit.cells: name = name.replace('\\', '') + if name not in circuit.cells: name = name.replace('[', '_').replace(']', '_') if name not in circuit.cells: return None return circuit.cells[name] diff --git a/src/kyupy/wave_sim.py b/src/kyupy/wave_sim.py index 0796968..a6feffd 100644 --- a/src/kyupy/wave_sim.py +++ b/src/kyupy/wave_sim.py @@ -47,7 +47,7 @@ class WaveSim(sim.SimOps): :param keep_waveforms: If disabled, memory of intermediate signal waveforms will be re-used. This greatly reduces memory footprint, but intermediate signal waveforms become unaccessible after a propagation. """ - def __init__(self, circuit, timing, sims=8, c_caps=16, c_reuse=False, strip_forks=False): + def __init__(self, circuit, delays, sims=8, c_caps=16, c_reuse=False, strip_forks=False): assert c_caps > 0 and c_caps % 4 == 0 super().__init__(circuit, c_caps=c_caps//4, c_reuse=c_reuse, strip_forks=strip_forks) self.sims = sims @@ -56,8 +56,8 @@ class WaveSim(sim.SimOps): self.c_locs[...] *= 4 self.c_caps[...] *= 4 - self.timing = np.zeros((self.c_locs_len, 2, 2)) - self.timing[:len(timing)] = timing + self.delays = np.zeros((len(delays), self.c_locs_len, 2, 2), dtype=delays.dtype) + self.delays[:, :delays.shape[1]] = delays self.c = np.zeros((self.c_len, sims), dtype=np.float32) + TMAX self.s = np.zeros((11, self.s_len, sims), dtype=np.float32) @@ -128,7 +128,7 @@ class WaveSim(sim.SimOps): sims = min(sims or self.sims, self.sims) for op_start, op_stop in zip(self.level_starts, self.level_stops): level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, 0, sims, - self.timing, self.params, sd, seed) + self.delays, self.params, sd, seed) def c_to_s(self, time=TMAX, sd=0.0, seed=1): """Simulates a capture operation at all sequential elements and primary outputs. @@ -173,7 +173,7 @@ def rand_gauss_cpu(seed, sd): @numba.njit -def wave_eval_cpu(op, cbuf, c_locs, c_caps, st_idx, line_times, param, sd=0.0, seed=0): +def wave_eval_cpu_old(op, cbuf, c_locs, c_caps, st_idx, line_times, param, sd=0.0, seed=0): lut, z_idx, a_idx, b_idx, c_idx, d_idx = op # >>> same code as wave_eval_cpu (except rand_gauss_*pu()-calls) >>> @@ -191,7 +191,7 @@ def wave_eval_cpu(op, cbuf, c_locs, c_caps, st_idx, line_times, param, sd=0.0, s a_cur = int(0) b_cur = int(0) c_cur = int(0) - d_cur = int(0) + d_cur = int(0) z_cur = lut & 1 if z_cur == 1: cbuf[z_mem, st_idx] = TMIN @@ -276,17 +276,116 @@ def wave_eval_cpu(op, cbuf, c_locs, c_caps, st_idx, line_times, param, sd=0.0, s current_t = min(a, b, c, d) - # generate overflow flag or propagate from input + # generate or propagate overflow flag cbuf[z_mem + z_cur, st_idx] = TMAX_OVL if overflows > 0 else max(a, b, c, d) + @numba.njit -def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, st_start, st_stop, line_times, params, sd, seed): +def wave_eval_cpu(op, cbuf, c_locs, c_caps, st_idx, delays, param, sd=0.0, seed=0): + lut, z_idx, a_idx, b_idx, c_idx, d_idx = op + + # >>> same code as wave_eval_cpu (except rand_gauss_*pu()-calls) >>> + overflows = int(0) + + if len(delays) > 1: + _rnd = (seed << 4) + (z_idx << 20) + (st_idx << 1) + for _ in range(4): + _rnd = int(0xDEECE66D) * _rnd + 0xB + delays = delays[_rnd % len(delays)] + else: + delays = delays[0] + + a_mem = c_locs[a_idx] + b_mem = c_locs[b_idx] + c_mem = c_locs[c_idx] + d_mem = c_locs[d_idx] + z_mem = c_locs[z_idx] + z_cap = c_caps[z_idx] + + a_cur = int(0) + b_cur = int(0) + c_cur = int(0) + d_cur = int(0) + z_cur = lut & 1 + if z_cur == 1: + cbuf[z_mem, st_idx] = TMIN + + z_val = z_cur + + a = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val] + b = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val] + c = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val] + d = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val] + + previous_t = TMIN + + current_t = min(a, b, c, d) + inputs = int(0) + + while current_t < TMAX: + if a == current_t: + a_cur += 1 + inputs ^= 1 + thresh = delays[a_idx, 0, z_val] + a = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val] + next_t = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val ^ 1] + elif b == current_t: + b_cur += 1 + inputs ^= 2 + thresh = delays[b_idx, 0, z_val] + b = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val] + next_t = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val ^ 1] + elif c == current_t: + c_cur += 1 + inputs ^= 4 + thresh = delays[c_idx, 0, z_val] + c = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val] + next_t = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val ^ 1] + else: + d_cur += 1 + inputs ^= 8 + thresh = delays[d_idx, 0, z_val] + d = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val] + next_t = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val ^ 1] + + if (z_cur & 1) != ((lut >> inputs) & 1): + # we generate an edge in z_mem, if ... + if (z_cur == 0 # it is the first edge in z_mem ... + or next_t < current_t # -OR- the next edge on SAME input is EARLIER (need current edge to filter BOTH in next iteration) ... + or (current_t - previous_t) > thresh # -OR- the generated hazard is wider than pulse threshold. + ): + if z_cur < (z_cap - 1): # enough space in z_mem? + cbuf[z_mem + z_cur, st_idx] = current_t + previous_t = current_t + z_cur += 1 + else: + overflows += 1 + previous_t = cbuf[z_mem + z_cur - 1, st_idx] + z_cur -= 1 + else: + z_cur -= 1 + previous_t = cbuf[z_mem + z_cur - 1, st_idx] if z_cur > 0 else TMIN + + # output value of cell changed. update all delayed inputs. + z_val = z_val ^ 1 + a = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val] + b = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val] + c = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val] + d = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val] + + current_t = min(a, b, c, d) + + # generate or propagate overflow flag + cbuf[z_mem + z_cur, st_idx] = TMAX_OVL if overflows > 0 else max(a, b, c, d) + +@numba.njit +def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, st_start, st_stop, delays, params, sd, seed): overflows = 0 for op_idx in range(op_start, op_stop): op = ops[op_idx] for st_idx in range(st_start, st_stop): - wave_eval_cpu(op, c, c_locs, c_caps, st_idx, line_times, params[st_idx], sd, seed) + wave_eval_cpu(op, c, c_locs, c_caps, st_idx, delays, params[st_idx], sd, seed) @numba.njit @@ -342,15 +441,15 @@ class WaveSimCuda(WaveSim): All internal memories are mirrored into GPU memory upon construction. Some operations like access to single waveforms can involve large communication overheads. """ - def __init__(self, circuit, timing, sims=8, c_caps=16, c_reuse=False, strip_forks=False): - super().__init__(circuit, timing, sims, c_caps, c_reuse, strip_forks) + def __init__(self, circuit, delays, sims=8, c_caps=16, c_reuse=False, strip_forks=False): + super().__init__(circuit, delays, sims, c_caps, c_reuse, strip_forks) self.c = cuda.to_device(self.c) self.s = cuda.to_device(self.s) self.ops = cuda.to_device(self.ops) self.c_locs = cuda.to_device(self.c_locs) self.c_caps = cuda.to_device(self.c_caps) - self.timing = cuda.to_device(self.timing) + self.delays = cuda.to_device(self.delays) self.params = cuda.to_device(self.params) self._block_dim = (32, 16) @@ -369,7 +468,7 @@ class WaveSimCuda(WaveSim): for op_start, op_stop in zip(self.level_starts, self.level_stops): grid_dim = self._grid_dim(sims, op_stop - op_start) wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, int(0), - sims, self.timing, self.params, sd, seed) + sims, self.delays, self.params, sd, seed) cuda.synchronize() def c_to_s(self, time=TMAX, sd=0.0, seed=1): @@ -423,7 +522,7 @@ def rand_gauss_gpu(seed, sd): @cuda.jit() -def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, st_start, st_stop, line_times, param, sd, seed): +def wave_eval_gpu_old(ops, op_start, op_stop, cbuf, c_locs, c_caps, st_start, st_stop, line_times, param, sd, seed): x, y = cuda.grid(2) st_idx = st_start + x op_idx = op_start + y @@ -539,7 +638,119 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, st_start, st_sto current_t = min(a, b, c, d) - # generate overflow flag or propagate from input + # generate or propagate overflow flag + cbuf[z_mem + z_cur, st_idx] = TMAX_OVL if overflows > 0 else max(a, b, c, d) + + +@cuda.jit() +def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, st_start, st_stop, delays, param, sd, seed): + x, y = cuda.grid(2) + st_idx = st_start + x + op_idx = op_start + y + if st_idx >= st_stop: return + if op_idx >= op_stop: return + + lut = ops[op_idx, 0] + z_idx = ops[op_idx, 1] + a_idx = ops[op_idx, 2] + b_idx = ops[op_idx, 3] + c_idx = ops[op_idx, 4] + d_idx = ops[op_idx, 5] + + param = param[st_idx] + + # >>> same code as wave_eval_cpu (except rand_gauss_*pu()-calls) >>> + overflows = int(0) + + if len(delays) > 1: + _rnd = (seed << 4) + (z_idx << 20) + (st_idx << 1) + for _ in range(4): + _rnd = int(0xDEECE66D) * _rnd + 0xB + delays = delays[_rnd % len(delays)] + else: + delays = delays[0] + + a_mem = c_locs[a_idx] + b_mem = c_locs[b_idx] + c_mem = c_locs[c_idx] + d_mem = c_locs[d_idx] + z_mem = c_locs[z_idx] + z_cap = c_caps[z_idx] + + a_cur = int(0) + b_cur = int(0) + c_cur = int(0) + d_cur = int(0) + z_cur = lut & 1 + if z_cur == 1: + cbuf[z_mem, st_idx] = TMIN + + z_val = z_cur + + a = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val] + b = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val] + c = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val] + d = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val] + + previous_t = TMIN + + current_t = min(a, b, c, d) + inputs = int(0) + + while current_t < TMAX: + if a == current_t: + a_cur += 1 + inputs ^= 1 + thresh = delays[a_idx, 0, z_val] + a = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val] + next_t = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val ^ 1] + elif b == current_t: + b_cur += 1 + inputs ^= 2 + thresh = delays[b_idx, 0, z_val] + b = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val] + next_t = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val ^ 1] + elif c == current_t: + c_cur += 1 + inputs ^= 4 + thresh = delays[c_idx, 0, z_val] + c = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val] + next_t = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val ^ 1] + else: + d_cur += 1 + inputs ^= 8 + thresh = delays[d_idx, 0, z_val] + d = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val] + next_t = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val ^ 1] + + if (z_cur & 1) != ((lut >> inputs) & 1): + # we generate an edge in z_mem, if ... + if (z_cur == 0 # it is the first edge in z_mem ... + or next_t < current_t # -OR- the next edge on SAME input is EARLIER (need current edge to filter BOTH in next iteration) ... + or (current_t - previous_t) > thresh # -OR- the generated hazard is wider than pulse threshold. + ): + if z_cur < (z_cap - 1): # enough space in z_mem? + cbuf[z_mem + z_cur, st_idx] = current_t + previous_t = current_t + z_cur += 1 + else: + overflows += 1 + previous_t = cbuf[z_mem + z_cur - 1, st_idx] + z_cur -= 1 + else: + z_cur -= 1 + previous_t = cbuf[z_mem + z_cur - 1, st_idx] if z_cur > 0 else TMIN + + # output value of cell changed. update all delayed inputs. + z_val = z_val ^ 1 + a = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val] + b = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val] + c = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val] + d = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val] + + current_t = min(a, b, c, d) + + # generate or propagate overflow flag cbuf[z_mem + z_cur, st_idx] = TMAX_OVL if overflows > 0 else max(a, b, c, d) diff --git a/tests/conftest.py b/tests/conftest.py index 5d88c6b..e2b538e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,6 @@ def b14_circuit(mydir): return verilog.load(mydir / 'b14.v.gz', branchforks=True) @pytest.fixture(scope='session') -def b14_timing(mydir, b14_circuit): +def b14_delays(mydir, b14_circuit): from kyupy import sdf - return sdf.load(mydir / 'b14.sdf.gz').annotation(b14_circuit) + return sdf.load(mydir / 'b14.sdf.gz').iopaths(b14_circuit)[1:2] diff --git a/tests/test_wave_sim.py b/tests/test_wave_sim.py index 6a4bad3..1003689 100644 --- a/tests/test_wave_sim.py +++ b/tests/test_wave_sim.py @@ -11,28 +11,28 @@ def test_nand_delays(): c = np.full((5*16, 1), TMAX) # 5 waveforms of capacity 16 c_locs = np.zeros((5,), dtype='int') c_caps = np.zeros((5,), dtype='int') - + for i in range(5): c_locs[i], c_caps[i] = i*16, 16 # 1:1 mapping # SDF specifies IOPATH delays with respect to output polarity # SDF pulse rejection value is determined by IOPATH causing last transition and polarity of last transition - line_times = np.zeros((5, 2, 2)) - line_times[0, 0, 0] = 0.1 # A -> Z rise delay - line_times[0, 0, 1] = 0.2 # A -> Z fall delay - line_times[0, 1, 0] = 0.1 # A -> Z negative pulse limit (terminate in rising Z) - line_times[0, 1, 1] = 0.2 # A -> Z positive pulse limit - line_times[1, :, 0] = 0.3 # as above for B -> Z - line_times[1, :, 1] = 0.4 - line_times[2, :, 0] = 0.5 # as above for C -> Z - line_times[2, :, 1] = 0.6 - line_times[3, :, 0] = 0.7 # as above for D -> Z - line_times[3, :, 1] = 0.8 + delays = np.zeros((1, 5, 2, 2)) + delays[0, 0, 0, 0] = 0.1 # A -> Z rise delay + delays[0, 0, 0, 1] = 0.2 # A -> Z fall delay + delays[0, 0, 1, 0] = 0.1 # A -> Z negative pulse limit (terminate in rising Z) + delays[0, 0, 1, 1] = 0.2 # A -> Z positive pulse limit + delays[0, 1, :, 0] = 0.3 # as above for B -> Z + delays[0, 1, :, 1] = 0.4 + delays[0, 2, :, 0] = 0.5 # as above for C -> Z + delays[0, 2, :, 1] = 0.6 + delays[0, 3, :, 0] = 0.7 # as above for D -> Z + delays[0, 3, :, 1] = 0.8 sdata = np.asarray([1, -1, 0, 0], dtype='float32') def wave_assert(inputs, output): for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i - wave_eval_cpu(op, c, c_locs, c_caps, 0, line_times, sdata) + wave_eval_cpu(op, c, c_locs, c_caps, 0, delays, sdata) for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[4,i], v) wave_assert([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1 @@ -50,75 +50,74 @@ def test_nand_delays(): def test_tiny_circuit(): c = bench.parse('input(x, y) output(a, o, n) a=and(x,y) o=or(x,y) n=not(x)') - lt = np.zeros((len(c.lines), 2, 2)) - lt[:,0,:] = 1.0 # unit delay for all lines - wsim = WaveSim(c, lt) + delays = np.full((1, len(c.lines), 2, 2), 1.0) # unit delay for all lines + wsim = WaveSim(c, delays) assert wsim.s.shape[1] == 5 # values for x - wsim.s[:3,0,0] = 0, 0.1, 0 - wsim.s[:3,0,1] = 0, 0.2, 1 - wsim.s[:3,0,2] = 1, 0.3, 0 - wsim.s[:3,0,3] = 1, 0.4, 1 - + wsim.s[:3,0,0] = 0, 10, 0 + wsim.s[:3,0,1] = 0, 20, 1 + wsim.s[:3,0,2] = 1, 30, 0 + wsim.s[:3,0,3] = 1, 40, 1 + # values for y - wsim.s[:3,1,0] = 1, 0.5, 0 - wsim.s[:3,1,1] = 1, 0.6, 0 - wsim.s[:3,1,2] = 1, 0.7, 0 - wsim.s[:3,1,3] = 0, 0.8, 1 + wsim.s[:3,1,0] = 1, 50, 0 + wsim.s[:3,1,1] = 1, 60, 0 + wsim.s[:3,1,2] = 1, 70, 0 + wsim.s[:3,1,3] = 0, 80, 1 wsim.s_to_c() x_c_loc = wsim.c_locs[wsim.ppi_offset+0] # check x waveforms np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 0], [TMAX, TMAX, TMAX]) - np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 1], [0.2, TMAX, TMAX]) - np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 2], [TMIN, 0.3, TMAX]) + np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 1], [20, TMAX, TMAX]) + np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 2], [TMIN, 30, TMAX]) np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 3], [TMIN, TMAX, TMAX]) y_c_loc = wsim.c_locs[wsim.ppi_offset+1] # check y waveforms - np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 0], [TMIN, 0.5, TMAX]) - np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 1], [TMIN, 0.6, TMAX]) - np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 2], [TMIN, 0.7, TMAX]) - np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 3], [0.8, TMAX, TMAX]) + np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 0], [TMIN, 50, TMAX]) + np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 1], [TMIN, 60, TMAX]) + np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 2], [TMIN, 70, TMAX]) + np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 3], [80, TMAX, TMAX]) wsim.c_prop() a_c_loc = wsim.c_locs[wsim.ppo_offset+2] # check a waveforms np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 0], [TMAX, TMAX, TMAX]) - np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 1], [1.2, 1.6, TMAX]) - np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 2], [TMIN, 1.3, TMAX]) - np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 3], [1.8, TMAX, TMAX]) + np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 1], [21, 61, TMAX]) + np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 2], [TMIN, 31, TMAX]) + np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 3], [81, TMAX, TMAX]) o_c_loc = wsim.c_locs[wsim.ppo_offset+3] # check o waveforms - np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 0], [TMIN, 1.5, TMAX]) + np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 0], [TMIN, 51, TMAX]) np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 1], [TMIN, TMAX, TMAX]) - np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 2], [TMIN, 1.7, TMAX]) + np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 2], [TMIN, 71, TMAX]) np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 3], [TMIN, TMAX, TMAX]) n_c_loc = wsim.c_locs[wsim.ppo_offset+4] # check n waveforms np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 0], [TMIN, TMAX, TMAX]) - np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 1], [TMIN, 1.2, TMAX]) - np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 2], [1.3, TMAX, TMAX]) + np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 1], [TMIN, 21, TMAX]) + np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 2], [31, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 3], [TMAX, TMAX, TMAX]) wsim.c_to_s() # check a captures np.testing.assert_allclose(wsim.s[3:7, 2, 0], [0, TMAX, TMIN, 0]) - np.testing.assert_allclose(wsim.s[3:7, 2, 1], [0, 1.2, 1.6, 0]) - np.testing.assert_allclose(wsim.s[3:7, 2, 2], [1, 1.3, 1.3, 0]) - np.testing.assert_allclose(wsim.s[3:7, 2, 3], [0, 1.8, 1.8, 1]) - - # check o captures - np.testing.assert_allclose(wsim.s[3:7, 3, 0], [1, 1.5, 1.5, 0]) + np.testing.assert_allclose(wsim.s[3:7, 2, 1], [0, 21, 61, 0]) + np.testing.assert_allclose(wsim.s[3:7, 2, 2], [1, 31, 31, 0]) + np.testing.assert_allclose(wsim.s[3:7, 2, 3], [0, 81, 81, 1]) + + # check o captures + np.testing.assert_allclose(wsim.s[3:7, 3, 0], [1, 51, 51, 0]) np.testing.assert_allclose(wsim.s[3:7, 3, 1], [1, TMAX, TMIN, 1]) - np.testing.assert_allclose(wsim.s[3:7, 3, 2], [1, 1.7, 1.7, 0]) + np.testing.assert_allclose(wsim.s[3:7, 3, 2], [1, 71, 71, 0]) np.testing.assert_allclose(wsim.s[3:7, 3, 3], [1, TMAX, TMIN, 1]) - - # check o captures + + # check o captures np.testing.assert_allclose(wsim.s[3:7, 4, 0], [1, TMAX, TMIN, 1]) - np.testing.assert_allclose(wsim.s[3:7, 4, 1], [1, 1.2, 1.2, 0]) - np.testing.assert_allclose(wsim.s[3:7, 4, 2], [0, 1.3, 1.3, 1]) + np.testing.assert_allclose(wsim.s[3:7, 4, 1], [1, 21, 21, 0]) + np.testing.assert_allclose(wsim.s[3:7, 4, 2], [0, 31, 31, 1]) np.testing.assert_allclose(wsim.s[3:7, 4, 3], [0, TMAX, TMIN, 0]) @@ -157,13 +156,16 @@ def compare_to_logic_sim(wsim: WaveSim): np.testing.assert_allclose(resp, exp) -def test_b14(b14_circuit, b14_timing): - compare_to_logic_sim(WaveSim(b14_circuit, b14_timing, 8)) +def test_b14(b14_circuit, b14_delays): + compare_to_logic_sim(WaveSim(b14_circuit, b14_delays, 8)) + +def test_b14_strip_forks(b14_circuit, b14_delays): + compare_to_logic_sim(WaveSim(b14_circuit, b14_delays, 8, strip_forks=True)) -def test_b14_strip_forks(b14_circuit, b14_timing): - compare_to_logic_sim(WaveSim(b14_circuit, b14_timing, 8, strip_forks=True)) +def test_b14_cuda(b14_circuit, b14_delays): + compare_to_logic_sim(WaveSimCuda(b14_circuit, b14_delays, 8, strip_forks=True)) -def test_b14_cuda(b14_circuit, b14_timing): - compare_to_logic_sim(WaveSimCuda(b14_circuit, b14_timing, 8, strip_forks=True)) +if __name__ == '__main__': + test_nand_delays() \ No newline at end of file