diff --git a/src/kyupy/wave_sim4.py b/src/kyupy/wave_sim4.py index 3a44f2a..183df03 100644 --- a/src/kyupy/wave_sim4.py +++ b/src/kyupy/wave_sim4.py @@ -10,7 +10,6 @@ The simulators are not event-based and are not capable of simulating sequential """ import math -from bisect import bisect, insort_left import numpy as np @@ -76,19 +75,14 @@ class WaveSim(SimOps): * ``s[..., 8]`` (P)PO sampled capture value: decided by random sampling according to a given seed. * ``s[..., 9]`` (P)PO sampled capture slack: (capture time - LST) - decided by random sampling according to a given seed. * ``s[..., 10]`` Overflow indicator: If non-zero, some signals in the input cone of this output had more - transitions than specified in ``wavecaps``. Some transitions have been discarded, the + transitions than specified in ``c_caps``. Some transitions have been discarded, the final values in the waveforms are still valid. """ self.params = np.zeros((sims, 4), dtype=np.float32) self.params[...,0] = 1.0 - m1 = np.array([2 ** x for x in range(7, -1, -1)], dtype=np.uint8) - m0 = ~m1 - self.mask = np.rollaxis(np.vstack((m0, m1)), 1) - - self.overflows = 0 - self.lst_eat_valid = False + self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.vat, self.ops, self.params)]) self.pi_s_locs = np.flatnonzero(self.vat[self.ppi_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0) self.po_s_locs = np.flatnonzero(self.vat[self.ppo_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0) @@ -105,20 +99,9 @@ class WaveSim(SimOps): self.pippi_c_locs = np.concatenate([self.pi_c_locs, self.ppi_c_locs]) self.poppo_c_locs = np.concatenate([self.po_c_locs, self.ppo_c_locs]) - self.wave_capture = numba.njit(WaveSim.wave_capture) - def __repr__(self): - total_mem = self.c.nbytes + self.vat.nbytes + self.ops.nbytes + self.s.nbytes - return f'' - - def get_line_delay(self, line, polarity): - """Returns the current delay of the given ``line`` and ``polarity`` in the simulation model.""" - return self.timing[line, 
0, polarity] - - def set_line_delay(self, line, polarity, delay): - """Sets a new ``delay`` for the given ``line`` and ``polarity`` in the simulation model.""" - self.timing[line, 0, polarity] = delay + return f'<{type(self).__name__} {self.circuit.name} sims={self.sims} ops={len(self.ops)} ' + \ + f'levels={len(self.level_starts)} mem={hr_bytes(self.nbytes)}>' def s_to_c(self): """Transfers values of sequential elements and primary inputs to the combinational portion. @@ -141,9 +124,8 @@ class WaveSim(SimOps): """ sims = min(sims or self.sims, self.sims) for op_start, op_stop in zip(self.level_starts, self.level_stops): - self.overflows += level_eval(self.ops, op_start, op_stop, self.c, self.vat, 0, sims, + level_eval_cpu(self.ops, op_start, op_stop, self.c, self.vat, 0, sims, self.timing, self.params, sd, seed) - self.lst_eat_valid = False def c_to_s(self, time=TMAX, sd=0.0, seed=1): """Simulates a capture operation at all sequential elements and primary outputs. @@ -157,7 +139,7 @@ class WaveSim(SimOps): """ for s_loc, (c_loc, c_len, _) in zip(self.poppo_s_locs, self.vat[self.ppo_offset+self.poppo_s_locs]): for vector in range(self.sims): - self.s[s_loc, vector, 3:] = self.wave_capture(self.c, c_loc, c_len, vector, time=time, sd=sd, seed=seed) + self.s[s_loc, vector, 3:] = wave_capture_cpu(self.c, c_loc, c_len, vector, time=time, sd=sd, seed=seed) def s_ppo_to_ppi(self, time=0.0): """Re-assigns the last sampled capture to the appropriate pseudo-primary inputs (PPI). 
@@ -170,64 +152,229 @@ class WaveSim(SimOps): self.s[self.ppio_s_locs, :, 1] = time self.s[self.ppio_s_locs, :, 2] = self.s[self.ppio_s_locs, :, 8] - @staticmethod - def wave_capture(c, c_loc, c_len, vector, time=TMAX, sd=0.0, seed=1): - s_sqrt2 = sd * math.sqrt(2) - m = 0.5 - acc = 0.0 - eat = TMAX - lst = TMIN - tog = 0 - ovl = 0 - val = int(0) - final = int(0) - w = c[c_loc:c_loc+c_len, vector] - for t in w: - if t >= TMAX: - if t == TMAX_OVL: - ovl = 1 - break - m = -m - final ^= 1 - if t < time: - val ^= 1 - if t <= TMIN: continue - if s_sqrt2 > 0: - acc += m * (1 + math.erf((t - time) / s_sqrt2)) - eat = min(eat, t) - lst = max(lst, t) - tog += 1 - if s_sqrt2 > 0: - if m < 0: - acc += 1 - if acc >= 0.99: - val = 1 - elif acc > 0.01: - seed = (seed << 4) + (vector << 20) + c_loc - seed = int(0xDEECE66D) * seed + 0xB - seed = int(0xDEECE66D) * seed + 0xB - rnd = float((seed >> 8) & 0xffffff) / float(1 << 24) - val = rnd < acc - else: - val = 0 - else: - acc = val - return (w[0] <= TMIN), eat, lst, final, acc, val, 0, ovl +@numba.njit +def rand_gauss_cpu(seed, sd): + clamp = 0.5 + if sd <= 0.0: + return 1.0 + while True: + x = -6.0 + for _ in range(12): + seed = int(0xDEECE66D) * seed + 0xB + x += float((seed >> 8) & 0xffffff) / float(1 << 24) + x *= sd + if abs(x) <= clamp: + break + return x + 1.0 + + +@numba.njit +def wave_eval_cpu(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0): + lut, z_idx, a_idx, b_idx, c_idx, d_idx = op + + # >>> same code as wave_eval_gpu (except rand_gauss_*pu()-call) >>> + overflows = int(0) + + _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1) + + a_mem = vat[a_idx, 0] + b_mem = vat[b_idx, 0] + c_mem = vat[c_idx, 0] + d_mem = vat[d_idx, 0] + z_mem, z_cap, _ = vat[z_idx] + + a_cur = int(0) + b_cur = int(0) + c_cur = int(0) + d_cur = int(0) + z_cur = lut & 1 + if z_cur == 1: + cbuf[z_mem, st_idx] = TMIN + + a = cbuf[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss_cpu(_seed ^ a_mem ^ z_cur, sd) * param[0] + if 
int(param[1]) == a_idx: a += param[2+z_cur] + b = cbuf[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss_cpu(_seed ^ b_mem ^ z_cur, sd) * param[0] + if int(param[1]) == b_idx: b += param[2+z_cur] + c = cbuf[c_mem, st_idx] + line_times[c_idx, 0, z_cur] * rand_gauss_cpu(_seed ^ c_mem ^ z_cur, sd) * param[0] + if int(param[1]) == c_idx: c += param[2+z_cur] + d = cbuf[d_mem, st_idx] + line_times[d_idx, 0, z_cur] * rand_gauss_cpu(_seed ^ d_mem ^ z_cur, sd) * param[0] + if int(param[1]) == d_idx: d += param[2+z_cur] + + previous_t = TMIN + + current_t = min(a, b, c, d) + inputs = int(0) + + while current_t < TMAX: + z_val = z_cur & 1 + if a == current_t: + a_cur += 1 + a = cbuf[a_mem + a_cur, st_idx] + a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss_cpu(_seed ^ a_mem ^ z_val ^ 1, sd) * param[0] + thresh = line_times[a_idx, 1, z_val] * rand_gauss_cpu(_seed ^ a_mem ^ z_val, sd) * param[0] + if int(param[1]) == a_idx: + a += param[2+(z_val^1)] + thresh += param[2+z_val] + inputs ^= 1 + next_t = a + + elif b == current_t: + b_cur += 1 + b = cbuf[b_mem + b_cur, st_idx] + b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss_cpu(_seed ^ b_mem ^ z_val ^ 1, sd) * param[0] + thresh = line_times[b_idx, 1, z_val] * rand_gauss_cpu(_seed ^ b_mem ^ z_val, sd) * param[0] + if int(param[1]) == b_idx: + b += param[2+(z_val^1)] + thresh += param[2+z_val] + inputs ^= 2 + next_t = b + + elif c == current_t: + c_cur += 1 + c = cbuf[c_mem + c_cur, st_idx] + c += line_times[c_idx, 0, z_val ^ 1] * rand_gauss_cpu(_seed ^ c_mem ^ z_val ^ 1, sd) * param[0] + thresh = line_times[c_idx, 1, z_val] * rand_gauss_cpu(_seed ^ c_mem ^ z_val, sd) * param[0] + if int(param[1]) == c_idx: + c += param[2+(z_val^1)] + thresh += param[2+z_val] + inputs ^= 4 + next_t = c + + else: + d_cur += 1 + d = cbuf[d_mem + d_cur, st_idx] + d += line_times[d_idx, 0, z_val ^ 1] * rand_gauss_cpu(_seed ^ d_mem ^ z_val ^ 1, sd) * param[0] + thresh = line_times[d_idx, 1, z_val] * rand_gauss_cpu(_seed ^ d_mem ^ z_val, sd) * 
param[0] + if int(param[1]) == d_idx: + d += param[2+(z_val^1)] + thresh += param[2+z_val] + inputs ^= 8 + next_t = d + + if (z_cur & 1) != ((lut >> inputs) & 1): + # we generate a toggle in z_mem, if: + # ( it is the first toggle in z_mem OR + # following toggle is earlier OR + # pulse is wide enough ) AND enough space in z_mem. + if z_cur == 0 or next_t < current_t or (current_t - previous_t) > thresh: + if z_cur < (z_cap - 1): + cbuf[z_mem + z_cur, st_idx] = current_t + previous_t = current_t + z_cur += 1 + else: + overflows += 1 + previous_t = cbuf[z_mem + z_cur - 1, st_idx] + z_cur -= 1 + else: + z_cur -= 1 + previous_t = cbuf[z_mem + z_cur - 1, st_idx] if z_cur > 0 else TMIN + + current_t = min(a, b, c, d) + # generate overflow flag or propagate from input + cbuf[z_mem + z_cur, st_idx] = TMAX_OVL if overflows > 0 else max(a, b, c, d) + @numba.njit -def level_eval(ops, op_start, op_stop, c, vat, st_start, st_stop, line_times, params, sd, seed): +def level_eval_cpu(ops, op_start, op_stop, c, vat, st_start, st_stop, line_times, params, sd, seed): overflows = 0 for op_idx in range(op_start, op_stop): op = ops[op_idx] for st_idx in range(st_start, st_stop): - overflows += wave_eval(op, c, vat, st_idx, line_times, params[st_idx], sd, seed) - return overflows + wave_eval_cpu(op, c, vat, st_idx, line_times, params[st_idx], sd, seed) @numba.njit -def rand_gauss(seed, sd): +def wave_capture_cpu(c, c_loc, c_len, vector, time=TMAX, sd=0.0, seed=1): + s_sqrt2 = sd * math.sqrt(2) + m = 0.5 + acc = 0.0 + eat = TMAX + lst = TMIN + tog = 0 + ovl = 0 + val = int(0) + final = int(0) + w = c[c_loc:c_loc+c_len, vector] + for t in w: + if t >= TMAX: + if t == TMAX_OVL: + ovl = 1 + break + m = -m + final ^= 1 + if t < time: + val ^= 1 + if t <= TMIN: continue + if s_sqrt2 > 0: + acc += m * (1 + math.erf((t - time) / s_sqrt2)) + eat = min(eat, t) + lst = max(lst, t) + tog += 1 + if s_sqrt2 > 0: + if m < 0: + acc += 1 + if acc >= 0.99: + val = 1 + elif acc > 0.01: + seed = (seed << 
4) + (vector << 20) + c_loc + seed = int(0xDEECE66D) * seed + 0xB + seed = int(0xDEECE66D) * seed + 0xB + rnd = float((seed >> 8) & 0xffffff) / float(1 << 24) + val = rnd < acc + else: + val = 0 + else: + acc = val + + return (w[0] <= TMIN), eat, lst, final, acc, val, 0, ovl + + +class WaveSimCuda(WaveSim): + """A GPU-accelerated waveform-based combinational logic timing simulator. + + The API is the same as for :py:class:`WaveSim`. + All internal memories are mirrored into GPU memory upon construction. + Some operations like access to single waveforms can involve large communication overheads. + """ + def __init__(self, circuit, timing, sims=8, c_caps=16, c_reuse=False, strip_forks=False): + super().__init__(circuit, timing, sims, c_caps, c_reuse, strip_forks) + + self.c = cuda.to_device(self.c) + self.s = cuda.to_device(self.s) + self.ops = cuda.to_device(self.ops) + self.vat = cuda.to_device(self.vat) + self.timing = cuda.to_device(self.timing) + self.params = cuda.to_device(self.params) + + self._block_dim = (32, 16) + + # TODO implement on GPU + #def s_to_c(self): + + def _grid_dim(self, x, y): + gx = math.ceil(x / self._block_dim[0]) + gy = math.ceil(y / self._block_dim[1]) + return gx, gy + + def c_prop(self, sims=None, sd=0.0, seed=1): + sims = min(sims or self.sims, self.sims) + for op_start, op_stop in zip(self.level_starts, self.level_stops): + grid_dim = self._grid_dim(sims, op_stop - op_start) + wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.vat, int(0), + sims, self.timing, self.params, sd, seed) + cuda.synchronize() + + # TODO implement on GPU + #def c_to_s(self): + + # TODO implement on GPU + #def s_ppo_to_ppi(self, time=0.0): + + +@cuda.jit(device=True) +def rand_gauss_gpu(seed, sd): clamp = 0.5 if sd <= 0.0: return 1.0 @@ -242,9 +389,24 @@ def rand_gauss(seed, sd): return x + 1.0 -@numba.njit -def wave_eval(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0): - lut, z_idx, a_idx, b_idx, c_idx, d_idx = op 
+@cuda.jit() +def wave_eval_gpu(ops, op_start, op_stop, cbuf, vat, st_start, st_stop, line_times, param, sd, seed): + x, y = cuda.grid(2) + st_idx = st_start + x + op_idx = op_start + y + if st_idx >= st_stop: return + if op_idx >= op_stop: return + + lut = ops[op_idx, 0] + z_idx = ops[op_idx, 1] + a_idx = ops[op_idx, 2] + b_idx = ops[op_idx, 3] + c_idx = ops[op_idx, 4] + d_idx = ops[op_idx, 5] + + param = param[st_idx] + + # >>> same code as wave_eval_cpu (except rand_gauss_*pu()-call) >>> overflows = int(0) _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1) @@ -263,13 +425,13 @@ def wave_eval(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0): if z_cur == 1: cbuf[z_mem, st_idx] = TMIN - a = cbuf[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss(_seed ^ a_mem ^ z_cur, sd) * param[0] + a = cbuf[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss_gpu(_seed ^ a_mem ^ z_cur, sd) * param[0] if int(param[1]) == a_idx: a += param[2+z_cur] - b = cbuf[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss(_seed ^ b_mem ^ z_cur, sd) * param[0] + b = cbuf[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss_gpu(_seed ^ b_mem ^ z_cur, sd) * param[0] if int(param[1]) == b_idx: b += param[2+z_cur] - c = cbuf[c_mem, st_idx] + line_times[c_idx, 0, z_cur] * rand_gauss(_seed ^ c_mem ^ z_cur, sd) * param[0] + c = cbuf[c_mem, st_idx] + line_times[c_idx, 0, z_cur] * rand_gauss_gpu(_seed ^ c_mem ^ z_cur, sd) * param[0] if int(param[1]) == c_idx: c += param[2+z_cur] - d = cbuf[d_mem, st_idx] + line_times[d_idx, 0, z_cur] * rand_gauss(_seed ^ d_mem ^ z_cur, sd) * param[0] + d = cbuf[d_mem, st_idx] + line_times[d_idx, 0, z_cur] * rand_gauss_gpu(_seed ^ d_mem ^ z_cur, sd) * param[0] if int(param[1]) == d_idx: d += param[2+z_cur] previous_t = TMIN @@ -282,8 +444,8 @@ def wave_eval(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0): if a == current_t: a_cur += 1 a = cbuf[a_mem + a_cur, st_idx] - a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ 
a_mem ^ z_val ^ 1, sd) * param[0] - thresh = line_times[a_idx, 1, z_val] * rand_gauss(_seed ^ a_mem ^ z_val, sd) * param[0] + a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss_gpu(_seed ^ a_mem ^ z_val ^ 1, sd) * param[0] + thresh = line_times[a_idx, 1, z_val] * rand_gauss_gpu(_seed ^ a_mem ^ z_val, sd) * param[0] if int(param[1]) == a_idx: a += param[2+(z_val^1)] thresh += param[2+z_val] @@ -293,8 +455,8 @@ def wave_eval(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0): elif b == current_t: b_cur += 1 b = cbuf[b_mem + b_cur, st_idx] - b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ b_mem ^ z_val ^ 1, sd) * param[0] - thresh = line_times[b_idx, 1, z_val] * rand_gauss(_seed ^ b_mem ^ z_val, sd) * param[0] + b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss_gpu(_seed ^ b_mem ^ z_val ^ 1, sd) * param[0] + thresh = line_times[b_idx, 1, z_val] * rand_gauss_gpu(_seed ^ b_mem ^ z_val, sd) * param[0] if int(param[1]) == b_idx: b += param[2+(z_val^1)] thresh += param[2+z_val] @@ -304,8 +466,8 @@ def wave_eval(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0): elif c == current_t: c_cur += 1 c = cbuf[c_mem + c_cur, st_idx] - c += line_times[c_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ c_mem ^ z_val ^ 1, sd) * param[0] - thresh = line_times[c_idx, 1, z_val] * rand_gauss(_seed ^ c_mem ^ z_val, sd) * param[0] + c += line_times[c_idx, 0, z_val ^ 1] * rand_gauss_gpu(_seed ^ c_mem ^ z_val ^ 1, sd) * param[0] + thresh = line_times[c_idx, 1, z_val] * rand_gauss_gpu(_seed ^ c_mem ^ z_val, sd) * param[0] if int(param[1]) == c_idx: c += param[2+(z_val^1)] thresh += param[2+z_val] @@ -315,19 +477,13 @@ def wave_eval(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0): else: d_cur += 1 d = cbuf[d_mem + d_cur, st_idx] - d += line_times[d_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ d_mem ^ z_val ^ 1, sd) * param[0] - thresh = line_times[d_idx, 1, z_val] * rand_gauss(_seed ^ d_mem ^ z_val, sd) * param[0] + d += line_times[d_idx, 0, z_val ^ 1] * rand_gauss_gpu(_seed 
^ d_mem ^ z_val ^ 1, sd) * param[0] + thresh = line_times[d_idx, 1, z_val] * rand_gauss_gpu(_seed ^ d_mem ^ z_val, sd) * param[0] if int(param[1]) == d_idx: d += param[2+(z_val^1)] thresh += param[2+z_val] inputs ^= 8 next_t = d - #print("previous_t",previous_t) - #print("current_t",current_t) - #print(current_t - previous_t) - #print(thresh) - #print(z_cur & 1) - #print((lut >> inputs) & 1) if (z_cur & 1) != ((lut >> inputs) & 1): # we generate a toggle in z_mem, if: @@ -335,12 +491,8 @@ def wave_eval(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0): # following toggle is earlier OR # pulse is wide enough ) AND enough space in z_mem. if z_cur == 0 or next_t < current_t or (current_t - previous_t) > thresh: - #print(current_t - previous_t) - #print(thresh) - #print(z_cap) if z_cur < (z_cap - 1): cbuf[z_mem + z_cur, st_idx] = current_t - #print(cbuf[z_mem + z_cur, st_idx]) previous_t = current_t z_cur += 1 else: @@ -348,18 +500,10 @@ def wave_eval(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0): previous_t = cbuf[z_mem + z_cur - 1, st_idx] z_cur -= 1 else: - #print(a) z_cur -= 1 - if z_cur > 0: - previous_t = cbuf[z_mem + z_cur - 1, st_idx] - else: - previous_t = TMIN - + previous_t = cbuf[z_mem + z_cur - 1, st_idx] if z_cur > 0 else TMIN + current_t = min(a, b, c, d) - - if overflows > 0: - cbuf[z_mem + z_cur, st_idx] = TMAX_OVL - else: - cbuf[z_mem + z_cur, st_idx] = a if a == max(a, b, c, d) else b if b == max(a, b, c, d) else c if c == max(a, b, c, d) else d # propagate overflow flags by storing biggest TMAX from input - return overflows + # generate overflow flag or propagate from input + cbuf[z_mem + z_cur, st_idx] = TMAX_OVL if overflows > 0 else max(a, b, c, d) \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index e82b2fc..5d88c6b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,8 +1,18 @@ import pytest -@pytest.fixture +@pytest.fixture(scope='session') def mydir(): import os from pathlib import 
Path return Path(os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))) + +@pytest.fixture(scope='session') +def b14_circuit(mydir): + from kyupy import verilog + return verilog.load(mydir / 'b14.v.gz', branchforks=True) + +@pytest.fixture(scope='session') +def b14_timing(mydir, b14_circuit): + from kyupy import sdf + return sdf.load(mydir / 'b14.sdf.gz').annotation(b14_circuit) diff --git a/tests/test_wave_sim4.py b/tests/test_wave_sim4.py index 9fe3d3d..918bd06 100644 --- a/tests/test_wave_sim4.py +++ b/tests/test_wave_sim4.py @@ -1,6 +1,6 @@ import numpy as np -from kyupy.wave_sim4 import WaveSim, wave_eval, TMIN, TMAX +from kyupy.wave_sim4 import WaveSim, WaveSimCuda, wave_eval_cpu, TMIN, TMAX from kyupy.logic_sim import LogicSim from kyupy import verilog, sdf, logic, bench from kyupy.logic import MVArray, BPArray @@ -32,7 +32,7 @@ def test_nand_delays(): def wave_assert(inputs, output): for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i - wave_eval(op, c, vat, 0, line_times, sdata) + wave_eval_cpu(op, c, vat, 0, line_times, sdata) for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[4,i], v) wave_assert([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1 @@ -53,7 +53,6 @@ def test_tiny_circuit(): lt = np.zeros((len(c.lines), 2, 2)) lt[:,0,:] = 1.0 # unit delay for all lines wsim = WaveSim(c, lt) - print(wsim.prim_counts) assert len(wsim.s) == 5 # values for x @@ -157,18 +156,11 @@ def compare_to_logic_sim(wsim: WaveSim): assert res_str == exp_str -def test_b14(mydir): - c = verilog.load(mydir / 'b14.v.gz', branchforks=True) - df = sdf.load(mydir / 'b14.sdf.gz') - lt = df.annotation(c) - wsim = WaveSim(c, lt, 8) - compare_to_logic_sim(wsim) +def test_b14(b14_circuit, b14_timing): + compare_to_logic_sim(WaveSim(b14_circuit, b14_timing, 8)) +def test_b14_strip_forks(b14_circuit, b14_timing): + compare_to_logic_sim(WaveSim(b14_circuit, b14_timing, 8, strip_forks=True)) -def 
test_b14_strip_forks(mydir): - c = verilog.load(mydir / 'b14.v.gz', branchforks=True) - df = sdf.load(mydir / 'b14.sdf.gz') - lt = df.annotation(c) - wsim = WaveSim(c, lt, 8, strip_forks=True) - compare_to_logic_sim(wsim) - +def test_b14_cuda(b14_circuit, b14_timing): + compare_to_logic_sim(WaveSimCuda(b14_circuit, b14_timing, 8, strip_forks=True))