From 7a060b183151a4dc956da2998e11d15db9bfdc76 Mon Sep 17 00:00:00 2001 From: Stefan Holst Date: Tue, 28 Mar 2023 11:10:38 +0900 Subject: [PATCH] support for static variations --- src/kyupy/__init__.py | 7 ++-- src/kyupy/wave_sim.py | 82 +++++++++++++++++++++--------------------- tests/test_wave_sim.py | 4 +-- 3 files changed, 46 insertions(+), 47 deletions(-) diff --git a/src/kyupy/__init__.py b/src/kyupy/__init__.py index 06c9ab8..cd42d1b 100644 --- a/src/kyupy/__init__.py +++ b/src/kyupy/__init__.py @@ -101,12 +101,13 @@ class Timer: class Timers: def __init__(self, t={}): self.timers = defaultdict(Timer) | t def __getitem__(self, name): return self.timers[name] - def __getattr__(self, name): return self.timers[name] def __repr__(self): return '{' + ', '.join([f'{k}: {v}' for k, v in self.timers.items()]) + '}' def __add__(self, t): tmr = Timers(self.timers) for k, v in t.timers.items(): tmr.timers[k] += v return tmr + def sum(self): + return sum([v.s for v in self.timers.values()]) def dict(self): return dict([(k, v.s) for k, v in self.timers.items()]) @@ -204,7 +205,7 @@ class MockCuda: self.x = 0 self.y = 0 - def jit(self, device=False): + def jit(self, func=None, device=False): _ = device # silence "not used" warning outer = self @@ -232,7 +233,7 @@ class MockCuda: return inner return Launcher(func) - return make_launcher + return make_launcher(func) if func else make_launcher @staticmethod def to_device(array, to=None): diff --git a/src/kyupy/wave_sim.py b/src/kyupy/wave_sim.py index 297fd4d..2f3d77d 100644 --- a/src/kyupy/wave_sim.py +++ b/src/kyupy/wave_sim.py @@ -76,10 +76,10 @@ class WaveSim(sim.SimOps): final values in the waveforms are still valid. """ - self.params = np.zeros((sims, 4), dtype=np.float32) - self.params[...,0] = 1.0 + self.simctl_int = np.zeros((1, sims), dtype=np.int32) + self.simctl_int[0] = range(sims) - self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.c_locs, self.c_caps, self.ops, self.params)]) + self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.c_locs, self.c_caps, self.ops, self.simctl_int)]) def __repr__(self): return f'<{type(self).__name__} {self.circuit.name} sims={self.sims} ops={len(self.ops)} ' + \ @@ -105,7 +105,7 @@ class WaveSim(sim.SimOps): """ sims = min(sims or self.sims, self.sims) for op_start, op_stop in zip(self.level_starts, self.level_stops): - level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, 0, sims, self.delays, self.params, seed) + level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, 0, sims, self.delays, self.simctl_int, seed) def c_to_s(self, time=TMAX, sd=0.0, seed=1): """Simulates a capture operation at all sequential elements and primary outputs. @@ -133,11 +133,11 @@ class WaveSim(sim.SimOps): self.s[2, self.ppio_s_locs] = self.s[8, self.ppio_s_locs] -def _wave_eval(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, st_idx, delays, param, seed=0): +def _wave_eval(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0): overflows = int(0) if len(delays) > 1: - _rnd = (seed << 4) + (z_idx << 20) + (st_idx << 1) + _rnd = (seed << 4) + (z_idx << 20) + simctl_int[0] for _ in range(4): _rnd = int(0xDEECE66D) * _rnd + 0xB delays = delays[_rnd % len(delays)] @@ -157,14 +157,14 @@ def _wave_eval(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, st_ d_cur = int(0) z_cur = lut & 1 if z_cur == 1: - cbuf[z_mem, st_idx] = TMIN + cbuf[z_mem, sim] = TMIN z_val = z_cur - a = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val] - b = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val] - c = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val] - d = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val] + a = cbuf[a_mem + a_cur, sim] + delays[a_idx, 0, z_val] + b = cbuf[b_mem + b_cur, sim] + delays[b_idx, 0, z_val] + c = cbuf[c_mem + c_cur, sim] + delays[c_idx, 0, z_val] + d = cbuf[d_mem + d_cur, sim] + delays[d_idx, 0, z_val] previous_t = TMIN @@ -176,26 +176,26 @@ def _wave_eval(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, st_ a_cur += 1 inputs ^= 1 thresh = delays[a_idx, 0, z_val] - a = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val] - next_t = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val ^ 1] + a = cbuf[a_mem + a_cur, sim] + delays[a_idx, 0, z_val] + next_t = cbuf[a_mem + a_cur, sim] + delays[a_idx, 0, z_val ^ 1] elif b == current_t: b_cur += 1 inputs ^= 2 thresh = delays[b_idx, 0, z_val] - b = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val] - next_t = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val ^ 1] + b = cbuf[b_mem + b_cur, sim] + delays[b_idx, 0, z_val] + next_t = cbuf[b_mem + b_cur, sim] + delays[b_idx, 0, z_val ^ 1] elif c == current_t: c_cur += 1 inputs ^= 4 thresh = delays[c_idx, 0, z_val] - c = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val] - next_t = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val ^ 1] + c = cbuf[c_mem + c_cur, sim] + delays[c_idx, 0, z_val] + next_t = cbuf[c_mem + c_cur, sim] + delays[c_idx, 0, z_val ^ 1] else: d_cur += 1 inputs ^= 8 thresh = delays[d_idx, 0, z_val] - d = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val] - next_t = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val ^ 1] + d = cbuf[d_mem + d_cur, sim] + delays[d_idx, 0, z_val] + next_t = cbuf[d_mem + d_cur, sim] + delays[d_idx, 0, z_val ^ 1] if (z_cur & 1) != ((lut >> inputs) & 1): # we generate an edge in z_mem, if ... @@ -204,45 +204,45 @@ def _wave_eval(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, st_ or (current_t - previous_t) > thresh # -OR- the generated hazard is wider than pulse threshold. ): if z_cur < (z_cap - 1): # enough space in z_mem? - cbuf[z_mem + z_cur, st_idx] = current_t + cbuf[z_mem + z_cur, sim] = current_t previous_t = current_t z_cur += 1 else: overflows += 1 - previous_t = cbuf[z_mem + z_cur - 1, st_idx] + previous_t = cbuf[z_mem + z_cur - 1, sim] z_cur -= 1 else: z_cur -= 1 - previous_t = cbuf[z_mem + z_cur - 1, st_idx] if z_cur > 0 else TMIN + previous_t = cbuf[z_mem + z_cur - 1, sim] if z_cur > 0 else TMIN # output value of cell changed. update all delayed inputs. z_val = z_val ^ 1 - a = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val] - b = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val] - c = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val] - d = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val] + a = cbuf[a_mem + a_cur, sim] + delays[a_idx, 0, z_val] + b = cbuf[b_mem + b_cur, sim] + delays[b_idx, 0, z_val] + c = cbuf[c_mem + c_cur, sim] + delays[c_idx, 0, z_val] + d = cbuf[d_mem + d_cur, sim] + delays[d_idx, 0, z_val] current_t = min(a, b, c, d) # generate or propagate overflow flag - cbuf[z_mem + z_cur, st_idx] = TMAX_OVL if overflows > 0 else max(a, b, c, d) + cbuf[z_mem + z_cur, sim] = TMAX_OVL if overflows > 0 else max(a, b, c, d) _wave_eval_cpu = numba.njit(_wave_eval) @numba.njit -def wave_eval_cpu(op, cbuf, c_locs, c_caps, st_idx, delays, param, seed=0): +def wave_eval_cpu(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0): lut, z_idx, a_idx, b_idx, c_idx, d_idx = op - _wave_eval_cpu(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, st_idx, delays, param, seed) + _wave_eval_cpu(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed) @numba.njit -def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, st_start, st_stop, delays, params, seed): +def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, sim_start, sim_stop, delays, simctl_int, seed): for op_idx in range(op_start, op_stop): op = ops[op_idx] - for st_idx in range(st_start, st_stop): - wave_eval_cpu(op, c, c_locs, c_caps, st_idx, delays, params[st_idx], seed) + for sim in range(sim_start, sim_stop): + wave_eval_cpu(op, c, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed) @numba.njit @@ -307,7 +307,7 @@ class WaveSimCuda(WaveSim): self.c_locs = cuda.to_device(self.c_locs) self.c_caps = cuda.to_device(self.c_caps) self.delays = cuda.to_device(self.delays) - self.params = cuda.to_device(self.params) + self.simctl_int = cuda.to_device(self.simctl_int) self._block_dim = (32, 16) @@ -319,7 +319,7 @@ class WaveSimCuda(WaveSim): state['c_locs'] = np.array(self.c_locs) state['c_caps'] = np.array(self.c_caps) state['delays'] = np.array(self.delays) - state['params'] = np.array(self.params) + state['simctl_int'] = np.array(self.simctl_int) return state def __setstate__(self, state): @@ -330,7 +330,7 @@ class WaveSimCuda(WaveSim): self.c_locs = cuda.to_device(self.c_locs) self.c_caps = cuda.to_device(self.c_caps) self.delays = cuda.to_device(self.delays) - self.params = cuda.to_device(self.params) + self.simctl_int = cuda.to_device(self.simctl_int) def s_to_c(self): grid_dim = self._grid_dim(self.sims, self.s_len) @@ -343,7 +343,7 @@ class WaveSimCuda(WaveSim): for op_start, op_stop in zip(self.level_starts, self.level_stops): grid_dim = self._grid_dim(sims, op_stop - op_start) wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, int(0), - sims, self.delays, self.params, seed) + sims, self.delays, self.simctl_int, seed) cuda.synchronize() def c_to_s(self, time=TMAX, sd=0.0, seed=1): @@ -384,11 +384,11 @@ _wave_eval_gpu = cuda.jit(_wave_eval, device=True) @cuda.jit() -def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, st_start, st_stop, delays, param, seed): +def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, sim_start, sim_stop, delays, simctl_int, seed): x, y = cuda.grid(2) - st_idx = st_start + x + sim = sim_start + x op_idx = op_start + y - if st_idx >= st_stop: return + if sim >= sim_stop: return if op_idx >= op_stop: return lut = ops[op_idx, 0] @@ -398,9 +398,7 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, st_start, st_sto c_idx = ops[op_idx, 4] d_idx = ops[op_idx, 5] - param = param[st_idx] - - _wave_eval_gpu(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, st_idx, delays, param, seed) + _wave_eval_gpu(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed) @cuda.jit() diff --git a/tests/test_wave_sim.py b/tests/test_wave_sim.py index 1003689..b07f683 100644 --- a/tests/test_wave_sim.py +++ b/tests/test_wave_sim.py @@ -28,11 +28,11 @@ def test_nand_delays(): delays[0, 3, :, 0] = 0.7 # as above for D -> Z delays[0, 3, :, 1] = 0.8 - sdata = np.asarray([1, -1, 0, 0], dtype='float32') + simctl_int = np.asarray([0], dtype=np.int32) def wave_assert(inputs, output): for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i - wave_eval_cpu(op, c, c_locs, c_caps, 0, delays, sdata) + wave_eval_cpu(op, c, c_locs, c_caps, 0, delays, simctl_int) for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[4,i], v) wave_assert([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1