Browse Source

support for static variations

devel
Stefan Holst 2 years ago
parent
commit
7a060b1831
  1. 7
      src/kyupy/__init__.py
  2. 82
      src/kyupy/wave_sim.py
  3. 4
      tests/test_wave_sim.py

7
src/kyupy/__init__.py

@ -101,12 +101,13 @@ class Timer:
class Timers: class Timers:
def __init__(self, t={}): self.timers = defaultdict(Timer) | t def __init__(self, t={}): self.timers = defaultdict(Timer) | t
def __getitem__(self, name): return self.timers[name] def __getitem__(self, name): return self.timers[name]
def __getattr__(self, name): return self.timers[name]
def __repr__(self): return '{' + ', '.join([f'{k}: {v}' for k, v in self.timers.items()]) + '}' def __repr__(self): return '{' + ', '.join([f'{k}: {v}' for k, v in self.timers.items()]) + '}'
def __add__(self, t): def __add__(self, t):
tmr = Timers(self.timers) tmr = Timers(self.timers)
for k, v in t.timers.items(): tmr.timers[k] += v for k, v in t.timers.items(): tmr.timers[k] += v
return tmr return tmr
def sum(self):
return sum([v.s for v in self.timers.values()])
def dict(self): def dict(self):
return dict([(k, v.s) for k, v in self.timers.items()]) return dict([(k, v.s) for k, v in self.timers.items()])
@ -204,7 +205,7 @@ class MockCuda:
self.x = 0 self.x = 0
self.y = 0 self.y = 0
def jit(self, device=False): def jit(self, func=None, device=False):
_ = device # silence "not used" warning _ = device # silence "not used" warning
outer = self outer = self
@ -232,7 +233,7 @@ class MockCuda:
return inner return inner
return Launcher(func) return Launcher(func)
return make_launcher return make_launcher(func) if func else make_launcher
@staticmethod @staticmethod
def to_device(array, to=None): def to_device(array, to=None):

82
src/kyupy/wave_sim.py

@ -76,10 +76,10 @@ class WaveSim(sim.SimOps):
final values in the waveforms are still valid. final values in the waveforms are still valid.
""" """
self.params = np.zeros((sims, 4), dtype=np.float32) self.simctl_int = np.zeros((1, sims), dtype=np.int32)
self.params[...,0] = 1.0 self.simctl_int[0] = range(sims)
self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.c_locs, self.c_caps, self.ops, self.params)]) self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.c_locs, self.c_caps, self.ops, self.simctl_int)])
def __repr__(self): def __repr__(self):
return f'<{type(self).__name__} {self.circuit.name} sims={self.sims} ops={len(self.ops)} ' + \ return f'<{type(self).__name__} {self.circuit.name} sims={self.sims} ops={len(self.ops)} ' + \
@ -105,7 +105,7 @@ class WaveSim(sim.SimOps):
""" """
sims = min(sims or self.sims, self.sims) sims = min(sims or self.sims, self.sims)
for op_start, op_stop in zip(self.level_starts, self.level_stops): for op_start, op_stop in zip(self.level_starts, self.level_stops):
level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, 0, sims, self.delays, self.params, seed) level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, 0, sims, self.delays, self.simctl_int, seed)
def c_to_s(self, time=TMAX, sd=0.0, seed=1): def c_to_s(self, time=TMAX, sd=0.0, seed=1):
"""Simulates a capture operation at all sequential elements and primary outputs. """Simulates a capture operation at all sequential elements and primary outputs.
@ -133,11 +133,11 @@ class WaveSim(sim.SimOps):
self.s[2, self.ppio_s_locs] = self.s[8, self.ppio_s_locs] self.s[2, self.ppio_s_locs] = self.s[8, self.ppio_s_locs]
def _wave_eval(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, st_idx, delays, param, seed=0): def _wave_eval(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0):
overflows = int(0) overflows = int(0)
if len(delays) > 1: if len(delays) > 1:
_rnd = (seed << 4) + (z_idx << 20) + (st_idx << 1) _rnd = (seed << 4) + (z_idx << 20) + simctl_int[0]
for _ in range(4): for _ in range(4):
_rnd = int(0xDEECE66D) * _rnd + 0xB _rnd = int(0xDEECE66D) * _rnd + 0xB
delays = delays[_rnd % len(delays)] delays = delays[_rnd % len(delays)]
@ -157,14 +157,14 @@ def _wave_eval(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, st_
d_cur = int(0) d_cur = int(0)
z_cur = lut & 1 z_cur = lut & 1
if z_cur == 1: if z_cur == 1:
cbuf[z_mem, st_idx] = TMIN cbuf[z_mem, sim] = TMIN
z_val = z_cur z_val = z_cur
a = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val] a = cbuf[a_mem + a_cur, sim] + delays[a_idx, 0, z_val]
b = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val] b = cbuf[b_mem + b_cur, sim] + delays[b_idx, 0, z_val]
c = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val] c = cbuf[c_mem + c_cur, sim] + delays[c_idx, 0, z_val]
d = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val] d = cbuf[d_mem + d_cur, sim] + delays[d_idx, 0, z_val]
previous_t = TMIN previous_t = TMIN
@ -176,26 +176,26 @@ def _wave_eval(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, st_
a_cur += 1 a_cur += 1
inputs ^= 1 inputs ^= 1
thresh = delays[a_idx, 0, z_val] thresh = delays[a_idx, 0, z_val]
a = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val] a = cbuf[a_mem + a_cur, sim] + delays[a_idx, 0, z_val]
next_t = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val ^ 1] next_t = cbuf[a_mem + a_cur, sim] + delays[a_idx, 0, z_val ^ 1]
elif b == current_t: elif b == current_t:
b_cur += 1 b_cur += 1
inputs ^= 2 inputs ^= 2
thresh = delays[b_idx, 0, z_val] thresh = delays[b_idx, 0, z_val]
b = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val] b = cbuf[b_mem + b_cur, sim] + delays[b_idx, 0, z_val]
next_t = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val ^ 1] next_t = cbuf[b_mem + b_cur, sim] + delays[b_idx, 0, z_val ^ 1]
elif c == current_t: elif c == current_t:
c_cur += 1 c_cur += 1
inputs ^= 4 inputs ^= 4
thresh = delays[c_idx, 0, z_val] thresh = delays[c_idx, 0, z_val]
c = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val] c = cbuf[c_mem + c_cur, sim] + delays[c_idx, 0, z_val]
next_t = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val ^ 1] next_t = cbuf[c_mem + c_cur, sim] + delays[c_idx, 0, z_val ^ 1]
else: else:
d_cur += 1 d_cur += 1
inputs ^= 8 inputs ^= 8
thresh = delays[d_idx, 0, z_val] thresh = delays[d_idx, 0, z_val]
d = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val] d = cbuf[d_mem + d_cur, sim] + delays[d_idx, 0, z_val]
next_t = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val ^ 1] next_t = cbuf[d_mem + d_cur, sim] + delays[d_idx, 0, z_val ^ 1]
if (z_cur & 1) != ((lut >> inputs) & 1): if (z_cur & 1) != ((lut >> inputs) & 1):
# we generate an edge in z_mem, if ... # we generate an edge in z_mem, if ...
@ -204,45 +204,45 @@ def _wave_eval(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, st_
or (current_t - previous_t) > thresh # -OR- the generated hazard is wider than pulse threshold. or (current_t - previous_t) > thresh # -OR- the generated hazard is wider than pulse threshold.
): ):
if z_cur < (z_cap - 1): # enough space in z_mem? if z_cur < (z_cap - 1): # enough space in z_mem?
cbuf[z_mem + z_cur, st_idx] = current_t cbuf[z_mem + z_cur, sim] = current_t
previous_t = current_t previous_t = current_t
z_cur += 1 z_cur += 1
else: else:
overflows += 1 overflows += 1
previous_t = cbuf[z_mem + z_cur - 1, st_idx] previous_t = cbuf[z_mem + z_cur - 1, sim]
z_cur -= 1 z_cur -= 1
else: else:
z_cur -= 1 z_cur -= 1
previous_t = cbuf[z_mem + z_cur - 1, st_idx] if z_cur > 0 else TMIN previous_t = cbuf[z_mem + z_cur - 1, sim] if z_cur > 0 else TMIN
# output value of cell changed. update all delayed inputs. # output value of cell changed. update all delayed inputs.
z_val = z_val ^ 1 z_val = z_val ^ 1
a = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val] a = cbuf[a_mem + a_cur, sim] + delays[a_idx, 0, z_val]
b = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val] b = cbuf[b_mem + b_cur, sim] + delays[b_idx, 0, z_val]
c = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val] c = cbuf[c_mem + c_cur, sim] + delays[c_idx, 0, z_val]
d = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val] d = cbuf[d_mem + d_cur, sim] + delays[d_idx, 0, z_val]
current_t = min(a, b, c, d) current_t = min(a, b, c, d)
# generate or propagate overflow flag # generate or propagate overflow flag
cbuf[z_mem + z_cur, st_idx] = TMAX_OVL if overflows > 0 else max(a, b, c, d) cbuf[z_mem + z_cur, sim] = TMAX_OVL if overflows > 0 else max(a, b, c, d)
_wave_eval_cpu = numba.njit(_wave_eval) _wave_eval_cpu = numba.njit(_wave_eval)
@numba.njit @numba.njit
def wave_eval_cpu(op, cbuf, c_locs, c_caps, st_idx, delays, param, seed=0): def wave_eval_cpu(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0):
lut, z_idx, a_idx, b_idx, c_idx, d_idx = op lut, z_idx, a_idx, b_idx, c_idx, d_idx = op
_wave_eval_cpu(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, st_idx, delays, param, seed) _wave_eval_cpu(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed)
@numba.njit @numba.njit
def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, st_start, st_stop, delays, params, seed): def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, sim_start, sim_stop, delays, simctl_int, seed):
for op_idx in range(op_start, op_stop): for op_idx in range(op_start, op_stop):
op = ops[op_idx] op = ops[op_idx]
for st_idx in range(st_start, st_stop): for sim in range(sim_start, sim_stop):
wave_eval_cpu(op, c, c_locs, c_caps, st_idx, delays, params[st_idx], seed) wave_eval_cpu(op, c, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed)
@numba.njit @numba.njit
@ -307,7 +307,7 @@ class WaveSimCuda(WaveSim):
self.c_locs = cuda.to_device(self.c_locs) self.c_locs = cuda.to_device(self.c_locs)
self.c_caps = cuda.to_device(self.c_caps) self.c_caps = cuda.to_device(self.c_caps)
self.delays = cuda.to_device(self.delays) self.delays = cuda.to_device(self.delays)
self.params = cuda.to_device(self.params) self.simctl_int = cuda.to_device(self.simctl_int)
self._block_dim = (32, 16) self._block_dim = (32, 16)
@ -319,7 +319,7 @@ class WaveSimCuda(WaveSim):
state['c_locs'] = np.array(self.c_locs) state['c_locs'] = np.array(self.c_locs)
state['c_caps'] = np.array(self.c_caps) state['c_caps'] = np.array(self.c_caps)
state['delays'] = np.array(self.delays) state['delays'] = np.array(self.delays)
state['params'] = np.array(self.params) state['simctl_int'] = np.array(self.simctl_int)
return state return state
def __setstate__(self, state): def __setstate__(self, state):
@ -330,7 +330,7 @@ class WaveSimCuda(WaveSim):
self.c_locs = cuda.to_device(self.c_locs) self.c_locs = cuda.to_device(self.c_locs)
self.c_caps = cuda.to_device(self.c_caps) self.c_caps = cuda.to_device(self.c_caps)
self.delays = cuda.to_device(self.delays) self.delays = cuda.to_device(self.delays)
self.params = cuda.to_device(self.params) self.simctl_int = cuda.to_device(self.simctl_int)
def s_to_c(self): def s_to_c(self):
grid_dim = self._grid_dim(self.sims, self.s_len) grid_dim = self._grid_dim(self.sims, self.s_len)
@ -343,7 +343,7 @@ class WaveSimCuda(WaveSim):
for op_start, op_stop in zip(self.level_starts, self.level_stops): for op_start, op_stop in zip(self.level_starts, self.level_stops):
grid_dim = self._grid_dim(sims, op_stop - op_start) grid_dim = self._grid_dim(sims, op_stop - op_start)
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, int(0), wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, int(0),
sims, self.delays, self.params, seed) sims, self.delays, self.simctl_int, seed)
cuda.synchronize() cuda.synchronize()
def c_to_s(self, time=TMAX, sd=0.0, seed=1): def c_to_s(self, time=TMAX, sd=0.0, seed=1):
@ -384,11 +384,11 @@ _wave_eval_gpu = cuda.jit(_wave_eval, device=True)
@cuda.jit() @cuda.jit()
def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, st_start, st_stop, delays, param, seed): def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, sim_start, sim_stop, delays, simctl_int, seed):
x, y = cuda.grid(2) x, y = cuda.grid(2)
st_idx = st_start + x sim = sim_start + x
op_idx = op_start + y op_idx = op_start + y
if st_idx >= st_stop: return if sim >= sim_stop: return
if op_idx >= op_stop: return if op_idx >= op_stop: return
lut = ops[op_idx, 0] lut = ops[op_idx, 0]
@ -398,9 +398,7 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, st_start, st_sto
c_idx = ops[op_idx, 4] c_idx = ops[op_idx, 4]
d_idx = ops[op_idx, 5] d_idx = ops[op_idx, 5]
param = param[st_idx] _wave_eval_gpu(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed)
_wave_eval_gpu(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, st_idx, delays, param, seed)
@cuda.jit() @cuda.jit()

4
tests/test_wave_sim.py

@ -28,11 +28,11 @@ def test_nand_delays():
delays[0, 3, :, 0] = 0.7 # as above for D -> Z delays[0, 3, :, 0] = 0.7 # as above for D -> Z
delays[0, 3, :, 1] = 0.8 delays[0, 3, :, 1] = 0.8
sdata = np.asarray([1, -1, 0, 0], dtype='float32') simctl_int = np.asarray([0], dtype=np.int32)
def wave_assert(inputs, output): def wave_assert(inputs, output):
for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i
wave_eval_cpu(op, c, c_locs, c_caps, 0, delays, sdata) wave_eval_cpu(op, c, c_locs, c_caps, 0, delays, simctl_int)
for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[4,i], v) for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[4,i], v)
wave_assert([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1 wave_assert([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1

Loading…
Cancel
Save