Browse Source

more cleanup

devel
Stefan Holst 2 years ago
parent
commit
70caea065e
  1. 136
      src/kyupy/wave_sim.py

136
src/kyupy/wave_sim.py

@@ -16,7 +16,7 @@ import math
import numpy as np
from . import numba, cuda, hr_bytes, sim
from . import numba, cuda, hr_bytes, sim, cdiv
TMAX = np.float32(2 ** 127)
@@ -97,17 +97,15 @@ class WaveSim(sim.SimOps):
self.c[self.pippi_c_locs+1] = np.choose(cond, [TMAX, TMAX, sins[1], TMAX])
self.c[self.pippi_c_locs+2] = TMAX
def c_prop(self, sims=None, sd=0.0, seed=1):
def c_prop(self, sims=None, seed=1):
"""Propagates all waveforms from the (pseudo) primary inputs to the (pseudo) primary outputs.
:param sims: Number of parallel simulations to execute. If None, all available simulations are performed.
:param sd: Standard deviation for injection of random delay variation. Active, if value is positive.
:param seed: Random seed for delay variations.
"""
sims = min(sims or self.sims, self.sims)
for op_start, op_stop in zip(self.level_starts, self.level_stops):
level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, 0, sims,
self.delays, self.params, sd, seed)
level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, 0, sims, self.delays, self.params, seed)
def c_to_s(self, time=TMAX, sd=0.0, seed=1):
"""Simulates a capture operation at all sequential elements and primary outputs.
@@ -135,11 +133,7 @@ class WaveSim(sim.SimOps):
self.s[2, self.ppio_s_locs] = self.s[8, self.ppio_s_locs]
@numba.njit
def wave_eval_cpu(op, cbuf, c_locs, c_caps, st_idx, delays, param, sd=0.0, seed=0):
lut, z_idx, a_idx, b_idx, c_idx, d_idx = op
# >>> same code as wave_eval_cpu (except rand_gauss_*pu()-calls) >>>
def _wave_eval(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, st_idx, delays, param, seed=0):
overflows = int(0)
if len(delays) > 1:
@@ -234,13 +228,21 @@ def wave_eval_cpu(op, cbuf, c_locs, c_caps, st_idx, delays, param, sd=0.0, seed=
cbuf[z_mem + z_cur, st_idx] = TMAX_OVL if overflows > 0 else max(a, b, c, d)
_wave_eval_cpu = numba.njit(_wave_eval)
@numba.njit
def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, st_start, st_stop, delays, params, sd, seed):
overflows = 0
def wave_eval_cpu(op, cbuf, c_locs, c_caps, st_idx, delays, param, seed=0):
lut, z_idx, a_idx, b_idx, c_idx, d_idx = op
_wave_eval_cpu(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, st_idx, delays, param, seed)
@numba.njit
def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, st_start, st_stop, delays, params, seed):
for op_idx in range(op_start, op_stop):
op = ops[op_idx]
for st_idx in range(st_start, st_stop):
wave_eval_cpu(op, c, c_locs, c_caps, st_idx, delays, params[st_idx], sd, seed)
wave_eval_cpu(op, c, c_locs, c_caps, st_idx, delays, params[st_idx], seed)
@numba.njit
@@ -313,17 +315,14 @@ class WaveSimCuda(WaveSim):
grid_dim = self._grid_dim(self.sims, self.s_len)
wave_assign_gpu[grid_dim, self._block_dim](self.c, self.s, self.c_locs, self.ppi_offset)
def _grid_dim(self, x, y):
gx = math.ceil(x / self._block_dim[0])
gy = math.ceil(y / self._block_dim[1])
return gx, gy
def _grid_dim(self, x, y): return cdiv(x, self._block_dim[0]), cdiv(y, self._block_dim[1])
def c_prop(self, sims=None, sd=0.0, seed=1):
def c_prop(self, sims=None, seed=1):
sims = min(sims or self.sims, self.sims)
for op_start, op_stop in zip(self.level_starts, self.level_stops):
grid_dim = self._grid_dim(sims, op_stop - op_start)
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, int(0),
sims, self.delays, self.params, sd, seed)
sims, self.delays, self.params, seed)
cuda.synchronize()
def c_to_s(self, time=TMAX, sd=0.0, seed=1):
@@ -360,8 +359,11 @@ def wave_assign_gpu(c, s, c_locs, ppi_offset):
c[c_loc+2, x] = TMAX
_wave_eval_gpu = cuda.jit(_wave_eval, device=True)
@cuda.jit()
def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, st_start, st_stop, delays, param, sd, seed):
def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, st_start, st_stop, delays, param, seed):
x, y = cuda.grid(2)
st_idx = st_start + x
op_idx = op_start + y
@@ -377,99 +379,7 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, st_start, st_sto
param = param[st_idx]
# >>> same code as wave_eval_cpu (except rand_gauss_*pu()-calls) >>>
overflows = int(0)
if len(delays) > 1:
_rnd = (seed << 4) + (z_idx << 20) + (st_idx << 1)
for _ in range(4):
_rnd = int(0xDEECE66D) * _rnd + 0xB
delays = delays[_rnd % len(delays)]
else:
delays = delays[0]
a_mem = c_locs[a_idx]
b_mem = c_locs[b_idx]
c_mem = c_locs[c_idx]
d_mem = c_locs[d_idx]
z_mem = c_locs[z_idx]
z_cap = c_caps[z_idx]
a_cur = int(0)
b_cur = int(0)
c_cur = int(0)
d_cur = int(0)
z_cur = lut & 1
if z_cur == 1:
cbuf[z_mem, st_idx] = TMIN
z_val = z_cur
a = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val]
b = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val]
c = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val]
d = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val]
previous_t = TMIN
current_t = min(a, b, c, d)
inputs = int(0)
while current_t < TMAX:
if a == current_t:
a_cur += 1
inputs ^= 1
thresh = delays[a_idx, 0, z_val]
a = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val]
next_t = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val ^ 1]
elif b == current_t:
b_cur += 1
inputs ^= 2
thresh = delays[b_idx, 0, z_val]
b = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val]
next_t = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val ^ 1]
elif c == current_t:
c_cur += 1
inputs ^= 4
thresh = delays[c_idx, 0, z_val]
c = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val]
next_t = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val ^ 1]
else:
d_cur += 1
inputs ^= 8
thresh = delays[d_idx, 0, z_val]
d = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val]
next_t = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val ^ 1]
if (z_cur & 1) != ((lut >> inputs) & 1):
# we generate an edge in z_mem, if ...
if (z_cur == 0 # it is the first edge in z_mem ...
or next_t < current_t # -OR- the next edge on SAME input is EARLIER (need current edge to filter BOTH in next iteration) ...
or (current_t - previous_t) > thresh # -OR- the generated hazard is wider than pulse threshold.
):
if z_cur < (z_cap - 1): # enough space in z_mem?
cbuf[z_mem + z_cur, st_idx] = current_t
previous_t = current_t
z_cur += 1
else:
overflows += 1
previous_t = cbuf[z_mem + z_cur - 1, st_idx]
z_cur -= 1
else:
z_cur -= 1
previous_t = cbuf[z_mem + z_cur - 1, st_idx] if z_cur > 0 else TMIN
# output value of cell changed. update all delayed inputs.
z_val = z_val ^ 1
a = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val]
b = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val]
c = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val]
d = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val]
current_t = min(a, b, c, d)
# generate or propagate overflow flag
cbuf[z_mem + z_cur, st_idx] = TMAX_OVL if overflows > 0 else max(a, b, c, d)
_wave_eval_gpu(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, st_idx, delays, param, seed)
@cuda.jit()

Loading…
Cancel
Save