|
|
|
@ -99,13 +99,19 @@ class WaveSim(sim.SimOps):
@@ -99,13 +99,19 @@ class WaveSim(sim.SimOps):
|
|
|
|
|
self.simctl_int[0] = range(sims) # unique seed for each sim by default, zero this to pick same delays for all sims. |
|
|
|
|
self.simctl_int[1] = 2 # random picking by default. |
|
|
|
|
|
|
|
|
|
self.simctl_float = np.zeros((1, sims), dtype=np.float32) + 1 |
|
|
|
|
"""Float array for per-simulation delay configuration. |
|
|
|
|
|
|
|
|
|
* ``simctl_float[0]`` factor to be multiplied with each delay (default=1.0). |
|
|
|
|
""" |
|
|
|
|
|
|
|
|
|
self.e = np.zeros((self.c_locs_len, sims, 2), dtype=np.uint8) # aux data for each line and sim |
|
|
|
|
|
|
|
|
|
self.error_counts = np.zeros(self.s_len, dtype=np.uint32) # number of capture errors by PPO |
|
|
|
|
self.lsts = np.zeros(self.s_len, dtype=np.float32) # LST by PPO |
|
|
|
|
self.overflows = np.zeros(self.s_len, dtype=np.uint32) # Overflows by PPO |
|
|
|
|
|
|
|
|
|
self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.e, self.c_locs, self.c_caps, self.ops, self.simctl_int)]) |
|
|
|
|
self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.e, self.c_locs, self.c_caps, self.ops, self.simctl_int, self.simctl_float)]) |
|
|
|
|
|
|
|
|
|
def __repr__(self): |
|
|
|
|
dev = 'GPU' if hasattr(self.c, 'copy_to_host') else 'CPU' |
|
|
|
@ -131,7 +137,7 @@ class WaveSim(sim.SimOps):
@@ -131,7 +137,7 @@ class WaveSim(sim.SimOps):
|
|
|
|
|
""" |
|
|
|
|
sims = min(sims or self.sims, self.sims) |
|
|
|
|
for op_start, op_stop in zip(self.level_starts, self.level_stops): |
|
|
|
|
level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.e, self.abuf, 0, sims, self.delays, self.simctl_int, seed, delta) |
|
|
|
|
level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.e, self.abuf, 0, sims, self.delays, self.simctl_int, self.simctl_float, seed, delta) |
|
|
|
|
|
|
|
|
|
def c_to_s(self, time=TMAX, sd=0.0, seed=1): |
|
|
|
|
"""Simulates a capture operation at all sequential elements and primary outputs. |
|
|
|
@ -159,7 +165,7 @@ class WaveSim(sim.SimOps):
@@ -159,7 +165,7 @@ class WaveSim(sim.SimOps):
|
|
|
|
|
self.s[2, self.ppio_s_locs] = self.s[8, self.ppio_s_locs] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, delta): |
|
|
|
|
def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, simctl_float, seed, delta): |
|
|
|
|
overflows = int(0) |
|
|
|
|
|
|
|
|
|
lut = op[0] |
|
|
|
@ -193,6 +199,8 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de
@@ -193,6 +199,8 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de
|
|
|
|
|
delays = delays[_rnd % len(delays)] |
|
|
|
|
else: |
|
|
|
|
delays = delays[0] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
a_mem = c_locs[a_idx] |
|
|
|
|
b_mem = c_locs[b_idx] |
|
|
|
@ -211,10 +219,10 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de
@@ -211,10 +219,10 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de
|
|
|
|
|
|
|
|
|
|
z_val = z_cur |
|
|
|
|
|
|
|
|
|
a = cbuf[a_mem + a_cur, sim] + delays[a_idx, 0, z_val] |
|
|
|
|
b = cbuf[b_mem + b_cur, sim] + delays[b_idx, 0, z_val] |
|
|
|
|
c = cbuf[c_mem + c_cur, sim] + delays[c_idx, 0, z_val] |
|
|
|
|
d = cbuf[d_mem + d_cur, sim] + delays[d_idx, 0, z_val] |
|
|
|
|
a = cbuf[a_mem + a_cur, sim] + delays[a_idx, 0, z_val] * simctl_float[0] |
|
|
|
|
b = cbuf[b_mem + b_cur, sim] + delays[b_idx, 0, z_val] * simctl_float[0] |
|
|
|
|
c = cbuf[c_mem + c_cur, sim] + delays[c_idx, 0, z_val] * simctl_float[0] |
|
|
|
|
d = cbuf[d_mem + d_cur, sim] + delays[d_idx, 0, z_val] * simctl_float[0] |
|
|
|
|
|
|
|
|
|
previous_t = TMIN |
|
|
|
|
|
|
|
|
@ -225,27 +233,27 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de
@@ -225,27 +233,27 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de
|
|
|
|
|
if a == current_t: |
|
|
|
|
a_cur += 1 |
|
|
|
|
inputs ^= 1 |
|
|
|
|
thresh = delays[a_idx, a_cur & 1 ^ 1, z_val] |
|
|
|
|
a = cbuf[a_mem + a_cur, sim] + delays[a_idx, a_cur & 1, z_val] |
|
|
|
|
next_t = cbuf[a_mem + a_cur, sim] + delays[a_idx, (a_cur & 1) ^ 1, z_val ^ 1] |
|
|
|
|
thresh = delays[a_idx, a_cur & 1 ^ 1, z_val] * simctl_float[0] |
|
|
|
|
a = cbuf[a_mem + a_cur, sim] + delays[a_idx, a_cur & 1, z_val] * simctl_float[0] |
|
|
|
|
next_t = cbuf[a_mem + a_cur, sim] + delays[a_idx, (a_cur & 1) ^ 1, z_val ^ 1] * simctl_float[0] |
|
|
|
|
elif b == current_t: |
|
|
|
|
b_cur += 1 |
|
|
|
|
inputs ^= 2 |
|
|
|
|
thresh = delays[b_idx, b_cur & 1 ^ 1, z_val] |
|
|
|
|
b = cbuf[b_mem + b_cur, sim] + delays[b_idx, b_cur & 1, z_val] |
|
|
|
|
next_t = cbuf[b_mem + b_cur, sim] + delays[b_idx, (b_cur & 1) ^ 1, z_val ^ 1] |
|
|
|
|
thresh = delays[b_idx, b_cur & 1 ^ 1, z_val] * simctl_float[0] |
|
|
|
|
b = cbuf[b_mem + b_cur, sim] + delays[b_idx, b_cur & 1, z_val] * simctl_float[0] |
|
|
|
|
next_t = cbuf[b_mem + b_cur, sim] + delays[b_idx, (b_cur & 1) ^ 1, z_val ^ 1] * simctl_float[0] |
|
|
|
|
elif c == current_t: |
|
|
|
|
c_cur += 1 |
|
|
|
|
inputs ^= 4 |
|
|
|
|
thresh = delays[c_idx, c_cur & 1 ^ 1, z_val] |
|
|
|
|
c = cbuf[c_mem + c_cur, sim] + delays[c_idx, c_cur & 1, z_val] |
|
|
|
|
next_t = cbuf[c_mem + c_cur, sim] + delays[c_idx, (c_cur & 1) ^ 1, z_val ^ 1] |
|
|
|
|
thresh = delays[c_idx, c_cur & 1 ^ 1, z_val] * simctl_float[0] |
|
|
|
|
c = cbuf[c_mem + c_cur, sim] + delays[c_idx, c_cur & 1, z_val] * simctl_float[0] |
|
|
|
|
next_t = cbuf[c_mem + c_cur, sim] + delays[c_idx, (c_cur & 1) ^ 1, z_val ^ 1] * simctl_float[0] |
|
|
|
|
else: |
|
|
|
|
d_cur += 1 |
|
|
|
|
inputs ^= 8 |
|
|
|
|
thresh = delays[d_idx, d_cur & 1 ^ 1, z_val] |
|
|
|
|
d = cbuf[d_mem + d_cur, sim] + delays[d_idx, d_cur & 1, z_val] |
|
|
|
|
next_t = cbuf[d_mem + d_cur, sim] + delays[d_idx, (d_cur & 1) ^ 1, z_val ^ 1] |
|
|
|
|
thresh = delays[d_idx, d_cur & 1 ^ 1, z_val] * simctl_float[0] |
|
|
|
|
d = cbuf[d_mem + d_cur, sim] + delays[d_idx, d_cur & 1, z_val] * simctl_float[0] |
|
|
|
|
next_t = cbuf[d_mem + d_cur, sim] + delays[d_idx, (d_cur & 1) ^ 1, z_val ^ 1] * simctl_float[0] |
|
|
|
|
|
|
|
|
|
if (z_cur & 1) != ((lut >> inputs) & 1): |
|
|
|
|
# we generate an edge in z_mem, if ... |
|
|
|
@ -269,10 +277,10 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de
@@ -269,10 +277,10 @@ def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, de
|
|
|
|
|
|
|
|
|
|
# output value of cell changed. update all delayed inputs. |
|
|
|
|
z_val = z_val ^ 1 |
|
|
|
|
a = cbuf[a_mem + a_cur, sim] + delays[a_idx, a_cur & 1, z_val] |
|
|
|
|
b = cbuf[b_mem + b_cur, sim] + delays[b_idx, b_cur & 1, z_val] |
|
|
|
|
c = cbuf[c_mem + c_cur, sim] + delays[c_idx, c_cur & 1, z_val] |
|
|
|
|
d = cbuf[d_mem + d_cur, sim] + delays[d_idx, d_cur & 1, z_val] |
|
|
|
|
a = cbuf[a_mem + a_cur, sim] + delays[a_idx, a_cur & 1, z_val] * simctl_float[0] |
|
|
|
|
b = cbuf[b_mem + b_cur, sim] + delays[b_idx, b_cur & 1, z_val] * simctl_float[0] |
|
|
|
|
c = cbuf[c_mem + c_cur, sim] + delays[c_idx, c_cur & 1, z_val] * simctl_float[0] |
|
|
|
|
d = cbuf[d_mem + d_cur, sim] + delays[d_idx, d_cur & 1, z_val] * simctl_float[0] |
|
|
|
|
|
|
|
|
|
current_t = min(a, b, c, d) |
|
|
|
|
|
|
|
|
@ -300,11 +308,11 @@ wave_eval_cpu = numba.njit(_wave_eval)
@@ -300,11 +308,11 @@ wave_eval_cpu = numba.njit(_wave_eval)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@numba.njit |
|
|
|
|
def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, ebuf, abuf, sim_start, sim_stop, delays, simctl_int, seed, delta): |
|
|
|
|
def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, ebuf, abuf, sim_start, sim_stop, delays, simctl_int, simctl_float, seed, delta): |
|
|
|
|
for op_idx in range(op_start, op_stop): |
|
|
|
|
op = ops[op_idx] |
|
|
|
|
for sim in range(sim_start, sim_stop): |
|
|
|
|
nrise, nfall = wave_eval_cpu(op, c, c_locs, c_caps, ebuf, sim, delays, simctl_int[:, sim], seed, delta) |
|
|
|
|
nrise, nfall = wave_eval_cpu(op, c, c_locs, c_caps, ebuf, sim, delays, simctl_int[:, sim], simctl_float[:, sim], seed, delta) |
|
|
|
|
a_loc = op[6] |
|
|
|
|
a_wr = op[7] |
|
|
|
|
a_wf = op[8] |
|
|
|
@ -376,6 +384,7 @@ class WaveSimCuda(WaveSim):
@@ -376,6 +384,7 @@ class WaveSimCuda(WaveSim):
|
|
|
|
|
self.c_caps = cuda.to_device(self.c_caps) |
|
|
|
|
self.delays = cuda.to_device(self.delays) |
|
|
|
|
self.simctl_int = cuda.to_device(self.simctl_int) |
|
|
|
|
self.simctl_float = cuda.to_device(self.simctl_float) |
|
|
|
|
self.abuf = cuda.to_device(self.abuf) |
|
|
|
|
self.e = cuda.to_device(self.e) |
|
|
|
|
self.error_counts = cuda.to_device(self.error_counts) |
|
|
|
@ -395,6 +404,7 @@ class WaveSimCuda(WaveSim):
@@ -395,6 +404,7 @@ class WaveSimCuda(WaveSim):
|
|
|
|
|
state['c_caps'] = np.array(self.c_caps) |
|
|
|
|
state['delays'] = np.array(self.delays) |
|
|
|
|
state['simctl_int'] = np.array(self.simctl_int) |
|
|
|
|
state['simctl_float'] = np.array(self.simctl_float) |
|
|
|
|
state['abuf'] = np.array(self.abuf) |
|
|
|
|
state['e'] = np.array(self.e) |
|
|
|
|
state['error_counts'] = np.array(self.error_counts) |
|
|
|
@ -412,6 +422,7 @@ class WaveSimCuda(WaveSim):
@@ -412,6 +422,7 @@ class WaveSimCuda(WaveSim):
|
|
|
|
|
self.c_caps = cuda.to_device(self.c_caps) |
|
|
|
|
self.delays = cuda.to_device(self.delays) |
|
|
|
|
self.simctl_int = cuda.to_device(self.simctl_int) |
|
|
|
|
self.simctl_float = cuda.to_device(self.simctl_float) |
|
|
|
|
self.abuf = cuda.to_device(self.abuf) |
|
|
|
|
self.e = cuda.to_device(self.e) |
|
|
|
|
self.error_counts = cuda.to_device(self.error_counts) |
|
|
|
@ -432,7 +443,7 @@ class WaveSimCuda(WaveSim):
@@ -432,7 +443,7 @@ class WaveSimCuda(WaveSim):
|
|
|
|
|
if op_to is not None and op_to <= op_start: break |
|
|
|
|
grid_dim = self._grid_dim(sims, op_stop - op_start) |
|
|
|
|
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.e, self.abuf, int(0), |
|
|
|
|
sims, self.delays, self.simctl_int, seed, delta) |
|
|
|
|
sims, self.delays, self.simctl_int, self.simctl_float, seed, delta) |
|
|
|
|
cuda.synchronize() |
|
|
|
|
|
|
|
|
|
def c_prop_level(self, level, sims=None, seed=1, delta=0): |
|
|
|
@ -441,7 +452,7 @@ class WaveSimCuda(WaveSim):
@@ -441,7 +452,7 @@ class WaveSimCuda(WaveSim):
|
|
|
|
|
op_stop = self.level_stops[level] |
|
|
|
|
grid_dim = self._grid_dim(sims, op_stop - op_start) |
|
|
|
|
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.e, self.abuf, int(0), |
|
|
|
|
sims, self.delays, self.simctl_int, seed, delta) |
|
|
|
|
sims, self.delays, self.simctl_int, self.simctl_float, seed, delta) |
|
|
|
|
|
|
|
|
|
def c_to_s(self, time=TMAX, sd=0.0, seed=1): |
|
|
|
|
grid_dim = self._grid_dim(self.sims, self.s_len) |
|
|
|
@ -552,7 +563,7 @@ _wave_eval_gpu = cuda.jit(_wave_eval, device=True)
@@ -552,7 +563,7 @@ _wave_eval_gpu = cuda.jit(_wave_eval, device=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@cuda.jit() |
|
|
|
|
def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, ebuf, abuf, sim_start, sim_stop, delays, simctl_int, seed, delta): |
|
|
|
|
def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, ebuf, abuf, sim_start, sim_stop, delays, simctl_int, simctl_float, seed, delta): |
|
|
|
|
x, y = cuda.grid(2) |
|
|
|
|
sim = sim_start + x |
|
|
|
|
op_idx = op_start + y |
|
|
|
@ -564,7 +575,7 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, ebuf, abuf, sim_
@@ -564,7 +575,7 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, ebuf, abuf, sim_
|
|
|
|
|
a_wr = op[7] |
|
|
|
|
a_wf = op[8] |
|
|
|
|
|
|
|
|
|
nrise, nfall = _wave_eval_gpu(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int[:, sim], seed, delta) |
|
|
|
|
nrise, nfall = _wave_eval_gpu(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int[:, sim], simctl_float[:, sim], seed, delta) |
|
|
|
|
|
|
|
|
|
# accumulate WSA into abuf |
|
|
|
|
if a_loc >= 0: |
|
|
|
|