|
|
|
@ -99,26 +99,13 @@ class WaveSim(sim.SimOps):
@@ -99,26 +99,13 @@ class WaveSim(sim.SimOps):
|
|
|
|
|
self.simctl_int[0] = range(sims) # unique seed for each sim by default, zero this to pick same delays for all sims. |
|
|
|
|
self.simctl_int[1] = 2 # random picking by default. |
|
|
|
|
|
|
|
|
|
# flat array for line use information |
|
|
|
|
line_use = defaultdict(list) |
|
|
|
|
for lidx in range(len(self.circuit.lines)): |
|
|
|
|
if self.line_use_start[lidx] < 0: continue |
|
|
|
|
if self.line_use_stop[lidx] < 0: |
|
|
|
|
log.warn(f'line {lidx} never read?') |
|
|
|
|
for i in range(self.line_use_start[lidx], self.line_use_stop[lidx]): |
|
|
|
|
line_use[i].append(lidx) |
|
|
|
|
|
|
|
|
|
self.line_use_counts = np.array([len(line_use[i]) for i in range(len(self.levels))], dtype=np.int32) |
|
|
|
|
self.line_use_offsets = np.zeros_like(self.line_use_counts) |
|
|
|
|
self.line_use_offsets[1:] = self.line_use_counts.cumsum()[:-1] |
|
|
|
|
self.line_use = np.hstack([line_use[i] for i in range(len(self.levels))]) |
|
|
|
|
|
|
|
|
|
self.h = np.zeros((self.c_locs_len, sims), dtype=np.float32) # hashes of generated waveforms |
|
|
|
|
self.h_base = np.zeros_like(self.h) # base hashes to compare to |
|
|
|
|
self.e = np.zeros((self.c_locs_len, sims), dtype=np.uint8) # aux data for each line and sim |
|
|
|
|
|
|
|
|
|
self.error_counts = np.zeros(self.s_len, dtype=np.uint32) # number of capture errors by PPO |
|
|
|
|
self.lsts = np.zeros(self.s_len, dtype=np.float32) # LST by PPO |
|
|
|
|
self.overflows = np.zeros(self.s_len, dtype=np.uint32) # Overflows by PPO |
|
|
|
|
|
|
|
|
|
self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.h, self.c_locs, self.c_caps, self.ops, self.simctl_int)]) |
|
|
|
|
self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.e, self.c_locs, self.c_caps, self.ops, self.simctl_int)]) |
|
|
|
|
|
|
|
|
|
def __repr__(self): |
|
|
|
|
dev = 'GPU' if hasattr(self.c, 'copy_to_host') else 'CPU' |
|
|
|
@ -144,7 +131,7 @@ class WaveSim(sim.SimOps):
@@ -144,7 +131,7 @@ class WaveSim(sim.SimOps):
|
|
|
|
|
""" |
|
|
|
|
sims = min(sims or self.sims, self.sims) |
|
|
|
|
for op_start, op_stop in zip(self.level_starts, self.level_stops): |
|
|
|
|
level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.h, self.abuf, 0, sims, self.delays, self.simctl_int, seed) |
|
|
|
|
level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.e, self.abuf, 0, sims, self.delays, self.simctl_int, seed) |
|
|
|
|
|
|
|
|
|
def c_to_s(self, time=TMAX, sd=0.0, seed=1): |
|
|
|
|
"""Simulates a capture operation at all sequential elements and primary outputs. |
|
|
|
@ -172,7 +159,7 @@ class WaveSim(sim.SimOps):
@@ -172,7 +159,7 @@ class WaveSim(sim.SimOps):
|
|
|
|
|
self.s[2, self.ppio_s_locs] = self.s[8, self.ppio_s_locs] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _wave_eval(op, cbuf, c_locs, c_caps, hbuf, sim, delays, simctl_int, seed): |
|
|
|
|
def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed): |
|
|
|
|
overflows = int(0) |
|
|
|
|
|
|
|
|
|
lut = op[0] |
|
|
|
@ -202,8 +189,6 @@ def _wave_eval(op, cbuf, c_locs, c_caps, hbuf, sim, delays, simctl_int, seed):
@@ -202,8 +189,6 @@ def _wave_eval(op, cbuf, c_locs, c_caps, hbuf, sim, delays, simctl_int, seed):
|
|
|
|
|
z_mem = c_locs[z_idx] |
|
|
|
|
z_cap = c_caps[z_idx] |
|
|
|
|
|
|
|
|
|
h = np.float32(0) |
|
|
|
|
|
|
|
|
|
a_cur = int(0) |
|
|
|
|
b_cur = int(0) |
|
|
|
|
c_cur = int(0) |
|
|
|
@ -251,7 +236,6 @@ def _wave_eval(op, cbuf, c_locs, c_caps, hbuf, sim, delays, simctl_int, seed):
@@ -251,7 +236,6 @@ def _wave_eval(op, cbuf, c_locs, c_caps, hbuf, sim, delays, simctl_int, seed):
|
|
|
|
|
next_t = cbuf[d_mem + d_cur, sim] + delays[d_idx, (d_cur & 1) ^ 1, z_val ^ 1] |
|
|
|
|
|
|
|
|
|
if (z_cur & 1) != ((lut >> inputs) & 1): |
|
|
|
|
h += h*3 + max(current_t, -10) # hash based on generated transitions before filtering |
|
|
|
|
# we generate an edge in z_mem, if ... |
|
|
|
|
if (z_cur == 0 # it is the first edge in z_mem ... |
|
|
|
|
or next_t < current_t # -OR- the next edge on SAME input is EARLIER (need current edge to filter BOTH in next iteration) ... |
|
|
|
@ -281,11 +265,15 @@ def _wave_eval(op, cbuf, c_locs, c_caps, hbuf, sim, delays, simctl_int, seed):
@@ -281,11 +265,15 @@ def _wave_eval(op, cbuf, c_locs, c_caps, hbuf, sim, delays, simctl_int, seed):
|
|
|
|
|
# generate or propagate overflow flag |
|
|
|
|
cbuf[z_mem + z_cur, sim] = TMAX_OVL if overflows > 0 else max(a, b, c, d) |
|
|
|
|
|
|
|
|
|
hbuf[z_idx, sim] = h |
|
|
|
|
|
|
|
|
|
nrise = max(0, (z_cur+1) // 2 - (cbuf[z_mem, sim] == TMIN)) |
|
|
|
|
nfall = z_cur // 2 |
|
|
|
|
|
|
|
|
|
e = int(((cbuf[z_mem, sim] == TMIN) << 1) & 2) # initial value |
|
|
|
|
e |= z_val # final value |
|
|
|
|
e |= (nrise + nfall)<<2 # number of transitions |
|
|
|
|
|
|
|
|
|
ebuf[z_idx, sim] = e |
|
|
|
|
|
|
|
|
|
return nrise, nfall |
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -293,11 +281,11 @@ wave_eval_cpu = numba.njit(_wave_eval)
@@ -293,11 +281,11 @@ wave_eval_cpu = numba.njit(_wave_eval)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@numba.njit |
|
|
|
|
def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, hbuf, abuf, sim_start, sim_stop, delays, simctl_int, seed): |
|
|
|
|
def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, ebuf, abuf, sim_start, sim_stop, delays, simctl_int, seed): |
|
|
|
|
for op_idx in range(op_start, op_stop): |
|
|
|
|
op = ops[op_idx] |
|
|
|
|
for sim in range(sim_start, sim_stop): |
|
|
|
|
nrise, nfall = wave_eval_cpu(op, c, c_locs, c_caps, hbuf, sim, delays, simctl_int[:, sim], seed) |
|
|
|
|
nrise, nfall = wave_eval_cpu(op, c, c_locs, c_caps, ebuf, sim, delays, simctl_int[:, sim], seed) |
|
|
|
|
a_loc = op[6] |
|
|
|
|
a_wr = op[7] |
|
|
|
|
a_wf = op[8] |
|
|
|
@ -370,10 +358,10 @@ class WaveSimCuda(WaveSim):
@@ -370,10 +358,10 @@ class WaveSimCuda(WaveSim):
|
|
|
|
|
self.delays = cuda.to_device(self.delays) |
|
|
|
|
self.simctl_int = cuda.to_device(self.simctl_int) |
|
|
|
|
self.abuf = cuda.to_device(self.abuf) |
|
|
|
|
self.h = cuda.to_device(self.h) |
|
|
|
|
self.h_base = cuda.to_device(self.h_base) |
|
|
|
|
self.line_use = cuda.to_device(self.line_use) |
|
|
|
|
self.e = cuda.to_device(self.e) |
|
|
|
|
self.error_counts = cuda.to_device(self.error_counts) |
|
|
|
|
self.lsts = cuda.to_device(self.lsts) |
|
|
|
|
self.overflows = cuda.to_device(self.overflows) |
|
|
|
|
|
|
|
|
|
self.retval_int = cuda.to_device(np.array([0], dtype=np.int32)) |
|
|
|
|
|
|
|
|
@ -389,10 +377,10 @@ class WaveSimCuda(WaveSim):
@@ -389,10 +377,10 @@ class WaveSimCuda(WaveSim):
|
|
|
|
|
state['delays'] = np.array(self.delays) |
|
|
|
|
state['simctl_int'] = np.array(self.simctl_int) |
|
|
|
|
state['abuf'] = np.array(self.abuf) |
|
|
|
|
state['h'] = np.array(self.h) |
|
|
|
|
state['h_base'] = np.array(self.h_base) |
|
|
|
|
state['line_use'] = np.array(self.line_use) |
|
|
|
|
state['e'] = np.array(self.e) |
|
|
|
|
state['error_counts'] = np.array(self.error_counts) |
|
|
|
|
state['lsts'] = np.array(self.lsts) |
|
|
|
|
state['overflows'] = np.array(self.overflows) |
|
|
|
|
state['retval_int'] = np.array(self.retval_int) |
|
|
|
|
return state |
|
|
|
|
|
|
|
|
@ -406,10 +394,10 @@ class WaveSimCuda(WaveSim):
@@ -406,10 +394,10 @@ class WaveSimCuda(WaveSim):
|
|
|
|
|
self.delays = cuda.to_device(self.delays) |
|
|
|
|
self.simctl_int = cuda.to_device(self.simctl_int) |
|
|
|
|
self.abuf = cuda.to_device(self.abuf) |
|
|
|
|
self.h = cuda.to_device(self.h) |
|
|
|
|
self.h_base = cuda.to_device(self.h_base) |
|
|
|
|
self.line_use = cuda.to_device(self.line_use) |
|
|
|
|
self.e = cuda.to_device(self.e) |
|
|
|
|
self.error_counts = cuda.to_device(self.error_counts) |
|
|
|
|
self.lsts = cuda.to_device(self.lsts) |
|
|
|
|
self.overflows = cuda.to_device(self.overflows) |
|
|
|
|
self.retval_int = cuda.to_device(self.retval_int) |
|
|
|
|
|
|
|
|
|
def s_to_c(self): |
|
|
|
@ -424,7 +412,7 @@ class WaveSimCuda(WaveSim):
@@ -424,7 +412,7 @@ class WaveSimCuda(WaveSim):
|
|
|
|
|
if op_from > op_start: continue |
|
|
|
|
if op_to is not None and op_to <= op_start: break |
|
|
|
|
grid_dim = self._grid_dim(sims, op_stop - op_start) |
|
|
|
|
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.h, self.abuf, int(0), |
|
|
|
|
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.e, self.abuf, int(0), |
|
|
|
|
sims, self.delays, self.simctl_int, seed) |
|
|
|
|
cuda.synchronize() |
|
|
|
|
|
|
|
|
@ -433,7 +421,7 @@ class WaveSimCuda(WaveSim):
@@ -433,7 +421,7 @@ class WaveSimCuda(WaveSim):
|
|
|
|
|
op_start = self.level_starts[level] |
|
|
|
|
op_stop = self.level_stops[level] |
|
|
|
|
grid_dim = self._grid_dim(sims, op_stop - op_start) |
|
|
|
|
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.h, self.abuf, int(0), |
|
|
|
|
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.e, self.abuf, int(0), |
|
|
|
|
sims, self.delays, self.simctl_int, seed) |
|
|
|
|
|
|
|
|
|
def c_to_s(self, time=TMAX, sd=0.0, seed=1): |
|
|
|
@ -445,23 +433,38 @@ class WaveSimCuda(WaveSim):
@@ -445,23 +433,38 @@ class WaveSimCuda(WaveSim):
|
|
|
|
|
grid_dim = self._grid_dim(self.sims, self.s_len) |
|
|
|
|
ppo_to_ppi_gpu[grid_dim, self._block_dim](self.s, self.c_locs, time, self.ppi_offset, self.ppo_offset) |
|
|
|
|
|
|
|
|
|
def set_base_hashes(self): |
|
|
|
|
nitems = self.h_base.shape[0] * self.h_base.shape[1] |
|
|
|
|
grid_dim = cdiv(nitems, 256) |
|
|
|
|
memcpy_gpu[grid_dim, 256](self.h, self.h_base, nitems) |
|
|
|
|
def acc_error_counts(self, sims=None): |
|
|
|
|
sims = min(sims or self.sims, self.sims) |
|
|
|
|
grid_dim = cdiv(self.s_len, 256) |
|
|
|
|
acc_error_counts_gpu[grid_dim, 256](self.s, sims, self.error_counts) |
|
|
|
|
|
|
|
|
|
def compare_hashes_level(self, lv): |
|
|
|
|
self.retval_int[0] = 0 |
|
|
|
|
grid_dim = self._grid_dim(self.sims, self.line_use_counts[lv]) |
|
|
|
|
diff_hash_gpu[grid_dim, self._block_dim](self.h, self.h_base, self.line_use, self.line_use_offsets[lv], |
|
|
|
|
self.line_use_counts[lv], self.retval_int) |
|
|
|
|
return self.retval_int[0] |
|
|
|
|
def reset_error_counts(self): |
|
|
|
|
self.error_counts[:] = 0 |
|
|
|
|
|
|
|
|
|
def get_error_counts(self): |
|
|
|
|
return np.array(self.error_counts) |
|
|
|
|
|
|
|
|
|
def calc_error_counts(self, sims=None): |
|
|
|
|
def acc_overflows(self, sims=None): |
|
|
|
|
sims = min(sims or self.sims, self.sims) |
|
|
|
|
grid_dim = cdiv(self.s_len, 256) |
|
|
|
|
calc_error_counts_gpu[grid_dim, 256](self.s, sims, self.error_counts) |
|
|
|
|
return np.array(self.error_counts) |
|
|
|
|
acc_overflows_gpu[grid_dim, 256](self.s, sims, self.overflows) |
|
|
|
|
|
|
|
|
|
def reset_overflows(self): |
|
|
|
|
self.overflows[:] = 0 |
|
|
|
|
|
|
|
|
|
def get_overflows(self): |
|
|
|
|
return np.array(self.overflows) |
|
|
|
|
|
|
|
|
|
def acc_lsts(self, sims=None): |
|
|
|
|
sims = min(sims or self.sims, self.sims) |
|
|
|
|
grid_dim = cdiv(self.s_len, 256) |
|
|
|
|
acc_lsts_gpu[grid_dim, 256](self.s, sims, self.lsts) |
|
|
|
|
|
|
|
|
|
def reset_lsts(self): |
|
|
|
|
self.lsts[:] = 0.0 |
|
|
|
|
|
|
|
|
|
def get_lsts(self): |
|
|
|
|
return np.array(self.lsts) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@cuda.jit() |
|
|
|
@ -473,23 +476,33 @@ def memcpy_gpu (src, dst, nitems):
@@ -473,23 +476,33 @@ def memcpy_gpu (src, dst, nitems):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@cuda.jit() |
|
|
|
|
def diff_hash_gpu(hbuf1, hbuf2, h_locs, h_locs_offset, h_locs_cnt, differs): |
|
|
|
|
x, y = cuda.grid(2) |
|
|
|
|
if x >= hbuf1.shape[1]: return |
|
|
|
|
if y >= h_locs_cnt: return |
|
|
|
|
h_loc = h_locs[h_locs_offset+y] |
|
|
|
|
if hbuf1[h_loc, x] != hbuf2[h_loc, x]: |
|
|
|
|
differs[0] = 1 |
|
|
|
|
def acc_error_counts_gpu(s, sims, error_counts): |
|
|
|
|
x = cuda.grid(1) |
|
|
|
|
if x >= s.shape[1]: return |
|
|
|
|
cnt = 0 |
|
|
|
|
for i in range(sims): |
|
|
|
|
cnt += (s[6,x,i] != s[8,x,i]) |
|
|
|
|
error_counts[x] += cnt |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@cuda.jit() |
|
|
|
|
def calc_error_counts_gpu(s, sims, error_counts): |
|
|
|
|
def acc_overflows_gpu(s, sims, overflows): |
|
|
|
|
x = cuda.grid(1) |
|
|
|
|
if x >= s.shape[1]: return |
|
|
|
|
cnt = 0 |
|
|
|
|
for i in range(sims): |
|
|
|
|
cnt += (s[6,x,i] != s[8,x,i]) |
|
|
|
|
error_counts[x] = cnt |
|
|
|
|
cnt += s[10,x,i] |
|
|
|
|
overflows[x] += cnt |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@cuda.jit() |
|
|
|
|
def acc_lsts_gpu(s, sims, lsts): |
|
|
|
|
x = cuda.grid(1) |
|
|
|
|
if x >= s.shape[1]: return |
|
|
|
|
lst = 0 |
|
|
|
|
for i in range(sims): |
|
|
|
|
lst = max(lst, s[5,x,i]) |
|
|
|
|
lsts[x] = max(lsts[x], lst) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@cuda.jit() |
|
|
|
@ -520,7 +533,7 @@ _wave_eval_gpu = cuda.jit(_wave_eval, device=True)
@@ -520,7 +533,7 @@ _wave_eval_gpu = cuda.jit(_wave_eval, device=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@cuda.jit() |
|
|
|
|
def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, hbuf, abuf, sim_start, sim_stop, delays, simctl_int, seed): |
|
|
|
|
def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, ebuf, abuf, sim_start, sim_stop, delays, simctl_int, seed): |
|
|
|
|
x, y = cuda.grid(2) |
|
|
|
|
sim = sim_start + x |
|
|
|
|
op_idx = op_start + y |
|
|
|
@ -532,7 +545,7 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, hbuf, abuf, sim_
@@ -532,7 +545,7 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, hbuf, abuf, sim_
|
|
|
|
|
a_wr = op[7] |
|
|
|
|
a_wf = op[8] |
|
|
|
|
|
|
|
|
|
nrise, nfall = _wave_eval_gpu(op, cbuf, c_locs, c_caps, hbuf, sim, delays, simctl_int[:, sim], seed) |
|
|
|
|
nrise, nfall = _wave_eval_gpu(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int[:, sim], seed) |
|
|
|
|
|
|
|
|
|
# accumulate WSA into abuf |
|
|
|
|
if a_loc >= 0: |
|
|
|
|