|
|
@ -13,10 +13,11 @@ Two simulators are available: :py:class:`WaveSim` runs on the CPU, and the deriv |
|
|
|
""" |
|
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
import math |
|
|
|
import math |
|
|
|
|
|
|
|
from collections import defaultdict |
|
|
|
|
|
|
|
|
|
|
|
import numpy as np |
|
|
|
import numpy as np |
|
|
|
|
|
|
|
|
|
|
|
from . import numba, cuda, sim, cdiv, eng |
|
|
|
from . import log, numba, cuda, sim, cdiv, eng |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
TMAX = np.float32(2 ** 127) |
|
|
|
TMAX = np.float32(2 ** 127) |
|
|
@ -59,8 +60,8 @@ class WaveSim(sim.SimOps): |
|
|
|
self.delays = np.zeros((len(delays), self.c_locs_len, 2, 2), dtype=delays.dtype) |
|
|
|
self.delays = np.zeros((len(delays), self.c_locs_len, 2, 2), dtype=delays.dtype) |
|
|
|
self.delays[:, :delays.shape[1]] = delays |
|
|
|
self.delays[:, :delays.shape[1]] = delays |
|
|
|
|
|
|
|
|
|
|
|
self.c = np.zeros((self.c_len, sims), dtype=np.float32) + TMAX |
|
|
|
self.c = np.full((self.c_len, self.sims), TMAX, dtype=np.float32) |
|
|
|
self.s = np.zeros((11, self.s_len, sims), dtype=np.float32) |
|
|
|
self.s = np.zeros((11, self.s_len, self.sims), dtype=np.float32) |
|
|
|
"""Information about the logic values and transitions around the sequential elements (flip-flops) and ports. |
|
|
|
"""Information about the logic values and transitions around the sequential elements (flip-flops) and ports. |
|
|
|
|
|
|
|
|
|
|
|
The first 3 values are read by :py:func:`s_to_c`. |
|
|
|
The first 3 values are read by :py:func:`s_to_c`. |
|
|
@ -98,7 +99,26 @@ class WaveSim(sim.SimOps): |
|
|
|
self.simctl_int[0] = range(sims) # unique seed for each sim by default, zero this to pick same delays for all sims. |
|
|
|
self.simctl_int[0] = range(sims) # unique seed for each sim by default, zero this to pick same delays for all sims. |
|
|
|
self.simctl_int[1] = 2 # random picking by default. |
|
|
|
self.simctl_int[1] = 2 # random picking by default. |
|
|
|
|
|
|
|
|
|
|
|
self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.c_locs, self.c_caps, self.ops, self.simctl_int)]) |
|
|
|
# flat array for line use information |
|
|
|
|
|
|
|
line_use = defaultdict(list) |
|
|
|
|
|
|
|
for lidx in range(len(self.circuit.lines)): |
|
|
|
|
|
|
|
if self.line_use_start[lidx] < 0: continue |
|
|
|
|
|
|
|
if self.line_use_stop[lidx] < 0: |
|
|
|
|
|
|
|
log.warn(f'line {lidx} never read?') |
|
|
|
|
|
|
|
for i in range(self.line_use_start[lidx], self.line_use_stop[lidx]): |
|
|
|
|
|
|
|
line_use[i].append(lidx) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.line_use_counts = np.array([len(line_use[i]) for i in range(len(self.levels))], dtype=np.int32) |
|
|
|
|
|
|
|
self.line_use_offsets = np.zeros_like(self.line_use_counts) |
|
|
|
|
|
|
|
self.line_use_offsets[1:] = self.line_use_counts.cumsum()[:-1] |
|
|
|
|
|
|
|
self.line_use = np.hstack([line_use[i] for i in range(len(self.levels))]) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.h = np.zeros((self.c_locs_len, sims), dtype=np.float32) # hashes of generated waveforms |
|
|
|
|
|
|
|
self.h_base = np.zeros_like(self.h) # base hashes to compare to |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.error_counts = np.zeros(self.s_len, dtype=np.uint32) # number of capture errors by PPO |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.h, self.c_locs, self.c_caps, self.ops, self.simctl_int)]) |
|
|
|
|
|
|
|
|
|
|
|
def __repr__(self): |
|
|
|
def __repr__(self): |
|
|
|
dev = 'GPU' if hasattr(self.c, 'copy_to_host') else 'CPU' |
|
|
|
dev = 'GPU' if hasattr(self.c, 'copy_to_host') else 'CPU' |
|
|
@ -124,7 +144,7 @@ class WaveSim(sim.SimOps): |
|
|
|
""" |
|
|
|
""" |
|
|
|
sims = min(sims or self.sims, self.sims) |
|
|
|
sims = min(sims or self.sims, self.sims) |
|
|
|
for op_start, op_stop in zip(self.level_starts, self.level_stops): |
|
|
|
for op_start, op_stop in zip(self.level_starts, self.level_stops): |
|
|
|
level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.abuf, 0, sims, self.delays, self.simctl_int, seed) |
|
|
|
level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.h, self.abuf, 0, sims, self.delays, self.simctl_int, seed) |
|
|
|
|
|
|
|
|
|
|
|
def c_to_s(self, time=TMAX, sd=0.0, seed=1): |
|
|
|
def c_to_s(self, time=TMAX, sd=0.0, seed=1): |
|
|
|
"""Simulates a capture operation at all sequential elements and primary outputs. |
|
|
|
"""Simulates a capture operation at all sequential elements and primary outputs. |
|
|
@ -152,7 +172,7 @@ class WaveSim(sim.SimOps): |
|
|
|
self.s[2, self.ppio_s_locs] = self.s[8, self.ppio_s_locs] |
|
|
|
self.s[2, self.ppio_s_locs] = self.s[8, self.ppio_s_locs] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0): |
|
|
|
def _wave_eval(op, cbuf, c_locs, c_caps, hbuf, sim, delays, simctl_int, seed): |
|
|
|
overflows = int(0) |
|
|
|
overflows = int(0) |
|
|
|
|
|
|
|
|
|
|
|
lut = op[0] |
|
|
|
lut = op[0] |
|
|
@ -182,6 +202,8 @@ def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0): |
|
|
|
z_mem = c_locs[z_idx] |
|
|
|
z_mem = c_locs[z_idx] |
|
|
|
z_cap = c_caps[z_idx] |
|
|
|
z_cap = c_caps[z_idx] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
h = np.float32(0) |
|
|
|
|
|
|
|
|
|
|
|
a_cur = int(0) |
|
|
|
a_cur = int(0) |
|
|
|
b_cur = int(0) |
|
|
|
b_cur = int(0) |
|
|
|
c_cur = int(0) |
|
|
|
c_cur = int(0) |
|
|
@ -229,6 +251,7 @@ def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0): |
|
|
|
next_t = cbuf[d_mem + d_cur, sim] + delays[d_idx, (d_cur & 1) ^ 1, z_val ^ 1] |
|
|
|
next_t = cbuf[d_mem + d_cur, sim] + delays[d_idx, (d_cur & 1) ^ 1, z_val ^ 1] |
|
|
|
|
|
|
|
|
|
|
|
if (z_cur & 1) != ((lut >> inputs) & 1): |
|
|
|
if (z_cur & 1) != ((lut >> inputs) & 1): |
|
|
|
|
|
|
|
h += h*3 + max(current_t, -10) # hash based on generated transitions before filtering |
|
|
|
# we generate an edge in z_mem, if ... |
|
|
|
# we generate an edge in z_mem, if ... |
|
|
|
if (z_cur == 0 # it is the first edge in z_mem ... |
|
|
|
if (z_cur == 0 # it is the first edge in z_mem ... |
|
|
|
or next_t < current_t # -OR- the next edge on SAME input is EARLIER (need current edge to filter BOTH in next iteration) ... |
|
|
|
or next_t < current_t # -OR- the next edge on SAME input is EARLIER (need current edge to filter BOTH in next iteration) ... |
|
|
@ -240,8 +263,8 @@ def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0): |
|
|
|
z_cur += 1 |
|
|
|
z_cur += 1 |
|
|
|
else: |
|
|
|
else: |
|
|
|
overflows += 1 |
|
|
|
overflows += 1 |
|
|
|
previous_t = cbuf[z_mem + z_cur - 1, sim] |
|
|
|
|
|
|
|
z_cur -= 1 |
|
|
|
z_cur -= 1 |
|
|
|
|
|
|
|
previous_t = cbuf[z_mem + z_cur, sim] |
|
|
|
else: |
|
|
|
else: |
|
|
|
z_cur -= 1 |
|
|
|
z_cur -= 1 |
|
|
|
previous_t = cbuf[z_mem + z_cur - 1, sim] if z_cur > 0 else TMIN |
|
|
|
previous_t = cbuf[z_mem + z_cur - 1, sim] if z_cur > 0 else TMIN |
|
|
@ -258,6 +281,8 @@ def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0): |
|
|
|
# generate or propagate overflow flag |
|
|
|
# generate or propagate overflow flag |
|
|
|
cbuf[z_mem + z_cur, sim] = TMAX_OVL if overflows > 0 else max(a, b, c, d) |
|
|
|
cbuf[z_mem + z_cur, sim] = TMAX_OVL if overflows > 0 else max(a, b, c, d) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hbuf[z_idx, sim] = h |
|
|
|
|
|
|
|
|
|
|
|
nrise = max(0, (z_cur+1) // 2 - (cbuf[z_mem, sim] == TMIN)) |
|
|
|
nrise = max(0, (z_cur+1) // 2 - (cbuf[z_mem, sim] == TMIN)) |
|
|
|
nfall = z_cur // 2 |
|
|
|
nfall = z_cur // 2 |
|
|
|
|
|
|
|
|
|
|
@ -268,11 +293,11 @@ wave_eval_cpu = numba.njit(_wave_eval) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@numba.njit |
|
|
|
@numba.njit |
|
|
|
def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, abuf, sim_start, sim_stop, delays, simctl_int, seed): |
|
|
|
def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, hbuf, abuf, sim_start, sim_stop, delays, simctl_int, seed): |
|
|
|
for op_idx in range(op_start, op_stop): |
|
|
|
for op_idx in range(op_start, op_stop): |
|
|
|
op = ops[op_idx] |
|
|
|
op = ops[op_idx] |
|
|
|
for sim in range(sim_start, sim_stop): |
|
|
|
for sim in range(sim_start, sim_stop): |
|
|
|
nrise, nfall = wave_eval_cpu(op, c, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed) |
|
|
|
nrise, nfall = wave_eval_cpu(op, c, c_locs, c_caps, hbuf, sim, delays, simctl_int[:, sim], seed) |
|
|
|
a_loc = op[6] |
|
|
|
a_loc = op[6] |
|
|
|
a_wr = op[7] |
|
|
|
a_wr = op[7] |
|
|
|
a_wf = op[8] |
|
|
|
a_wf = op[8] |
|
|
@ -345,12 +370,18 @@ class WaveSimCuda(WaveSim): |
|
|
|
self.delays = cuda.to_device(self.delays) |
|
|
|
self.delays = cuda.to_device(self.delays) |
|
|
|
self.simctl_int = cuda.to_device(self.simctl_int) |
|
|
|
self.simctl_int = cuda.to_device(self.simctl_int) |
|
|
|
self.abuf = cuda.to_device(self.abuf) |
|
|
|
self.abuf = cuda.to_device(self.abuf) |
|
|
|
|
|
|
|
self.h = cuda.to_device(self.h) |
|
|
|
|
|
|
|
self.h_base = cuda.to_device(self.h_base) |
|
|
|
|
|
|
|
self.line_use = cuda.to_device(self.line_use) |
|
|
|
|
|
|
|
self.error_counts = cuda.to_device(self.error_counts) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.retval_int = cuda.to_device(np.array([0], dtype=np.int32)) |
|
|
|
|
|
|
|
|
|
|
|
self._block_dim = (32, 16) |
|
|
|
self._block_dim = (32, 16) |
|
|
|
|
|
|
|
|
|
|
|
def __getstate__(self): |
|
|
|
def __getstate__(self): |
|
|
|
state = self.__dict__.copy() |
|
|
|
state = self.__dict__.copy() |
|
|
|
state['c'] = np.array(self.c) |
|
|
|
del state['c'] |
|
|
|
state['s'] = np.array(self.s) |
|
|
|
state['s'] = np.array(self.s) |
|
|
|
state['ops'] = np.array(self.ops) |
|
|
|
state['ops'] = np.array(self.ops) |
|
|
|
state['c_locs'] = np.array(self.c_locs) |
|
|
|
state['c_locs'] = np.array(self.c_locs) |
|
|
@ -358,11 +389,16 @@ class WaveSimCuda(WaveSim): |
|
|
|
state['delays'] = np.array(self.delays) |
|
|
|
state['delays'] = np.array(self.delays) |
|
|
|
state['simctl_int'] = np.array(self.simctl_int) |
|
|
|
state['simctl_int'] = np.array(self.simctl_int) |
|
|
|
state['abuf'] = np.array(self.abuf) |
|
|
|
state['abuf'] = np.array(self.abuf) |
|
|
|
|
|
|
|
state['h'] = np.array(self.h) |
|
|
|
|
|
|
|
state['h_base'] = np.array(self.h_base) |
|
|
|
|
|
|
|
state['line_use'] = np.array(self.line_use) |
|
|
|
|
|
|
|
state['error_counts'] = np.array(self.error_counts) |
|
|
|
|
|
|
|
state['retval_int'] = np.array(self.retval_int) |
|
|
|
return state |
|
|
|
return state |
|
|
|
|
|
|
|
|
|
|
|
def __setstate__(self, state): |
|
|
|
def __setstate__(self, state): |
|
|
|
self.__dict__.update(state) |
|
|
|
self.__dict__.update(state) |
|
|
|
self.c = cuda.to_device(self.c) |
|
|
|
self.c = cuda.to_device(np.full((self.c_len, self.sims), TMAX, dtype=np.float32)) |
|
|
|
self.s = cuda.to_device(self.s) |
|
|
|
self.s = cuda.to_device(self.s) |
|
|
|
self.ops = cuda.to_device(self.ops) |
|
|
|
self.ops = cuda.to_device(self.ops) |
|
|
|
self.c_locs = cuda.to_device(self.c_locs) |
|
|
|
self.c_locs = cuda.to_device(self.c_locs) |
|
|
@ -370,6 +406,11 @@ class WaveSimCuda(WaveSim): |
|
|
|
self.delays = cuda.to_device(self.delays) |
|
|
|
self.delays = cuda.to_device(self.delays) |
|
|
|
self.simctl_int = cuda.to_device(self.simctl_int) |
|
|
|
self.simctl_int = cuda.to_device(self.simctl_int) |
|
|
|
self.abuf = cuda.to_device(self.abuf) |
|
|
|
self.abuf = cuda.to_device(self.abuf) |
|
|
|
|
|
|
|
self.h = cuda.to_device(self.h) |
|
|
|
|
|
|
|
self.h_base = cuda.to_device(self.h_base) |
|
|
|
|
|
|
|
self.line_use = cuda.to_device(self.line_use) |
|
|
|
|
|
|
|
self.error_counts = cuda.to_device(self.error_counts) |
|
|
|
|
|
|
|
self.retval_int = cuda.to_device(self.retval_int) |
|
|
|
|
|
|
|
|
|
|
|
def s_to_c(self): |
|
|
|
def s_to_c(self): |
|
|
|
grid_dim = self._grid_dim(self.sims, self.s_len) |
|
|
|
grid_dim = self._grid_dim(self.sims, self.s_len) |
|
|
@ -383,10 +424,18 @@ class WaveSimCuda(WaveSim): |
|
|
|
if op_from > op_start: continue |
|
|
|
if op_from > op_start: continue |
|
|
|
if op_to is not None and op_to <= op_start: break |
|
|
|
if op_to is not None and op_to <= op_start: break |
|
|
|
grid_dim = self._grid_dim(sims, op_stop - op_start) |
|
|
|
grid_dim = self._grid_dim(sims, op_stop - op_start) |
|
|
|
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.abuf, int(0), |
|
|
|
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.h, self.abuf, int(0), |
|
|
|
sims, self.delays, self.simctl_int, seed) |
|
|
|
sims, self.delays, self.simctl_int, seed) |
|
|
|
cuda.synchronize() |
|
|
|
cuda.synchronize() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def c_prop_level(self, level, sims=None, seed=1): |
|
|
|
|
|
|
|
sims = min(sims or self.sims, self.sims) |
|
|
|
|
|
|
|
op_start = self.level_starts[level] |
|
|
|
|
|
|
|
op_stop = self.level_stops[level] |
|
|
|
|
|
|
|
grid_dim = self._grid_dim(sims, op_stop - op_start) |
|
|
|
|
|
|
|
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.h, self.abuf, int(0), |
|
|
|
|
|
|
|
sims, self.delays, self.simctl_int, seed) |
|
|
|
|
|
|
|
|
|
|
|
def c_to_s(self, time=TMAX, sd=0.0, seed=1): |
|
|
|
def c_to_s(self, time=TMAX, sd=0.0, seed=1): |
|
|
|
grid_dim = self._grid_dim(self.sims, self.s_len) |
|
|
|
grid_dim = self._grid_dim(self.sims, self.s_len) |
|
|
|
wave_capture_gpu[grid_dim, self._block_dim](self.c, self.s, self.c_locs, self.c_caps, self.ppo_offset, |
|
|
|
wave_capture_gpu[grid_dim, self._block_dim](self.c, self.s, self.c_locs, self.c_caps, self.ppo_offset, |
|
|
@ -396,6 +445,52 @@ class WaveSimCuda(WaveSim): |
|
|
|
grid_dim = self._grid_dim(self.sims, self.s_len) |
|
|
|
grid_dim = self._grid_dim(self.sims, self.s_len) |
|
|
|
ppo_to_ppi_gpu[grid_dim, self._block_dim](self.s, self.c_locs, time, self.ppi_offset, self.ppo_offset) |
|
|
|
ppo_to_ppi_gpu[grid_dim, self._block_dim](self.s, self.c_locs, time, self.ppi_offset, self.ppo_offset) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def set_base_hashes(self): |
|
|
|
|
|
|
|
nitems = self.h_base.shape[0] * self.h_base.shape[1] |
|
|
|
|
|
|
|
grid_dim = cdiv(nitems, 256) |
|
|
|
|
|
|
|
memcpy_gpu[grid_dim, 256](self.h, self.h_base, nitems) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def compare_hashes_level(self, lv): |
|
|
|
|
|
|
|
self.retval_int[0] = 0 |
|
|
|
|
|
|
|
grid_dim = self._grid_dim(self.sims, self.line_use_counts[lv]) |
|
|
|
|
|
|
|
diff_hash_gpu[grid_dim, self._block_dim](self.h, self.h_base, self.line_use, self.line_use_offsets[lv], |
|
|
|
|
|
|
|
self.line_use_counts[lv], self.retval_int) |
|
|
|
|
|
|
|
return self.retval_int[0] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def calc_error_counts(self, sims=None): |
|
|
|
|
|
|
|
sims = min(sims or self.sims, self.sims) |
|
|
|
|
|
|
|
grid_dim = cdiv(self.s_len, 256) |
|
|
|
|
|
|
|
calc_error_counts_gpu[grid_dim, 256](self.s, sims, self.error_counts) |
|
|
|
|
|
|
|
return np.array(self.error_counts) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@cuda.jit() |
|
|
|
|
|
|
|
def memcpy_gpu (src, dst, nitems): |
|
|
|
|
|
|
|
tid = cuda.grid(1) |
|
|
|
|
|
|
|
stride = cuda.gridDim.x * cuda.blockDim.x |
|
|
|
|
|
|
|
for i in range(tid, nitems, stride): |
|
|
|
|
|
|
|
dst.flat[i] = src.flat[i] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@cuda.jit() |
|
|
|
|
|
|
|
def diff_hash_gpu(hbuf1, hbuf2, h_locs, h_locs_offset, h_locs_cnt, differs): |
|
|
|
|
|
|
|
x, y = cuda.grid(2) |
|
|
|
|
|
|
|
if x >= hbuf1.shape[1]: return |
|
|
|
|
|
|
|
if y >= h_locs_cnt: return |
|
|
|
|
|
|
|
h_loc = h_locs[h_locs_offset+y] |
|
|
|
|
|
|
|
if hbuf1[h_loc, x] != hbuf2[h_loc, x]: |
|
|
|
|
|
|
|
differs[0] = 1 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@cuda.jit() |
|
|
|
|
|
|
|
def calc_error_counts_gpu(s, sims, error_counts): |
|
|
|
|
|
|
|
x = cuda.grid(1) |
|
|
|
|
|
|
|
if x >= s.shape[1]: return |
|
|
|
|
|
|
|
cnt = 0 |
|
|
|
|
|
|
|
for i in range(sims): |
|
|
|
|
|
|
|
cnt += (s[6,x,i] != s[8,x,i]) |
|
|
|
|
|
|
|
error_counts[x] = cnt |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@cuda.jit() |
|
|
|
@cuda.jit() |
|
|
|
def wave_assign_gpu(c, s, c_locs, ppi_offset): |
|
|
|
def wave_assign_gpu(c, s, c_locs, ppi_offset): |
|
|
@ -425,7 +520,7 @@ _wave_eval_gpu = cuda.jit(_wave_eval, device=True) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@cuda.jit() |
|
|
|
@cuda.jit() |
|
|
|
def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, abuf, sim_start, sim_stop, delays, simctl_int, seed): |
|
|
|
def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, hbuf, abuf, sim_start, sim_stop, delays, simctl_int, seed): |
|
|
|
x, y = cuda.grid(2) |
|
|
|
x, y = cuda.grid(2) |
|
|
|
sim = sim_start + x |
|
|
|
sim = sim_start + x |
|
|
|
op_idx = op_start + y |
|
|
|
op_idx = op_start + y |
|
|
@ -437,7 +532,7 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, abuf, sim_start, |
|
|
|
a_wr = op[7] |
|
|
|
a_wr = op[7] |
|
|
|
a_wf = op[8] |
|
|
|
a_wf = op[8] |
|
|
|
|
|
|
|
|
|
|
|
nrise, nfall = _wave_eval_gpu(op, cbuf, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed) |
|
|
|
nrise, nfall = _wave_eval_gpu(op, cbuf, c_locs, c_caps, hbuf, sim, delays, simctl_int[:, sim], seed) |
|
|
|
|
|
|
|
|
|
|
|
# accumulate WSA into abuf |
|
|
|
# accumulate WSA into abuf |
|
|
|
if a_loc >= 0: |
|
|
|
if a_loc >= 0: |
|
|
|