| 
						
						
							
								
							
						
						
					 | 
					 | 
					@ -47,8 +47,8 @@ class WaveSim(sim.SimOps): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    :param c_reuse: If enabled, memory of intermediate signal waveforms will be re-used. This greatly reduces | 
					 | 
					 | 
					 | 
					    :param c_reuse: If enabled, memory of intermediate signal waveforms will be re-used. This greatly reduces | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        memory footprint, but intermediate signal waveforms become unaccessible after a propagation. | 
					 | 
					 | 
					 | 
					        memory footprint, but intermediate signal waveforms become unaccessible after a propagation. | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    """ | 
					 | 
					 | 
					 | 
					    """ | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    def __init__(self, circuit, delays, sims=8, c_caps=16, c_reuse=False, strip_forks=False): | 
					 | 
					 | 
					 | 
					    def __init__(self, circuit, delays, sims=8, c_caps=16, a_ctrl=None, c_reuse=False, strip_forks=False): | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        super().__init__(circuit, c_caps=c_caps, c_caps_min=4, c_reuse=c_reuse, strip_forks=strip_forks) | 
					 | 
					 | 
					 | 
					        super().__init__(circuit, c_caps=c_caps, c_caps_min=4, a_ctrl=a_ctrl, c_reuse=c_reuse, strip_forks=strip_forks) | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.sims = sims | 
					 | 
					 | 
					 | 
					        self.sims = sims | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        if delays.ndim == 3: delays = np.expand_dims(delays, axis=0) | 
					 | 
					 | 
					 | 
					        if delays.ndim == 3: delays = np.expand_dims(delays, axis=0) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.delays = np.zeros((len(delays), self.c_locs_len, 2, 2), dtype=delays.dtype) | 
					 | 
					 | 
					 | 
					        self.delays = np.zeros((len(delays), self.c_locs_len, 2, 2), dtype=delays.dtype) | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -78,6 +78,9 @@ class WaveSim(sim.SimOps): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					          final values in the waveforms are still valid. | 
					 | 
					 | 
					 | 
					          final values in the waveforms are still valid. | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        """ | 
					 | 
					 | 
					 | 
					        """ | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					        self.abuf_len = self.ops[:,6].max() + 1 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					        self.abuf = np.zeros((self.abuf_len, sims), dtype=np.int32) if self.abuf_len > 0 else np.zeros((1, 1), dtype=np.int32) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.simctl_int = np.zeros((2, sims), dtype=np.int32) | 
					 | 
					 | 
					 | 
					        self.simctl_int = np.zeros((2, sims), dtype=np.int32) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        """Per-simulation delay configuration. | 
					 | 
					 | 
					 | 
					        """Per-simulation delay configuration. | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -113,7 +116,7 @@ class WaveSim(sim.SimOps): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        """ | 
					 | 
					 | 
					 | 
					        """ | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        sims = min(sims or self.sims, self.sims) | 
					 | 
					 | 
					 | 
					        sims = min(sims or self.sims, self.sims) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        for op_start, op_stop in zip(self.level_starts, self.level_stops): | 
					 | 
					 | 
					 | 
					        for op_start, op_stop in zip(self.level_starts, self.level_stops): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, 0, sims, self.delays, self.simctl_int, seed) | 
					 | 
					 | 
					 | 
					            level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.abuf, 0, sims, self.delays, self.simctl_int, seed) | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    def c_to_s(self, time=TMAX, sd=0.0, seed=1): | 
					 | 
					 | 
					 | 
					    def c_to_s(self, time=TMAX, sd=0.0, seed=1): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        """Simulates a capture operation at all sequential elements and primary outputs. | 
					 | 
					 | 
					 | 
					        """Simulates a capture operation at all sequential elements and primary outputs. | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -141,9 +144,16 @@ class WaveSim(sim.SimOps): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.s[2, self.ppio_s_locs] = self.s[8, self.ppio_s_locs] | 
					 | 
					 | 
					 | 
					        self.s[2, self.ppio_s_locs] = self.s[8, self.ppio_s_locs] | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					def _wave_eval(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0): | 
					 | 
					 | 
					 | 
					def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0): | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    overflows = int(0) | 
					 | 
					 | 
					 | 
					    overflows = int(0) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    lut = op[0] | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    z_idx = op[1] | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    a_idx = op[2] | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    b_idx = op[3] | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    c_idx = op[4] | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    d_idx = op[5] | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    if len(delays) > 1: | 
					 | 
					 | 
					 | 
					    if len(delays) > 1: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        if simctl_int[1] == 0: | 
					 | 
					 | 
					 | 
					        if simctl_int[1] == 0: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            delays = delays[seed] | 
					 | 
					 | 
					 | 
					            delays = delays[seed] | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -240,22 +250,26 @@ def _wave_eval(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, sim | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    # generate or propagate overflow flag | 
					 | 
					 | 
					 | 
					    # generate or propagate overflow flag | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    cbuf[z_mem + z_cur, sim] = TMAX_OVL if overflows > 0 else max(a, b, c, d) | 
					 | 
					 | 
					 | 
					    cbuf[z_mem + z_cur, sim] = TMAX_OVL if overflows > 0 else max(a, b, c, d) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    nrise = max(0, (z_cur+1) // 2 - (cbuf[z_mem, sim] == TMIN)) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    nfall = z_cur // 2 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					_wave_eval_cpu = numba.njit(_wave_eval) | 
					 | 
					 | 
					 | 
					    return nrise, nfall | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					@numba.njit | 
					 | 
					 | 
					 | 
					wave_eval_cpu = numba.njit(_wave_eval) | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					def wave_eval_cpu(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0): | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    lut, z_idx, a_idx, b_idx, c_idx, d_idx = op | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    _wave_eval_cpu(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed) | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					@numba.njit | 
					 | 
					 | 
					 | 
					@numba.njit | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, sim_start, sim_stop, delays, simctl_int, seed): | 
					 | 
					 | 
					 | 
					def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, abuf, sim_start, sim_stop, delays, simctl_int, seed): | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    for op_idx in range(op_start, op_stop): | 
					 | 
					 | 
					 | 
					    for op_idx in range(op_start, op_stop): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        op = ops[op_idx] | 
					 | 
					 | 
					 | 
					        op = ops[op_idx] | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        for sim in range(sim_start, sim_stop): | 
					 | 
					 | 
					 | 
					        for sim in range(sim_start, sim_stop): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            wave_eval_cpu(op, c, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed) | 
					 | 
					 | 
					 | 
					            nrise, nfall = wave_eval_cpu(op, c, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed) | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					            a_loc = op[6] | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					            a_wr = op[7] | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					            a_wf = op[8] | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					            if a_loc >= 0: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					                abuf[a_loc, sim] += nrise*a_wr + nfall*a_wf | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					@numba.njit | 
					 | 
					 | 
					 | 
					@numba.njit | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -311,8 +325,8 @@ class WaveSimCuda(WaveSim): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    All internal memories are mirrored into GPU memory upon construction. | 
					 | 
					 | 
					 | 
					    All internal memories are mirrored into GPU memory upon construction. | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    Some operations like access to single waveforms can involve large communication overheads. | 
					 | 
					 | 
					 | 
					    Some operations like access to single waveforms can involve large communication overheads. | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    """ | 
					 | 
					 | 
					 | 
					    """ | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    def __init__(self, circuit, delays, sims=8, c_caps=16, c_reuse=False, strip_forks=False): | 
					 | 
					 | 
					 | 
					    def __init__(self, circuit, delays, sims=8, c_caps=16, a_ctrl=None, c_reuse=False, strip_forks=False): | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        super().__init__(circuit, delays, sims, c_caps, c_reuse, strip_forks) | 
					 | 
					 | 
					 | 
					        super().__init__(circuit, delays, sims, c_caps, a_ctrl=a_ctrl, c_reuse=c_reuse, strip_forks=strip_forks) | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.c = cuda.to_device(self.c) | 
					 | 
					 | 
					 | 
					        self.c = cuda.to_device(self.c) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.s = cuda.to_device(self.s) | 
					 | 
					 | 
					 | 
					        self.s = cuda.to_device(self.s) | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -321,6 +335,7 @@ class WaveSimCuda(WaveSim): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.c_caps = cuda.to_device(self.c_caps) | 
					 | 
					 | 
					 | 
					        self.c_caps = cuda.to_device(self.c_caps) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.delays = cuda.to_device(self.delays) | 
					 | 
					 | 
					 | 
					        self.delays = cuda.to_device(self.delays) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.simctl_int = cuda.to_device(self.simctl_int) | 
					 | 
					 | 
					 | 
					        self.simctl_int = cuda.to_device(self.simctl_int) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					        self.abuf = cuda.to_device(self.abuf) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self._block_dim = (32, 16) | 
					 | 
					 | 
					 | 
					        self._block_dim = (32, 16) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -333,6 +348,7 @@ class WaveSimCuda(WaveSim): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        state['c_caps'] = np.array(self.c_caps) | 
					 | 
					 | 
					 | 
					        state['c_caps'] = np.array(self.c_caps) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        state['delays'] = np.array(self.delays) | 
					 | 
					 | 
					 | 
					        state['delays'] = np.array(self.delays) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        state['simctl_int'] = np.array(self.simctl_int) | 
					 | 
					 | 
					 | 
					        state['simctl_int'] = np.array(self.simctl_int) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					        state['abuf'] = np.array(self.abuf) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        return state | 
					 | 
					 | 
					 | 
					        return state | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    def __setstate__(self, state): | 
					 | 
					 | 
					 | 
					    def __setstate__(self, state): | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -344,6 +360,7 @@ class WaveSimCuda(WaveSim): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.c_caps = cuda.to_device(self.c_caps) | 
					 | 
					 | 
					 | 
					        self.c_caps = cuda.to_device(self.c_caps) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.delays = cuda.to_device(self.delays) | 
					 | 
					 | 
					 | 
					        self.delays = cuda.to_device(self.delays) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.simctl_int = cuda.to_device(self.simctl_int) | 
					 | 
					 | 
					 | 
					        self.simctl_int = cuda.to_device(self.simctl_int) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					        self.abuf = cuda.to_device(self.abuf) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    def s_to_c(self): | 
					 | 
					 | 
					 | 
					    def s_to_c(self): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        grid_dim = self._grid_dim(self.sims, self.s_len) | 
					 | 
					 | 
					 | 
					        grid_dim = self._grid_dim(self.sims, self.s_len) | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -355,7 +372,7 @@ class WaveSimCuda(WaveSim): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        sims = min(sims or self.sims, self.sims) | 
					 | 
					 | 
					 | 
					        sims = min(sims or self.sims, self.sims) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        for op_start, op_stop in zip(self.level_starts, self.level_stops): | 
					 | 
					 | 
					 | 
					        for op_start, op_stop in zip(self.level_starts, self.level_stops): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            grid_dim = self._grid_dim(sims, op_stop - op_start) | 
					 | 
					 | 
					 | 
					            grid_dim = self._grid_dim(sims, op_stop - op_start) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, int(0), | 
					 | 
					 | 
					 | 
					            wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.abuf, int(0), | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                sims, self.delays, self.simctl_int, seed) | 
					 | 
					 | 
					 | 
					                sims, self.delays, self.simctl_int, seed) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        cuda.synchronize() | 
					 | 
					 | 
					 | 
					        cuda.synchronize() | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -397,21 +414,24 @@ _wave_eval_gpu = cuda.jit(_wave_eval, device=True) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					@cuda.jit() | 
					 | 
					 | 
					 | 
					@cuda.jit() | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, sim_start, sim_stop, delays, simctl_int, seed): | 
					 | 
					 | 
					 | 
					def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, abuf, sim_start, sim_stop, delays, simctl_int, seed): | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    x, y = cuda.grid(2) | 
					 | 
					 | 
					 | 
					    x, y = cuda.grid(2) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    sim = sim_start + x | 
					 | 
					 | 
					 | 
					    sim = sim_start + x | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    op_idx = op_start + y | 
					 | 
					 | 
					 | 
					    op_idx = op_start + y | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    if sim >= sim_stop: return | 
					 | 
					 | 
					 | 
					    if sim >= sim_stop: return | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    if op_idx >= op_stop: return | 
					 | 
					 | 
					 | 
					    if op_idx >= op_stop: return | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    lut = ops[op_idx, 0] | 
					 | 
					 | 
					 | 
					    op = ops[op_idx] | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    z_idx = ops[op_idx, 1] | 
					 | 
					 | 
					 | 
					    a_loc = op[6] | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    a_idx = ops[op_idx, 2] | 
					 | 
					 | 
					 | 
					    a_wr = op[7] | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    b_idx = ops[op_idx, 3] | 
					 | 
					 | 
					 | 
					    a_wf = op[8] | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    c_idx = ops[op_idx, 4] | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    d_idx = ops[op_idx, 5] | 
					 | 
					 | 
					 | 
					    nrise, nfall = _wave_eval_gpu(op, cbuf, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed) | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    _wave_eval_gpu(lut, z_idx, a_idx, b_idx, c_idx, d_idx, cbuf, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed) | 
					 | 
					 | 
					 | 
					    # accumulate WSA into abuf | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    if a_loc >= 0: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					        #abuf[a_loc, sim] += nrise*a_wr + nfall*a_wf | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					        cuda.atomic.add(abuf, (a_loc, sim), nrise*a_wr + nfall*a_wf) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					@cuda.jit() | 
					 | 
					 | 
					 | 
					@cuda.jit() | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
						
					 | 
					 | 
					
  |