| 
						
						
							
								
							
						
						
					 | 
					 | 
					@ -16,8 +16,7 @@ import math | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					import numpy as np | 
					 | 
					 | 
					 | 
					import numpy as np | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					from . import numba, cuda, hr_bytes | 
					 | 
					 | 
					 | 
					from . import numba, cuda, hr_bytes, sim | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					from .sim import SimOps | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					TMAX = np.float32(2 ** 127) | 
					 | 
					 | 
					 | 
					TMAX = np.float32(2 ** 127) | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -29,7 +28,7 @@ TMIN = np.float32(-2 ** 127) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					"""A large negative 32-bit floating point value used at the beginning of waveforms that start with logic-1.""" | 
					 | 
					 | 
					 | 
					"""A large negative 32-bit floating point value used at the beginning of waveforms that start with logic-1.""" | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					class WaveSim(SimOps): | 
					 | 
					 | 
					 | 
					class WaveSim(sim.SimOps): | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    """A waveform-based combinational logic timing simulator running on CPU. | 
					 | 
					 | 
					 | 
					    """A waveform-based combinational logic timing simulator running on CPU. | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    :param circuit: The circuit to simulate. | 
					 | 
					 | 
					 | 
					    :param circuit: The circuit to simulate. | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -54,30 +53,31 @@ class WaveSim(SimOps): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.sims = sims | 
					 | 
					 | 
					 | 
					        self.sims = sims | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.c_len *= 4 | 
					 | 
					 | 
					 | 
					        self.c_len *= 4 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.vat[...,0:2] *= 4 | 
					 | 
					 | 
					 | 
					        self.c_locs[...] *= 4 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					        self.c_caps[...] *= 4 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.timing = np.zeros((self.vat_len, 2, 2)) | 
					 | 
					 | 
					 | 
					        self.timing = np.zeros((self.c_locs_len, 2, 2)) | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.timing[:len(timing)] = timing | 
					 | 
					 | 
					 | 
					        self.timing[:len(timing)] = timing | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.c = np.zeros((self.c_len, sims), dtype=np.float32) + TMAX | 
					 | 
					 | 
					 | 
					        self.c = np.zeros((self.c_len, sims), dtype=np.float32) + TMAX | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.s = np.zeros((len(self.s_nodes), sims, 11), dtype=np.float32) | 
					 | 
					 | 
					 | 
					        self.s = np.zeros((11, self.s_len, sims), dtype=np.float32) | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        """Information about the logic values and transitions around the sequential elements (flip-flops) and ports. | 
					 | 
					 | 
					 | 
					        """Information about the logic values and transitions around the sequential elements (flip-flops) and ports. | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        The first 3 values are read by ``s_to_c()``. | 
					 | 
					 | 
					 | 
					        The first 3 values are read by ``s_to_c()``. | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        The remaining values are written by ``c_to_s()``. | 
					 | 
					 | 
					 | 
					        The remaining values are written by ``c_to_s()``. | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        The elements are as follows: | 
					 | 
					 | 
					 | 
					        The elements are as follows: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        * ``s[..., 0]`` (P)PI initial value | 
					 | 
					 | 
					 | 
					        * ``s[0]`` (P)PI initial value | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        * ``s[..., 1]`` (P)PI transition time | 
					 | 
					 | 
					 | 
					        * ``s[1]`` (P)PI transition time | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        * ``s[..., 2]`` (P)PI final value | 
					 | 
					 | 
					 | 
					        * ``s[2]`` (P)PI final value | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        * ``s[..., 3]`` (P)PO initial value | 
					 | 
					 | 
					 | 
					        * ``s[3]`` (P)PO initial value | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        * ``s[..., 4]`` (P)PO earliest arrival time (EAT): The time at which the output transitioned from its initial value. | 
					 | 
					 | 
					 | 
					        * ``s[4]`` (P)PO earliest arrival time (EAT): The time at which the output transitioned from its initial value. | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        * ``s[..., 5]`` (P)PO latest stabilization time (LST): The time at which the output settled to its final value. | 
					 | 
					 | 
					 | 
					        * ``s[5]`` (P)PO latest stabilization time (LST): The time at which the output settled to its final value. | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        * ``s[..., 6]`` (P)PO final value | 
					 | 
					 | 
					 | 
					        * ``s[6]`` (P)PO final value | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        * ``s[..., 7]`` (P)PO capture value: probability of capturing a 1 at a given capture time | 
					 | 
					 | 
					 | 
					        * ``s[7]`` (P)PO capture value: probability of capturing a 1 at a given capture time | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        * ``s[..., 8]`` (P)PO sampled capture value: decided by random sampling according to a given seed. | 
					 | 
					 | 
					 | 
					        * ``s[8]`` (P)PO sampled capture value: decided by random sampling according to a given seed. | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        * ``s[..., 9]`` (P)PO sampled capture slack: (capture time - LST) - decided by random sampling according to a given seed. | 
					 | 
					 | 
					 | 
					        * ``s[9]`` (P)PO sampled capture slack: (capture time - LST) - decided by random sampling according to a given seed. | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        * ``s[..., 10]`` Overflow indicator: If non-zero, some signals in the input cone of this output had more | 
					 | 
					 | 
					 | 
					        * ``s[10]`` Overflow indicator: If non-zero, some signals in the input cone of this output had more | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					          transitions than specified in ``c_caps``. Some transitions have been discarded, the | 
					 | 
					 | 
					 | 
					          transitions than specified in ``c_caps``. Some transitions have been discarded, the | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					          final values in the waveforms are still valid. | 
					 | 
					 | 
					 | 
					          final values in the waveforms are still valid. | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        """ | 
					 | 
					 | 
					 | 
					        """ | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -85,19 +85,19 @@ class WaveSim(SimOps): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.params = np.zeros((sims, 4), dtype=np.float32) | 
					 | 
					 | 
					 | 
					        self.params = np.zeros((sims, 4), dtype=np.float32) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.params[...,0] = 1.0 | 
					 | 
					 | 
					 | 
					        self.params[...,0] = 1.0 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.vat, self.ops, self.params)]) | 
					 | 
					 | 
					 | 
					        self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.c_locs, self.c_caps, self.ops, self.params)]) | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.pi_s_locs = np.flatnonzero(self.vat[self.ppi_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0) | 
					 | 
					 | 
					 | 
					        self.pi_s_locs = np.flatnonzero(self.c_locs[self.ppi_offset+np.arange(len(self.circuit.io_nodes))] >= 0) | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.po_s_locs = np.flatnonzero(self.vat[self.ppo_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0) | 
					 | 
					 | 
					 | 
					        self.po_s_locs = np.flatnonzero(self.c_locs[self.ppo_offset+np.arange(len(self.circuit.io_nodes))] >= 0) | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.ppio_s_locs = np.arange(len(self.circuit.io_nodes), len(self.s_nodes)) | 
					 | 
					 | 
					 | 
					        self.ppio_s_locs = np.arange(len(self.circuit.io_nodes), len(self.s_nodes)) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.pippi_s_locs = np.concatenate([self.pi_s_locs, self.ppio_s_locs]) | 
					 | 
					 | 
					 | 
					        self.pippi_s_locs = np.concatenate([self.pi_s_locs, self.ppio_s_locs]) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.poppo_s_locs = np.concatenate([self.po_s_locs, self.ppio_s_locs]) | 
					 | 
					 | 
					 | 
					        self.poppo_s_locs = np.concatenate([self.po_s_locs, self.ppio_s_locs]) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.pi_c_locs = self.vat[self.ppi_offset+self.pi_s_locs, 0] | 
					 | 
					 | 
					 | 
					        self.pi_c_locs = self.c_locs[self.ppi_offset+self.pi_s_locs] | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.po_c_locs = self.vat[self.ppo_offset+self.po_s_locs, 0] | 
					 | 
					 | 
					 | 
					        self.po_c_locs = self.c_locs[self.ppo_offset+self.po_s_locs] | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.ppi_c_locs = self.vat[self.ppi_offset+self.ppio_s_locs, 0] | 
					 | 
					 | 
					 | 
					        self.ppi_c_locs = self.c_locs[self.ppi_offset+self.ppio_s_locs] | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.ppo_c_locs = self.vat[self.ppo_offset+self.ppio_s_locs, 0] | 
					 | 
					 | 
					 | 
					        self.ppo_c_locs = self.c_locs[self.ppo_offset+self.ppio_s_locs] | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.pippi_c_locs = np.concatenate([self.pi_c_locs, self.ppi_c_locs]) | 
					 | 
					 | 
					 | 
					        self.pippi_c_locs = np.concatenate([self.pi_c_locs, self.ppi_c_locs]) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.poppo_c_locs = np.concatenate([self.po_c_locs, self.ppo_c_locs]) | 
					 | 
					 | 
					 | 
					        self.poppo_c_locs = np.concatenate([self.po_c_locs, self.ppo_c_locs]) | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -112,7 +112,7 @@ class WaveSim(SimOps): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        Based on the data in ``self.s``, waveforms are generated on the input lines of the circuit. | 
					 | 
					 | 
					 | 
					        Based on the data in ``self.s``, waveforms are generated on the input lines of the circuit. | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        It modifies ``self.c``. | 
					 | 
					 | 
					 | 
					        It modifies ``self.c``. | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        """ | 
					 | 
					 | 
					 | 
					        """ | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        sins = np.moveaxis(self.s[self.pippi_s_locs], -1, 0) | 
					 | 
					 | 
					 | 
					        sins = self.s[:, self.pippi_s_locs] | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        cond = (sins[2] != 0) + 2*(sins[0] != 0)  # choices order: 0 R F 1 | 
					 | 
					 | 
					 | 
					        cond = (sins[2] != 0) + 2*(sins[0] != 0)  # choices order: 0 R F 1 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.c[self.pippi_c_locs] = np.choose(cond, [TMAX, sins[1], TMIN, TMIN]) | 
					 | 
					 | 
					 | 
					        self.c[self.pippi_c_locs] = np.choose(cond, [TMAX, sins[1], TMIN, TMIN]) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.c[self.pippi_c_locs+1] = np.choose(cond, [TMAX, TMAX, sins[1], TMAX]) | 
					 | 
					 | 
					 | 
					        self.c[self.pippi_c_locs+1] = np.choose(cond, [TMAX, TMAX, sins[1], TMAX]) | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -127,7 +127,7 @@ class WaveSim(SimOps): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        """ | 
					 | 
					 | 
					 | 
					        """ | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        sims = min(sims or self.sims, self.sims) | 
					 | 
					 | 
					 | 
					        sims = min(sims or self.sims, self.sims) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        for op_start, op_stop in zip(self.level_starts, self.level_stops): | 
					 | 
					 | 
					 | 
					        for op_start, op_stop in zip(self.level_starts, self.level_stops): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            level_eval_cpu(self.ops, op_start, op_stop, self.c, self.vat, 0, sims, | 
					 | 
					 | 
					 | 
					            level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, 0, sims, | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                                         self.timing, self.params, sd, seed) | 
					 | 
					 | 
					 | 
					                                         self.timing, self.params, sd, seed) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    def c_to_s(self, time=TMAX, sd=0.0, seed=1): | 
					 | 
					 | 
					 | 
					    def c_to_s(self, time=TMAX, sd=0.0, seed=1): | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -140,9 +140,9 @@ class WaveSim(SimOps): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        :param sd: A standard deviation for uncertainty in the actual capture time. | 
					 | 
					 | 
					 | 
					        :param sd: A standard deviation for uncertainty in the actual capture time. | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        :param seed: The random seed for a capture with uncertainty. | 
					 | 
					 | 
					 | 
					        :param seed: The random seed for a capture with uncertainty. | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        """ | 
					 | 
					 | 
					 | 
					        """ | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        for s_loc, (c_loc, c_len, _) in zip(self.poppo_s_locs, self.vat[self.ppo_offset+self.poppo_s_locs]): | 
					 | 
					 | 
					 | 
					        for s_loc, c_loc, c_len in zip(self.poppo_s_locs, self.c_locs[self.ppo_offset+self.poppo_s_locs], self.c_caps[self.ppo_offset+self.poppo_s_locs]): | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            for vector in range(self.sims): | 
					 | 
					 | 
					 | 
					            for vector in range(self.sims): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                self.s[s_loc, vector, 3:] = wave_capture_cpu(self.c, c_loc, c_len, vector, time=time, sd=sd, seed=seed) | 
					 | 
					 | 
					 | 
					                self.s[3:, s_loc, vector] = wave_capture_cpu(self.c, c_loc, c_len, vector, time=time, sd=sd, seed=seed) | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    def s_ppo_to_ppi(self, time=0.0): | 
					 | 
					 | 
					 | 
					    def s_ppo_to_ppi(self, time=0.0): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        """Re-assigns the last sampled capture to the appropriate pseudo-primary inputs (PPI).  | 
					 | 
					 | 
					 | 
					        """Re-assigns the last sampled capture to the appropriate pseudo-primary inputs (PPI).  | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -151,9 +151,9 @@ class WaveSim(SimOps): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        :param time: The transition time at the inputs (usually 0.0). | 
					 | 
					 | 
					 | 
					        :param time: The transition time at the inputs (usually 0.0). | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        """ | 
					 | 
					 | 
					 | 
					        """ | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.s[self.ppio_s_locs, :, 0] = self.s[self.ppio_s_locs, :, 2] | 
					 | 
					 | 
					 | 
					        self.s[0, self.ppio_s_locs] = self.s[2, self.ppio_s_locs] | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.s[self.ppio_s_locs, :, 1] = time | 
					 | 
					 | 
					 | 
					        self.s[1, self.ppio_s_locs] = time | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.s[self.ppio_s_locs, :, 2] = self.s[self.ppio_s_locs, :, 8] | 
					 | 
					 | 
					 | 
					        self.s[2, self.ppio_s_locs] = self.s[8, self.ppio_s_locs] | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					@numba.njit | 
					 | 
					 | 
					 | 
					@numba.njit | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -173,7 +173,7 @@ def rand_gauss_cpu(seed, sd): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					@numba.njit | 
					 | 
					 | 
					 | 
					@numba.njit | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					def wave_eval_cpu(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0): | 
					 | 
					 | 
					 | 
					def wave_eval_cpu(op, cbuf, c_locs, c_caps, st_idx, line_times, param, sd=0.0, seed=0): | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    lut, z_idx, a_idx, b_idx, c_idx, d_idx = op | 
					 | 
					 | 
					 | 
					    lut, z_idx, a_idx, b_idx, c_idx, d_idx = op | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    # >>> same code as wave_eval_cpu (except rand_gauss_*pu()-calls) >>> | 
					 | 
					 | 
					 | 
					    # >>> same code as wave_eval_cpu (except rand_gauss_*pu()-calls) >>> | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -181,11 +181,12 @@ def wave_eval_cpu(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1) | 
					 | 
					 | 
					 | 
					    _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    a_mem = vat[a_idx, 0] | 
					 | 
					 | 
					 | 
					    a_mem = c_locs[a_idx] | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    b_mem = vat[b_idx, 0] | 
					 | 
					 | 
					 | 
					    b_mem = c_locs[b_idx] | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    c_mem = vat[c_idx, 0] | 
					 | 
					 | 
					 | 
					    c_mem = c_locs[c_idx] | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    d_mem = vat[d_idx, 0] | 
					 | 
					 | 
					 | 
					    d_mem = c_locs[d_idx] | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    z_mem, z_cap, _ = vat[z_idx] | 
					 | 
					 | 
					 | 
					    z_mem = c_locs[z_idx] | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    z_cap = c_caps[z_idx] | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    a_cur = int(0) | 
					 | 
					 | 
					 | 
					    a_cur = int(0) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    b_cur = int(0) | 
					 | 
					 | 
					 | 
					    b_cur = int(0) | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -280,12 +281,12 @@ def wave_eval_cpu(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					@numba.njit | 
					 | 
					 | 
					 | 
					@numba.njit | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					def level_eval_cpu(ops, op_start, op_stop, c, vat, st_start, st_stop, line_times, params, sd, seed): | 
					 | 
					 | 
					 | 
					def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, st_start, st_stop, line_times, params, sd, seed): | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    overflows = 0 | 
					 | 
					 | 
					 | 
					    overflows = 0 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    for op_idx in range(op_start, op_stop): | 
					 | 
					 | 
					 | 
					    for op_idx in range(op_start, op_stop): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        op = ops[op_idx] | 
					 | 
					 | 
					 | 
					        op = ops[op_idx] | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        for st_idx in range(st_start, st_stop): | 
					 | 
					 | 
					 | 
					        for st_idx in range(st_start, st_stop): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            wave_eval_cpu(op, c, vat, st_idx, line_times, params[st_idx], sd, seed) | 
					 | 
					 | 
					 | 
					            wave_eval_cpu(op, c, c_locs, c_caps, st_idx, line_times, params[st_idx], sd, seed) | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					@numba.njit | 
					 | 
					 | 
					 | 
					@numba.njit | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -347,7 +348,8 @@ class WaveSimCuda(WaveSim): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.c = cuda.to_device(self.c) | 
					 | 
					 | 
					 | 
					        self.c = cuda.to_device(self.c) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.s = cuda.to_device(self.s) | 
					 | 
					 | 
					 | 
					        self.s = cuda.to_device(self.s) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.ops = cuda.to_device(self.ops) | 
					 | 
					 | 
					 | 
					        self.ops = cuda.to_device(self.ops) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.vat = cuda.to_device(self.vat) | 
					 | 
					 | 
					 | 
					        self.c_locs = cuda.to_device(self.c_locs) | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					        self.c_caps = cuda.to_device(self.c_caps) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.timing = cuda.to_device(self.timing) | 
					 | 
					 | 
					 | 
					        self.timing = cuda.to_device(self.timing) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        self.params = cuda.to_device(self.params) | 
					 | 
					 | 
					 | 
					        self.params = cuda.to_device(self.params) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -355,7 +357,7 @@ class WaveSimCuda(WaveSim): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    def s_to_c(self): | 
					 | 
					 | 
					 | 
					    def s_to_c(self): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        grid_dim = self._grid_dim(self.sims, self.s_len) | 
					 | 
					 | 
					 | 
					        grid_dim = self._grid_dim(self.sims, self.s_len) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        wave_assign_gpu[grid_dim, self._block_dim](self.c, self.s, self.vat, self.ppi_offset) | 
					 | 
					 | 
					 | 
					        wave_assign_gpu[grid_dim, self._block_dim](self.c, self.s, self.c_locs, self.ppi_offset) | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    def _grid_dim(self, x, y): | 
					 | 
					 | 
					 | 
					    def _grid_dim(self, x, y): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        gx = math.ceil(x / self._block_dim[0]) | 
					 | 
					 | 
					 | 
					        gx = math.ceil(x / self._block_dim[0]) | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -366,29 +368,29 @@ class WaveSimCuda(WaveSim): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        sims = min(sims or self.sims, self.sims) | 
					 | 
					 | 
					 | 
					        sims = min(sims or self.sims, self.sims) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        for op_start, op_stop in zip(self.level_starts, self.level_stops): | 
					 | 
					 | 
					 | 
					        for op_start, op_stop in zip(self.level_starts, self.level_stops): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            grid_dim = self._grid_dim(sims, op_stop - op_start) | 
					 | 
					 | 
					 | 
					            grid_dim = self._grid_dim(sims, op_stop - op_start) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.vat, int(0), | 
					 | 
					 | 
					 | 
					            wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, int(0), | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					                sims, self.timing, self.params, sd, seed) | 
					 | 
					 | 
					 | 
					                sims, self.timing, self.params, sd, seed) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        cuda.synchronize() | 
					 | 
					 | 
					 | 
					        cuda.synchronize() | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    def c_to_s(self, time=TMAX, sd=0.0, seed=1): | 
					 | 
					 | 
					 | 
					    def c_to_s(self, time=TMAX, sd=0.0, seed=1): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        grid_dim = self._grid_dim(self.sims, self.s_len) | 
					 | 
					 | 
					 | 
					        grid_dim = self._grid_dim(self.sims, self.s_len) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        wave_capture_gpu[grid_dim, self._block_dim](self.c, self.s, self.vat, self.ppo_offset, | 
					 | 
					 | 
					 | 
					        wave_capture_gpu[grid_dim, self._block_dim](self.c, self.s, self.c_locs, self.c_caps, self.ppo_offset, | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            time, sd * math.sqrt(2), seed) | 
					 | 
					 | 
					 | 
					            time, sd * math.sqrt(2), seed) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    def s_ppo_to_ppi(self, time=0.0): | 
					 | 
					 | 
					 | 
					    def s_ppo_to_ppi(self, time=0.0): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        grid_dim = self._grid_dim(self.sims, self.s_len) | 
					 | 
					 | 
					 | 
					        grid_dim = self._grid_dim(self.sims, self.s_len) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        ppo_to_ppi_gpu[grid_dim, self._block_dim](self.s, self.vat, time, self.ppi_offset, self.ppo_offset) | 
					 | 
					 | 
					 | 
					        ppo_to_ppi_gpu[grid_dim, self._block_dim](self.s, self.c_locs, time, self.ppi_offset, self.ppo_offset) | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					@cuda.jit() | 
					 | 
					 | 
					 | 
					@cuda.jit() | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					def wave_assign_gpu(c, s, vat, ppi_offset): | 
					 | 
					 | 
					 | 
					def wave_assign_gpu(c, s, c_locs, ppi_offset): | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    x, y = cuda.grid(2) | 
					 | 
					 | 
					 | 
					    x, y = cuda.grid(2) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    if y >= len(s): return | 
					 | 
					 | 
					 | 
					    if y >= s.shape[1]: return | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    c_loc, c_len, _ = vat[ppi_offset + y] | 
					 | 
					 | 
					 | 
					    c_loc = c_locs[ppi_offset + y] | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    if c_loc < 0: return | 
					 | 
					 | 
					 | 
					    if c_loc < 0: return | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    if x >= c.shape[-1]: return | 
					 | 
					 | 
					 | 
					    if x >= c.shape[-1]: return | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    value = int(s[y, x, 2] >= 0.5) | (2*int(s[y, x, 0] >= 0.5)) | 
					 | 
					 | 
					 | 
					    value = int(s[2, y, x] >= 0.5) | (2*int(s[0, y, x] >= 0.5)) | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    ttime = s[y, x, 1] | 
					 | 
					 | 
					 | 
					    ttime = s[1, y, x] | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    if value == 0: | 
					 | 
					 | 
					 | 
					    if value == 0: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        c[c_loc, x] = TMAX | 
					 | 
					 | 
					 | 
					        c[c_loc, x] = TMAX | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        c[c_loc+1, x] = TMAX | 
					 | 
					 | 
					 | 
					        c[c_loc+1, x] = TMAX | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -421,7 +423,7 @@ def rand_gauss_gpu(seed, sd): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					@cuda.jit() | 
					 | 
					 | 
					 | 
					@cuda.jit() | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					def wave_eval_gpu(ops, op_start, op_stop, cbuf, vat, st_start, st_stop, line_times, param, sd, seed): | 
					 | 
					 | 
					 | 
					def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, st_start, st_stop, line_times, param, sd, seed): | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    x, y = cuda.grid(2) | 
					 | 
					 | 
					 | 
					    x, y = cuda.grid(2) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    st_idx = st_start + x | 
					 | 
					 | 
					 | 
					    st_idx = st_start + x | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    op_idx = op_start + y | 
					 | 
					 | 
					 | 
					    op_idx = op_start + y | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -442,11 +444,12 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, vat, st_start, st_stop, line_tim | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1) | 
					 | 
					 | 
					 | 
					    _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    a_mem = vat[a_idx, 0] | 
					 | 
					 | 
					 | 
					    a_mem = c_locs[a_idx] | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    b_mem = vat[b_idx, 0] | 
					 | 
					 | 
					 | 
					    b_mem = c_locs[b_idx] | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    c_mem = vat[c_idx, 0] | 
					 | 
					 | 
					 | 
					    c_mem = c_locs[c_idx] | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    d_mem = vat[d_idx, 0] | 
					 | 
					 | 
					 | 
					    d_mem = c_locs[d_idx] | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    z_mem, z_cap, _ = vat[z_idx] | 
					 | 
					 | 
					 | 
					    z_mem = c_locs[z_idx] | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    z_cap = c_caps[z_idx] | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    a_cur = int(0) | 
					 | 
					 | 
					 | 
					    a_cur = int(0) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    b_cur = int(0) | 
					 | 
					 | 
					 | 
					    b_cur = int(0) | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -541,10 +544,11 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, vat, st_start, st_stop, line_tim | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					@cuda.jit() | 
					 | 
					 | 
					 | 
					@cuda.jit() | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					def wave_capture_gpu(c, s, vat, ppo_offset, time, s_sqrt2, seed): | 
					 | 
					 | 
					 | 
					def wave_capture_gpu(c, s, c_locs, c_caps, ppo_offset, time, s_sqrt2, seed): | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    x, y = cuda.grid(2) | 
					 | 
					 | 
					 | 
					    x, y = cuda.grid(2) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    if ppo_offset + y >= len(vat): return | 
					 | 
					 | 
					 | 
					    if ppo_offset + y >= len(c_locs): return | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    line, tdim, _ = vat[ppo_offset + y] | 
					 | 
					 | 
					 | 
					    line = c_locs[ppo_offset + y] | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    tdim = c_caps[ppo_offset + y]  | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    if line < 0: return | 
					 | 
					 | 
					 | 
					    if line < 0: return | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    if x >= c.shape[-1]: return | 
					 | 
					 | 
					 | 
					    if x >= c.shape[-1]: return | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    vector = x | 
					 | 
					 | 
					 | 
					    vector = x | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -588,25 +592,25 @@ def wave_capture_gpu(c, s, vat, ppo_offset, time, s_sqrt2, seed): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    else: | 
					 | 
					 | 
					 | 
					    else: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        acc = val | 
					 | 
					 | 
					 | 
					        acc = val | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    s[y, vector, 3] = (c[line, vector] <= TMIN) | 
					 | 
					 | 
					 | 
					    s[3, y, vector] = (c[line, vector] <= TMIN) | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    s[y, vector, 4] = eat | 
					 | 
					 | 
					 | 
					    s[4, y, vector] = eat | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    s[y, vector, 5] = lst | 
					 | 
					 | 
					 | 
					    s[5, y, vector] = lst | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    s[y, vector, 6] = final | 
					 | 
					 | 
					 | 
					    s[6, y, vector] = final | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    s[y, vector, 7] = acc | 
					 | 
					 | 
					 | 
					    s[7, y, vector] = acc | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    s[y, vector, 8] = val | 
					 | 
					 | 
					 | 
					    s[8, y, vector] = val | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    s[y, vector, 9] = 0  # TODO | 
					 | 
					 | 
					 | 
					    s[9, y, vector] = 0  # TODO | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    s[y, vector, 10] = ovl | 
					 | 
					 | 
					 | 
					    s[10, y, vector] = ovl | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					@cuda.jit() | 
					 | 
					 | 
					 | 
					@cuda.jit() | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					def ppo_to_ppi_gpu(s, vat, time, ppi_offset, ppo_offset): | 
					 | 
					 | 
					 | 
					def ppo_to_ppi_gpu(s, c_locs, time, ppi_offset, ppo_offset): | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    x, y = cuda.grid(2) | 
					 | 
					 | 
					 | 
					    x, y = cuda.grid(2) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    if y >= s.shape[0]: return | 
					 | 
					 | 
					 | 
					    if y >= s.shape[0]: return | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    if x >= s.shape[1]: return | 
					 | 
					 | 
					 | 
					    if x >= s.shape[1]: return | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    if vat[ppi_offset + y, 0] < 0: return | 
					 | 
					 | 
					 | 
					    if c_locs[ppi_offset + y] < 0: return | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    if vat[ppo_offset + y, 0] < 0: return | 
					 | 
					 | 
					 | 
					    if c_locs[ppo_offset + y] < 0: return | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    s[y, x, 0] = s[y, x, 2] | 
					 | 
					 | 
					 | 
					    s[0, y, x] = s[2, y, x] | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    s[y, x, 1] = time | 
					 | 
					 | 
					 | 
					    s[1, y, x] = time | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    s[y, x, 2] = s[y, x, 8] | 
					 | 
					 | 
					 | 
					    s[2, y, x] = s[8, y, x] | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
	
		
		
			
				
					| 
						
						
						
					 | 
					 | 
					
  |