Browse Source

overflow notification and wavecap statistics on GPU

main
Stefan Holst 4 years ago
parent
commit
1af346c97a
  1. 35
      kyupy/wave_sim.py
  2. 55
      kyupy/wave_sim_cuda.py
  3. 8
      tests/test_wave_sim.py

35
kyupy/wave_sim.py

@ -4,6 +4,7 @@ from . import numba @@ -4,6 +4,7 @@ from . import numba
TMAX = np.float32(2 ** 127) # almost np.PINF for 32-bit floating point values
TMAX_OVL = np.float32(1.1 * 2 ** 127) # almost np.PINF with overflow mark
TMIN = np.float32(-2 ** 127) # almost np.NINF for 32-bit floating point values
@ -16,7 +17,7 @@ class WaveSim: @@ -16,7 +17,7 @@ class WaveSim:
self.lst_eat_valid = False
self.cdata = np.zeros((len(self.interface), sims, 6), dtype='float32')
self.cdata = np.zeros((len(self.interface), sims, 7), dtype='float32')
if type(wavecaps) is int:
wavecaps = [wavecaps] * len(circuit.lines)
@ -25,25 +26,25 @@ class WaveSim: @@ -25,25 +26,25 @@ class WaveSim:
# state allocation table. maps line and interface indices to self.state memory locations
self.sat = np.zeros((len(circuit.lines) + 2 + 2 * len(self.interface), 2), dtype='int')
self.sat = np.zeros((len(circuit.lines) + 2 + 2 * len(self.interface), 3), dtype='int')
self.sat[:, 0] = -1
filled = 0
for lidx, cap in enumerate(wavecaps):
self.sat[lidx] = filled, cap
self.sat[lidx] = filled, cap, 0
filled += cap
self.zero_idx = len(circuit.lines)
self.sat[self.zero_idx] = filled, intf_wavecap
self.sat[self.zero_idx] = filled, intf_wavecap, 0
filled += intf_wavecap
self.tmp_idx = self.zero_idx + 1
self.sat[self.tmp_idx] = filled, intf_wavecap
self.sat[self.tmp_idx] = filled, intf_wavecap, 0
filled += intf_wavecap
self.ppi_offset = self.tmp_idx + 1
self.ppo_offset = self.ppi_offset + len(self.interface)
for i, n in enumerate(self.interface):
if len(n.outs) > 0:
self.sat[self.ppi_offset + i] = filled, intf_wavecap
self.sat[self.ppi_offset + i] = filled, intf_wavecap, 0
filled += intf_wavecap
if len(n.ins) > 0:
self.sat[self.ppo_offset + i] = self.sat[n.ins[0].index]
@ -161,7 +162,7 @@ class WaveSim: @@ -161,7 +162,7 @@ class WaveSim:
def wave(self, line, vector):
if line < 0:
return [TMAX]
mem, wcap = self.sat[line]
mem, wcap, _ = self.sat[line]
if mem < 0:
return [TMAX]
return self.state[mem:mem + wcap, vector]
@ -186,8 +187,8 @@ class WaveSim: @@ -186,8 +187,8 @@ class WaveSim:
def reassign(self, time=0.0):
for i, node in enumerate(self.interface):
ppi_loc = self.sat[self.ppi_offset + i]
ppo_loc = self.sat[self.ppo_offset + i]
ppi_loc = self.sat[self.ppi_offset + i, 0]
ppo_loc = self.sat[self.ppo_offset + i, 0]
if ppi_loc < 0 or ppo_loc < 0: continue
for sidx in range(self.sims):
ival = self.val(self.ppi_offset + i, sidx, TMAX) > 0.5
@ -274,10 +275,14 @@ class WaveSim: @@ -274,10 +275,14 @@ class WaveSim:
eat = TMAX
lst = TMIN
tog = 0
ovl = 0
val = int(0)
final = int(0)
for t in self.wave(line, vector):
if t >= TMAX: break
if t >= TMAX:
if t == TMAX_OVL:
ovl = 1
break
m = -m
final ^= 1
if t < time:
@ -304,7 +309,7 @@ class WaveSim: @@ -304,7 +309,7 @@ class WaveSim:
else:
acc = val
return acc, val, final, (val != final), eat, lst
return acc, val, final, (val != final), eat, lst, ovl
@numba.njit
@ -342,7 +347,7 @@ def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0): @@ -342,7 +347,7 @@ def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0):
a_mem = sat[a_idx, 0]
b_mem = sat[b_idx, 0]
z_mem, z_cap = sat[z_idx]
z_mem, z_cap, _ = sat[z_idx]
a_cur = int(0)
b_cur = int(0)
@ -397,5 +402,9 @@ def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0): @@ -397,5 +402,9 @@ def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0):
previous_t = TMIN
current_t = min(a, b)
state[z_mem + z_cur, st_idx] = TMAX
if overflows > 0:
state[z_mem + z_cur, st_idx] = TMAX_OVL
else:
state[z_mem + z_cur, st_idx] = a if a > b else b # propagate overflow flags by storing biggest TMAX from input
return overflows

55
kyupy/wave_sim_cuda.py

@ -4,6 +4,7 @@ from .wave_sim import WaveSim @@ -4,6 +4,7 @@ from .wave_sim import WaveSim
from . import cuda
TMAX = np.float32(2 ** 127) # almost np.PINF for 32-bit floating point values
TMAX_OVL = np.float32(1.1 * 2 ** 127) # almost np.PINF with overflow mark
TMIN = np.float32(-2 ** 127) # almost np.NINF for 32-bit floating point values
@ -64,7 +65,7 @@ class WaveSimCuda(WaveSim): @@ -64,7 +65,7 @@ class WaveSimCuda(WaveSim):
def wave(self, line, vector):
if line < 0:
return None
mem, wcap = self.sat[line]
mem, wcap, _ = self.sat[line]
if mem < 0:
return None
return self.d_state[mem:mem + wcap, vector]
@ -87,6 +88,31 @@ class WaveSimCuda(WaveSim): @@ -87,6 +88,31 @@ class WaveSimCuda(WaveSim):
self.d_cdata, time)
cuda.synchronize()
def wavecaps(self):
    """Run ``wavecaps_kernel`` and return column 2 of the allocation table.

    The kernel is launched with 512 threads per block and enough blocks to
    provide one thread per entry of ``self.circuit.lines``. Afterwards the
    contents of ``self.d_sat`` are copied into ``self.sat``, and the third
    column of ``self.sat`` is returned.
    """
    # NOTE(review): the launch is sized from len(circuit.lines), whereas the
    # kernel itself only bounds-checks against len(sat). Entries of sat that
    # no launched thread reaches keep whatever column 2 held before --
    # confirm this is intended.
    threads_per_block = 512
    blocks = math.ceil(len(self.circuit.lines) / threads_per_block)
    wavecaps_kernel[blocks, threads_per_block](self.d_state, self.d_sat, self.sims)
    # presumably d_sat is the device-side mirror of sat; verify against __init__
    self.sat[...] = self.d_sat
    return self.sat[..., 2]
@cuda.jit()
def wavecaps_kernel(state, sat, sims):
    """Record, per ``sat`` entry, how many waveform slots are in use.

    Each thread handles one row of ``sat``, read as ``(start, capacity, _)``.
    For every simulation index in ``range(sims)`` it walks the slots
    ``state[start + k, sim]`` for ``k`` in ``range(capacity)`` and stops at
    the first value ``>= TMAX``. The largest ``k`` visited across all
    simulations, plus one, is written to ``sat[row, 2]``.

    Rows with a negative start are left untouched, as are thread indices
    beyond ``len(sat)``.
    """
    row = cuda.grid(1)
    if row >= len(sat):
        return
    start, capacity, _ = sat[row]
    if start < 0:
        return
    deepest = 0
    for sim in range(sims):
        for slot in range(capacity):
            # The slot counts as visited before the terminator check, so the
            # terminating entry itself is included in the result.
            if slot > deepest:
                deepest = slot
            if state[start + slot, sim] >= TMAX:
                break
    sat[row, 2] = deepest + 1
@cuda.jit()
def reassign_kernel(state, sat, ppi_offset, ppo_offset, cdata, ppi_time):
@ -94,8 +120,8 @@ def reassign_kernel(state, sat, ppi_offset, ppo_offset, cdata, ppi_time): @@ -94,8 +120,8 @@ def reassign_kernel(state, sat, ppi_offset, ppo_offset, cdata, ppi_time):
if vector >= state.shape[-1]: return
if ppo_offset + y >= len(sat): return
ppo, ppo_cap = sat[ppo_offset + y]
ppi, ppi_cap = sat[ppi_offset + y]
ppo, ppo_cap, _ = sat[ppo_offset + y]
ppi, ppi_cap, _ = sat[ppi_offset + y]
if ppo < 0: return
if ppi < 0: return
@ -121,7 +147,7 @@ def reassign_kernel(state, sat, ppi_offset, ppo_offset, cdata, ppi_time): @@ -121,7 +147,7 @@ def reassign_kernel(state, sat, ppi_offset, ppo_offset, cdata, ppi_time):
def capture_kernel(state, sat, ppo_offset, cdata, time, s_sqrt2, seed):
x, y = cuda.grid(2)
if ppo_offset + y >= len(sat): return
line, tdim = sat[ppo_offset + y]
line, tdim, _ = sat[ppo_offset + y]
if line < 0: return
if x >= state.shape[-1]: return
vector = x
@ -130,11 +156,15 @@ def capture_kernel(state, sat, ppo_offset, cdata, time, s_sqrt2, seed): @@ -130,11 +156,15 @@ def capture_kernel(state, sat, ppo_offset, cdata, time, s_sqrt2, seed):
eat = TMAX
lst = TMIN
tog = 0
ovl = 0
val = int(0)
final = int(0)
for tidx in range(tdim):
t = state[line + tidx, vector]
if t >= TMAX: break
if t >= TMAX:
if t == TMAX_OVL:
ovl = 1
break
m = -m
final ^= 1
if t < time:
@ -167,6 +197,7 @@ def capture_kernel(state, sat, ppo_offset, cdata, time, s_sqrt2, seed): @@ -167,6 +197,7 @@ def capture_kernel(state, sat, ppo_offset, cdata, time, s_sqrt2, seed):
cdata[y, vector, 3] = (val != final)
cdata[y, vector, 4] = eat
cdata[y, vector, 5] = lst
cdata[y, vector, 6] = ovl
@cuda.jit()
@ -219,12 +250,13 @@ def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_time @@ -219,12 +250,13 @@ def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_time
z_idx = ops[op_idx, 1]
a_idx = ops[op_idx, 2]
b_idx = ops[op_idx, 3]
overflows = int(0)
_seed = (seed << 4) + (z_idx << 20) + (st_idx << 1)
z_mem, z_cap = sat[z_idx]
a_mem = sat[a_idx, 0]
b_mem = sat[b_idx, 0]
_seed = (seed << 4) + (z_idx << 20) + (st_idx << 1)
z_mem, z_cap, _ = sat[z_idx]
a_cur = int(0)
b_cur = int(0)
@ -268,7 +300,7 @@ def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_time @@ -268,7 +300,7 @@ def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_time
previous_t = current_t
z_cur += 1
else:
# overflows += 1
overflows += 1
previous_t = state[z_mem + z_cur - 1, st_idx]
z_cur -= 1
else:
@ -279,4 +311,7 @@ def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_time @@ -279,4 +311,7 @@ def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_time
previous_t = TMIN
current_t = min(a, b)
state[z_mem + z_cur, st_idx] = TMAX
if overflows > 0:
state[z_mem + z_cur, st_idx] = TMAX_OVL
else:
state[z_mem + z_cur, st_idx] = a if a > b else b # propagate overflow flags by storing biggest TMAX from input

8
tests/test_wave_sim.py

@ -26,10 +26,10 @@ def test_wave_eval(): @@ -26,10 +26,10 @@ def test_wave_eval():
a = state[0:16, 0]
b = state[16:32, 0]
z = state[32:, 0]
sat = np.zeros((3, 2), dtype='int')
sat[0] = 0, 16
sat[1] = 16, 16
sat[2] = 32, 16
sat = np.zeros((3, 3), dtype='int')
sat[0] = 0, 16, 0
sat[1] = 16, 16, 0
sat[2] = 32, 16, 0
wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times)
assert TMIN == z[0]

Loading…
Cancel
Save