Browse Source

first gpu-code, cached test fixtures

devel
Stefan Holst 2 years ago
parent
commit
3497bfdc75
  1. 288
      src/kyupy/wave_sim4.py
  2. 12
      tests/conftest.py
  3. 24
      tests/test_wave_sim4.py

288
src/kyupy/wave_sim4.py

@ -10,7 +10,6 @@ The simulators are not event-based and are not capable of simulating sequential
""" """
import math import math
from bisect import bisect, insort_left
import numpy as np import numpy as np
@ -76,19 +75,14 @@ class WaveSim(SimOps):
* ``s[..., 8]`` (P)PO sampled capture value: decided by random sampling according to a given seed. * ``s[..., 8]`` (P)PO sampled capture value: decided by random sampling according to a given seed.
* ``s[..., 9]`` (P)PO sampled capture slack: (capture time - LST) - decided by random sampling according to a given seed. * ``s[..., 9]`` (P)PO sampled capture slack: (capture time - LST) - decided by random sampling according to a given seed.
* ``s[..., 10]`` Overflow indicator: If non-zero, some signals in the input cone of this output had more * ``s[..., 10]`` Overflow indicator: If non-zero, some signals in the input cone of this output had more
transitions than specified in ``wavecaps``. Some transitions have been discarded, the transitions than specified in ``c_caps``. Some transitions have been discarded, the
final values in the waveforms are still valid. final values in the waveforms are still valid.
""" """
self.params = np.zeros((sims, 4), dtype=np.float32) self.params = np.zeros((sims, 4), dtype=np.float32)
self.params[...,0] = 1.0 self.params[...,0] = 1.0
m1 = np.array([2 ** x for x in range(7, -1, -1)], dtype=np.uint8) self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.vat, self.ops, self.params)])
m0 = ~m1
self.mask = np.rollaxis(np.vstack((m0, m1)), 1)
self.overflows = 0
self.lst_eat_valid = False
self.pi_s_locs = np.flatnonzero(self.vat[self.ppi_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0) self.pi_s_locs = np.flatnonzero(self.vat[self.ppi_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0)
self.po_s_locs = np.flatnonzero(self.vat[self.ppo_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0) self.po_s_locs = np.flatnonzero(self.vat[self.ppo_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0)
@ -105,20 +99,9 @@ class WaveSim(SimOps):
self.pippi_c_locs = np.concatenate([self.pi_c_locs, self.ppi_c_locs]) self.pippi_c_locs = np.concatenate([self.pi_c_locs, self.ppi_c_locs])
self.poppo_c_locs = np.concatenate([self.po_c_locs, self.ppo_c_locs]) self.poppo_c_locs = np.concatenate([self.po_c_locs, self.ppo_c_locs])
self.wave_capture = numba.njit(WaveSim.wave_capture)
def __repr__(self): def __repr__(self):
total_mem = self.c.nbytes + self.vat.nbytes + self.ops.nbytes + self.s.nbytes return f'<{type(self).__name__} {self.circuit.name} sims={self.sims} ops={len(self.ops)} ' + \
return f'<WaveSim {self.circuit.name} sims={self.sims} ops={len(self.ops)} ' + \ f'levels={len(self.level_starts)} mem={hr_bytes(self.nbytes)}>'
f'levels={len(self.level_starts)} mem={hr_bytes(total_mem)}>'
def get_line_delay(self, line, polarity):
"""Returns the current delay of the given ``line`` and ``polarity`` in the simulation model."""
return self.timing[line, 0, polarity]
def set_line_delay(self, line, polarity, delay):
"""Sets a new ``delay`` for the given ``line`` and ``polarity`` in the simulation model."""
self.timing[line, 0, polarity] = delay
def s_to_c(self): def s_to_c(self):
"""Transfers values of sequential elements and primary inputs to the combinational portion. """Transfers values of sequential elements and primary inputs to the combinational portion.
@ -141,9 +124,8 @@ class WaveSim(SimOps):
""" """
sims = min(sims or self.sims, self.sims) sims = min(sims or self.sims, self.sims)
for op_start, op_stop in zip(self.level_starts, self.level_stops): for op_start, op_stop in zip(self.level_starts, self.level_stops):
self.overflows += level_eval(self.ops, op_start, op_stop, self.c, self.vat, 0, sims, level_eval_cpu(self.ops, op_start, op_stop, self.c, self.vat, 0, sims,
self.timing, self.params, sd, seed) self.timing, self.params, sd, seed)
self.lst_eat_valid = False
def c_to_s(self, time=TMAX, sd=0.0, seed=1): def c_to_s(self, time=TMAX, sd=0.0, seed=1):
"""Simulates a capture operation at all sequential elements and primary outputs. """Simulates a capture operation at all sequential elements and primary outputs.
@ -157,7 +139,7 @@ class WaveSim(SimOps):
""" """
for s_loc, (c_loc, c_len, _) in zip(self.poppo_s_locs, self.vat[self.ppo_offset+self.poppo_s_locs]): for s_loc, (c_loc, c_len, _) in zip(self.poppo_s_locs, self.vat[self.ppo_offset+self.poppo_s_locs]):
for vector in range(self.sims): for vector in range(self.sims):
self.s[s_loc, vector, 3:] = self.wave_capture(self.c, c_loc, c_len, vector, time=time, sd=sd, seed=seed) self.s[s_loc, vector, 3:] = wave_capture_cpu(self.c, c_loc, c_len, vector, time=time, sd=sd, seed=seed)
def s_ppo_to_ppi(self, time=0.0): def s_ppo_to_ppi(self, time=0.0):
"""Re-assigns the last sampled capture to the appropriate pseudo-primary inputs (PPI). """Re-assigns the last sampled capture to the appropriate pseudo-primary inputs (PPI).
@ -170,8 +152,141 @@ class WaveSim(SimOps):
self.s[self.ppio_s_locs, :, 1] = time self.s[self.ppio_s_locs, :, 1] = time
self.s[self.ppio_s_locs, :, 2] = self.s[self.ppio_s_locs, :, 8] self.s[self.ppio_s_locs, :, 2] = self.s[self.ppio_s_locs, :, 8]
@staticmethod
def wave_capture(c, c_loc, c_len, vector, time=TMAX, sd=0.0, seed=1): @numba.njit
def rand_gauss_cpu(seed, sd):
    """Return a pseudo-random scaling factor around 1.0 derived from ``seed``.

    Twelve values in [0, 1) are taken from a linear congruential sequence
    started at ``seed``. Their sum minus 6.0 is multiplied by ``sd``. The draw
    is repeated (continuing the same sequence) until the magnitude is at most
    0.5, so the returned factor always lies within [0.5, 1.5].

    :param seed: Integer start state of the sequence; equal seeds give equal results.
    :param sd: Scale applied to the drawn deviate. If it is not strictly
        positive (zero, negative or NaN), no variation is applied.
    :return: ``1.0`` when ``sd`` is not positive, otherwise ``1.0 + x`` with ``abs(x) <= 0.5``.
    """
    clamp = 0.5
    # ``not sd > 0.0`` is deliberately used instead of ``sd <= 0.0``: a NaN
    # compares False either way, and letting it through would make the
    # rejection loop below spin forever because ``abs(nan) <= clamp`` is
    # never True.
    if not sd > 0.0:
        return 1.0
    while True:
        x = -6.0
        for _ in range(12):
            # advance the linear congruential state
            seed = int(0xDEECE66D) * seed + 0xB
            # bits 8..31 of the state, mapped to [0, 1)
            x += float((seed >> 8) & 0xffffff) / float(1 << 24)
        x *= sd
        # reject deviates outside +/- clamp and draw again
        if abs(x) <= clamp:
            break
    return x + 1.0
@numba.njit
def wave_eval_cpu(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0):
    """Evaluate one operation for one simulation column and write its output waveform.

    ``op`` is unpacked into a look-up table ``lut``, an output index ``z_idx``
    and four input indices ``a_idx`` .. ``d_idx``. For every index, ``vat[idx, 0]``
    is used as the start row of that signal's transition list in ``cbuf``;
    ``vat[z_idx]`` additionally provides the output capacity ``z_cap``.
    All ``cbuf`` accesses use column ``st_idx``.

    The input transition lists are merged in time order. Each consumed input
    transition flips one bit of ``inputs``; the new output value is bit
    ``inputs`` of ``lut``. Output toggles are appended to the output list,
    or the previous toggle is removed again when the resulting pulse is not
    wider than ``thresh``.

    :param op: Sequence of six integers ``(lut, z_idx, a_idx, b_idx, c_idx, d_idx)``.
    :param cbuf: 2-D waveform buffer indexed as ``[row, st_idx]``; read for the inputs,
        written for the output.
    :param vat: Table indexed by signal; ``vat[i, 0]`` is the start row in ``cbuf``,
        ``vat[i]`` unpacks into three values of which the second is the capacity.
    :param st_idx: Column of ``cbuf`` to operate on.
    :param line_times: Indexed as ``[line, 0, polarity]`` for the delay added to an
        input transition and ``[line, 1, polarity]`` for the pulse-width threshold.
    :param param: ``param[0]`` scales every delay and threshold; if ``int(param[1])``
        equals an input index, ``param[2 + polarity]`` is added for that input.
    :param sd: Passed to ``rand_gauss_cpu`` as the variation scale.
    :param seed: Mixed with ``z_idx`` and ``st_idx`` into the seed for ``rand_gauss_cpu``.
    :return: Nothing; the result is stored in ``cbuf``.
    """
    lut, z_idx, a_idx, b_idx, c_idx, d_idx = op

    # >>> same code as wave_eval_gpu (except rand_gauss_*pu()-call) >>>
    overflows = int(0)

    # base seed specific to this output and this simulation column
    _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1)

    # start rows of the input and output transition lists in cbuf
    a_mem = vat[a_idx, 0]
    b_mem = vat[b_idx, 0]
    c_mem = vat[c_idx, 0]
    d_mem = vat[d_idx, 0]
    z_mem, z_cap, _ = vat[z_idx]

    # read/write positions within each transition list
    a_cur = int(0)
    b_cur = int(0)
    c_cur = int(0)
    d_cur = int(0)
    z_cur = lut & 1
    # lut bit 0 is the output for ``inputs == 0``; if it is 1, the output list
    # starts with an entry at TMIN
    if z_cur == 1:
        cbuf[z_mem, st_idx] = TMIN

    # first transition time of each input, shifted by its (varied, scaled) delay
    a = cbuf[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss_cpu(_seed ^ a_mem ^ z_cur, sd) * param[0]
    if int(param[1]) == a_idx: a += param[2+z_cur]
    b = cbuf[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss_cpu(_seed ^ b_mem ^ z_cur, sd) * param[0]
    if int(param[1]) == b_idx: b += param[2+z_cur]
    c = cbuf[c_mem, st_idx] + line_times[c_idx, 0, z_cur] * rand_gauss_cpu(_seed ^ c_mem ^ z_cur, sd) * param[0]
    if int(param[1]) == c_idx: c += param[2+z_cur]
    d = cbuf[d_mem, st_idx] + line_times[d_idx, 0, z_cur] * rand_gauss_cpu(_seed ^ d_mem ^ z_cur, sd) * param[0]
    if int(param[1]) == d_idx: d += param[2+z_cur]

    previous_t = TMIN

    current_t = min(a, b, c, d)
    inputs = int(0)

    # process input transitions in time order until only values >= TMAX remain
    while current_t < TMAX:
        z_val = z_cur & 1
        # consume the input whose transition is due, fetch its next transition
        # and flip its bit in ``inputs``
        if a == current_t:
            a_cur += 1
            a = cbuf[a_mem + a_cur, st_idx]
            a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss_cpu(_seed ^ a_mem ^ z_val ^ 1, sd) * param[0]
            thresh = line_times[a_idx, 1, z_val] * rand_gauss_cpu(_seed ^ a_mem ^ z_val, sd) * param[0]
            if int(param[1]) == a_idx:
                a += param[2+(z_val^1)]
                thresh += param[2+z_val]
            inputs ^= 1
            next_t = a
        elif b == current_t:
            b_cur += 1
            b = cbuf[b_mem + b_cur, st_idx]
            b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss_cpu(_seed ^ b_mem ^ z_val ^ 1, sd) * param[0]
            thresh = line_times[b_idx, 1, z_val] * rand_gauss_cpu(_seed ^ b_mem ^ z_val, sd) * param[0]
            if int(param[1]) == b_idx:
                b += param[2+(z_val^1)]
                thresh += param[2+z_val]
            inputs ^= 2
            next_t = b
        elif c == current_t:
            c_cur += 1
            c = cbuf[c_mem + c_cur, st_idx]
            c += line_times[c_idx, 0, z_val ^ 1] * rand_gauss_cpu(_seed ^ c_mem ^ z_val ^ 1, sd) * param[0]
            thresh = line_times[c_idx, 1, z_val] * rand_gauss_cpu(_seed ^ c_mem ^ z_val, sd) * param[0]
            if int(param[1]) == c_idx:
                c += param[2+(z_val^1)]
                thresh += param[2+z_val]
            inputs ^= 4
            next_t = c
        else:
            d_cur += 1
            d = cbuf[d_mem + d_cur, st_idx]
            d += line_times[d_idx, 0, z_val ^ 1] * rand_gauss_cpu(_seed ^ d_mem ^ z_val ^ 1, sd) * param[0]
            thresh = line_times[d_idx, 1, z_val] * rand_gauss_cpu(_seed ^ d_mem ^ z_val, sd) * param[0]
            if int(param[1]) == d_idx:
                d += param[2+(z_val^1)]
                thresh += param[2+z_val]
            inputs ^= 8
            next_t = d

        # the low bit of z_cur is the current output value; act only if the
        # look-up table yields a different value for the new input combination
        if (z_cur & 1) != ((lut >> inputs) & 1):
            # we generate a toggle in z_mem, if:
            #   ( it is the first toggle in z_mem OR
            #   following toggle is earlier OR
            #   pulse is wide enough ) AND enough space in z_mem.
            if z_cur == 0 or next_t < current_t or (current_t - previous_t) > thresh:
                if z_cur < (z_cap - 1):
                    cbuf[z_mem + z_cur, st_idx] = current_t
                    previous_t = current_t
                    z_cur += 1
                else:
                    # no room left: count the overflow and drop the last stored toggle
                    overflows += 1
                    previous_t = cbuf[z_mem + z_cur - 1, st_idx]
                    z_cur -= 1
            else:
                # pulse too narrow: remove the previously stored toggle
                z_cur -= 1
                previous_t = cbuf[z_mem + z_cur - 1, st_idx] if z_cur > 0 else TMIN
        current_t = min(a, b, c, d)

    # generate overflow flag or propagate from input
    cbuf[z_mem + z_cur, st_idx] = TMAX_OVL if overflows > 0 else max(a, b, c, d)
@numba.njit
def level_eval_cpu(ops, op_start, op_stop, c, vat, st_start, st_stop, line_times, params, sd, seed):
    """Evaluate a contiguous range of operations for a range of simulation columns.

    Calls ``wave_eval_cpu`` once for every combination of an operation index in
    ``[op_start, op_stop)`` and a simulation index in ``[st_start, st_stop)``.

    :param ops: Indexable collection of operations; ``ops[i]`` is passed on unchanged.
    :param op_start: First operation index to evaluate (inclusive).
    :param op_stop: End of the operation range (exclusive).
    :param c: Waveform buffer handed to ``wave_eval_cpu``.
    :param vat: Table handed to ``wave_eval_cpu``.
    :param st_start: First simulation index to evaluate (inclusive).
    :param st_stop: End of the simulation range (exclusive).
    :param line_times: Timing data handed to ``wave_eval_cpu``.
    :param params: Per-simulation parameters; row ``st_idx`` is passed for simulation ``st_idx``.
    :param sd: Variation scale handed to ``wave_eval_cpu``.
    :param seed: Seed handed to ``wave_eval_cpu``.
    :return: Nothing; results are stored in ``c`` by ``wave_eval_cpu``.
    """
    # No overflow counter is kept here: wave_eval_cpu does not return one.
    for op_idx in range(op_start, op_stop):
        op = ops[op_idx]
        for st_idx in range(st_start, st_stop):
            wave_eval_cpu(op, c, vat, st_idx, line_times, params[st_idx], sd, seed)
@numba.njit
def wave_capture_cpu(c, c_loc, c_len, vector, time=TMAX, sd=0.0, seed=1):
s_sqrt2 = sd * math.sqrt(2) s_sqrt2 = sd * math.sqrt(2)
m = 0.5 m = 0.5
acc = 0.0 acc = 0.0
@ -216,18 +331,50 @@ class WaveSim(SimOps):
return (w[0] <= TMIN), eat, lst, final, acc, val, 0, ovl return (w[0] <= TMIN), eat, lst, final, acc, val, 0, ovl
@numba.njit class WaveSimCuda(WaveSim):
def level_eval(ops, op_start, op_stop, c, vat, st_start, st_stop, line_times, params, sd, seed): """A GPU-accelerated waveform-based combinational logic timing simulator.
overflows = 0
for op_idx in range(op_start, op_stop):
op = ops[op_idx]
for st_idx in range(st_start, st_stop):
overflows += wave_eval(op, c, vat, st_idx, line_times, params[st_idx], sd, seed)
return overflows
The API is the same as for :py:class:`WaveSim`.
All internal memories are mirrored into GPU memory upon construction.
Some operations like access to single waveforms can involve large communication overheads.
"""
def __init__(self, circuit, timing, sims=8, c_caps=16, c_reuse=False, strip_forks=False):
super().__init__(circuit, timing, sims, c_caps, c_reuse, strip_forks)
@numba.njit self.c = cuda.to_device(self.c)
def rand_gauss(seed, sd): self.s = cuda.to_device(self.s)
self.ops = cuda.to_device(self.ops)
self.vat = cuda.to_device(self.vat)
self.timing = cuda.to_device(self.timing)
self.params = cuda.to_device(self.params)
self._block_dim = (32, 16)
# TODO implement on GPU
#def s_to_c(self):
def _grid_dim(self, x, y):
gx = math.ceil(x / self._block_dim[0])
gy = math.ceil(y / self._block_dim[1])
return gx, gy
def c_prop(self, sims=None, sd=0.0, seed=1):
sims = min(sims or self.sims, self.sims)
for op_start, op_stop in zip(self.level_starts, self.level_stops):
grid_dim = self._grid_dim(sims, op_stop - op_start)
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.vat, int(0),
sims, self.timing, self.params, sd, seed)
cuda.synchronize()
# TODO implement on GPU
#def c_to_s(self):
# TODO implement on GPU
#def s_ppo_to_ppi(self, time=0.0):
@cuda.jit(device=True)
def rand_gauss_gpu(seed, sd):
clamp = 0.5 clamp = 0.5
if sd <= 0.0: if sd <= 0.0:
return 1.0 return 1.0
@ -242,9 +389,24 @@ def rand_gauss(seed, sd):
return x + 1.0 return x + 1.0
@numba.njit @cuda.jit()
def wave_eval(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0): def wave_eval_gpu(ops, op_start, op_stop, cbuf, vat, st_start, st_stop, line_times, param, sd, seed):
lut, z_idx, a_idx, b_idx, c_idx, d_idx = op x, y = cuda.grid(2)
st_idx = st_start + x
op_idx = op_start + y
if st_idx >= st_stop: return
if op_idx >= op_stop: return
lut = ops[op_idx, 0]
z_idx = ops[op_idx, 1]
a_idx = ops[op_idx, 2]
b_idx = ops[op_idx, 3]
c_idx = ops[op_idx, 4]
d_idx = ops[op_idx, 5]
param = param[st_idx]
# >>> same code as wave_eval_cpu (except rand_gauss_*pu()-call) >>>
overflows = int(0) overflows = int(0)
_seed = (seed << 4) + (z_idx << 20) + (st_idx << 1) _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1)
@ -263,13 +425,13 @@ def wave_eval(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0):
if z_cur == 1: if z_cur == 1:
cbuf[z_mem, st_idx] = TMIN cbuf[z_mem, st_idx] = TMIN
a = cbuf[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss(_seed ^ a_mem ^ z_cur, sd) * param[0] a = cbuf[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss_gpu(_seed ^ a_mem ^ z_cur, sd) * param[0]
if int(param[1]) == a_idx: a += param[2+z_cur] if int(param[1]) == a_idx: a += param[2+z_cur]
b = cbuf[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss(_seed ^ b_mem ^ z_cur, sd) * param[0] b = cbuf[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss_gpu(_seed ^ b_mem ^ z_cur, sd) * param[0]
if int(param[1]) == b_idx: b += param[2+z_cur] if int(param[1]) == b_idx: b += param[2+z_cur]
c = cbuf[c_mem, st_idx] + line_times[c_idx, 0, z_cur] * rand_gauss(_seed ^ c_mem ^ z_cur, sd) * param[0] c = cbuf[c_mem, st_idx] + line_times[c_idx, 0, z_cur] * rand_gauss_gpu(_seed ^ c_mem ^ z_cur, sd) * param[0]
if int(param[1]) == c_idx: c += param[2+z_cur] if int(param[1]) == c_idx: c += param[2+z_cur]
d = cbuf[d_mem, st_idx] + line_times[d_idx, 0, z_cur] * rand_gauss(_seed ^ d_mem ^ z_cur, sd) * param[0] d = cbuf[d_mem, st_idx] + line_times[d_idx, 0, z_cur] * rand_gauss_gpu(_seed ^ d_mem ^ z_cur, sd) * param[0]
if int(param[1]) == d_idx: d += param[2+z_cur] if int(param[1]) == d_idx: d += param[2+z_cur]
previous_t = TMIN previous_t = TMIN
@ -282,8 +444,8 @@ def wave_eval(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0):
if a == current_t: if a == current_t:
a_cur += 1 a_cur += 1
a = cbuf[a_mem + a_cur, st_idx] a = cbuf[a_mem + a_cur, st_idx]
a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ a_mem ^ z_val ^ 1, sd) * param[0] a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss_gpu(_seed ^ a_mem ^ z_val ^ 1, sd) * param[0]
thresh = line_times[a_idx, 1, z_val] * rand_gauss(_seed ^ a_mem ^ z_val, sd) * param[0] thresh = line_times[a_idx, 1, z_val] * rand_gauss_gpu(_seed ^ a_mem ^ z_val, sd) * param[0]
if int(param[1]) == a_idx: if int(param[1]) == a_idx:
a += param[2+(z_val^1)] a += param[2+(z_val^1)]
thresh += param[2+z_val] thresh += param[2+z_val]
@ -293,8 +455,8 @@ def wave_eval(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0):
elif b == current_t: elif b == current_t:
b_cur += 1 b_cur += 1
b = cbuf[b_mem + b_cur, st_idx] b = cbuf[b_mem + b_cur, st_idx]
b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ b_mem ^ z_val ^ 1, sd) * param[0] b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss_gpu(_seed ^ b_mem ^ z_val ^ 1, sd) * param[0]
thresh = line_times[b_idx, 1, z_val] * rand_gauss(_seed ^ b_mem ^ z_val, sd) * param[0] thresh = line_times[b_idx, 1, z_val] * rand_gauss_gpu(_seed ^ b_mem ^ z_val, sd) * param[0]
if int(param[1]) == b_idx: if int(param[1]) == b_idx:
b += param[2+(z_val^1)] b += param[2+(z_val^1)]
thresh += param[2+z_val] thresh += param[2+z_val]
@ -304,8 +466,8 @@ def wave_eval(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0):
elif c == current_t: elif c == current_t:
c_cur += 1 c_cur += 1
c = cbuf[c_mem + c_cur, st_idx] c = cbuf[c_mem + c_cur, st_idx]
c += line_times[c_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ c_mem ^ z_val ^ 1, sd) * param[0] c += line_times[c_idx, 0, z_val ^ 1] * rand_gauss_gpu(_seed ^ c_mem ^ z_val ^ 1, sd) * param[0]
thresh = line_times[c_idx, 1, z_val] * rand_gauss(_seed ^ c_mem ^ z_val, sd) * param[0] thresh = line_times[c_idx, 1, z_val] * rand_gauss_gpu(_seed ^ c_mem ^ z_val, sd) * param[0]
if int(param[1]) == c_idx: if int(param[1]) == c_idx:
c += param[2+(z_val^1)] c += param[2+(z_val^1)]
thresh += param[2+z_val] thresh += param[2+z_val]
@ -315,19 +477,13 @@ def wave_eval(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0):
else: else:
d_cur += 1 d_cur += 1
d = cbuf[d_mem + d_cur, st_idx] d = cbuf[d_mem + d_cur, st_idx]
d += line_times[d_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ d_mem ^ z_val ^ 1, sd) * param[0] d += line_times[d_idx, 0, z_val ^ 1] * rand_gauss_gpu(_seed ^ d_mem ^ z_val ^ 1, sd) * param[0]
thresh = line_times[d_idx, 1, z_val] * rand_gauss(_seed ^ d_mem ^ z_val, sd) * param[0] thresh = line_times[d_idx, 1, z_val] * rand_gauss_gpu(_seed ^ d_mem ^ z_val, sd) * param[0]
if int(param[1]) == d_idx: if int(param[1]) == d_idx:
d += param[2+(z_val^1)] d += param[2+(z_val^1)]
thresh += param[2+z_val] thresh += param[2+z_val]
inputs ^= 8 inputs ^= 8
next_t = d next_t = d
#print("previous_t",previous_t)
#print("current_t",current_t)
#print(current_t - previous_t)
#print(thresh)
#print(z_cur & 1)
#print((lut >> inputs) & 1)
if (z_cur & 1) != ((lut >> inputs) & 1): if (z_cur & 1) != ((lut >> inputs) & 1):
# we generate a toggle in z_mem, if: # we generate a toggle in z_mem, if:
@ -335,12 +491,8 @@ def wave_eval(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0):
# following toggle is earlier OR # following toggle is earlier OR
# pulse is wide enough ) AND enough space in z_mem. # pulse is wide enough ) AND enough space in z_mem.
if z_cur == 0 or next_t < current_t or (current_t - previous_t) > thresh: if z_cur == 0 or next_t < current_t or (current_t - previous_t) > thresh:
#print(current_t - previous_t)
#print(thresh)
#print(z_cap)
if z_cur < (z_cap - 1): if z_cur < (z_cap - 1):
cbuf[z_mem + z_cur, st_idx] = current_t cbuf[z_mem + z_cur, st_idx] = current_t
#print(cbuf[z_mem + z_cur, st_idx])
previous_t = current_t previous_t = current_t
z_cur += 1 z_cur += 1
else: else:
@ -348,18 +500,10 @@ def wave_eval(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0):
previous_t = cbuf[z_mem + z_cur - 1, st_idx] previous_t = cbuf[z_mem + z_cur - 1, st_idx]
z_cur -= 1 z_cur -= 1
else: else:
#print(a)
z_cur -= 1 z_cur -= 1
if z_cur > 0: previous_t = cbuf[z_mem + z_cur - 1, st_idx] if z_cur > 0 else TMIN
previous_t = cbuf[z_mem + z_cur - 1, st_idx]
else:
previous_t = TMIN
current_t = min(a, b, c, d) current_t = min(a, b, c, d)
if overflows > 0: # generate overflow flag or propagate from input
cbuf[z_mem + z_cur, st_idx] = TMAX_OVL cbuf[z_mem + z_cur, st_idx] = TMAX_OVL if overflows > 0 else max(a, b, c, d)
else:
cbuf[z_mem + z_cur, st_idx] = a if a == max(a, b, c, d) else b if b == max(a, b, c, d) else c if c == max(a, b, c, d) else d # propagate overflow flags by storing biggest TMAX from input
return overflows

12
tests/conftest.py

@ -1,8 +1,18 @@
import pytest import pytest
@pytest.fixture @pytest.fixture(scope='session')
def mydir(): def mydir():
import os import os
from pathlib import Path from pathlib import Path
return Path(os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))) return Path(os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))))
@pytest.fixture(scope='session')
def b14_circuit(mydir):
    """Session-scoped fixture: the circuit loaded from ``b14.v.gz`` in ``mydir``.

    The file is read with ``verilog.load(..., branchforks=True)``.
    """
    from kyupy import verilog
    return verilog.load(mydir / 'b14.v.gz', branchforks=True)
@pytest.fixture(scope='session')
def b14_timing(mydir, b14_circuit):
    """Session-scoped fixture: timing data for ``b14_circuit``.

    Loads ``b14.sdf.gz`` from ``mydir`` with ``sdf.load`` and returns the
    result of its ``annotation`` method applied to ``b14_circuit``.
    """
    from kyupy import sdf
    return sdf.load(mydir / 'b14.sdf.gz').annotation(b14_circuit)

24
tests/test_wave_sim4.py

@ -1,6 +1,6 @@
import numpy as np import numpy as np
from kyupy.wave_sim4 import WaveSim, wave_eval, TMIN, TMAX from kyupy.wave_sim4 import WaveSim, WaveSimCuda, wave_eval_cpu, TMIN, TMAX
from kyupy.logic_sim import LogicSim from kyupy.logic_sim import LogicSim
from kyupy import verilog, sdf, logic, bench from kyupy import verilog, sdf, logic, bench
from kyupy.logic import MVArray, BPArray from kyupy.logic import MVArray, BPArray
@ -32,7 +32,7 @@ def test_nand_delays():
def wave_assert(inputs, output): def wave_assert(inputs, output):
for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i
wave_eval(op, c, vat, 0, line_times, sdata) wave_eval_cpu(op, c, vat, 0, line_times, sdata)
for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[4,i], v) for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[4,i], v)
wave_assert([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1 wave_assert([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1
@ -53,7 +53,6 @@ def test_tiny_circuit():
lt = np.zeros((len(c.lines), 2, 2)) lt = np.zeros((len(c.lines), 2, 2))
lt[:,0,:] = 1.0 # unit delay for all lines lt[:,0,:] = 1.0 # unit delay for all lines
wsim = WaveSim(c, lt) wsim = WaveSim(c, lt)
print(wsim.prim_counts)
assert len(wsim.s) == 5 assert len(wsim.s) == 5
# values for x # values for x
@ -157,18 +156,11 @@ def compare_to_logic_sim(wsim: WaveSim):
assert res_str == exp_str assert res_str == exp_str
def test_b14(mydir): def test_b14(b14_circuit, b14_timing):
c = verilog.load(mydir / 'b14.v.gz', branchforks=True) compare_to_logic_sim(WaveSim(b14_circuit, b14_timing, 8))
df = sdf.load(mydir / 'b14.sdf.gz')
lt = df.annotation(c)
wsim = WaveSim(c, lt, 8)
compare_to_logic_sim(wsim)
def test_b14_strip_forks(b14_circuit, b14_timing):
    """Run ``compare_to_logic_sim`` on a ``WaveSim`` built with ``strip_forks=True``.

    The simulator is constructed from the shared b14 fixtures with ``8`` as
    its third positional argument.
    """
    compare_to_logic_sim(WaveSim(b14_circuit, b14_timing, 8, strip_forks=True))
def test_b14_strip_forks(mydir): def test_b14_cuda(b14_circuit, b14_timing):
c = verilog.load(mydir / 'b14.v.gz', branchforks=True) compare_to_logic_sim(WaveSimCuda(b14_circuit, b14_timing, 8, strip_forks=True))
df = sdf.load(mydir / 'b14.sdf.gz')
lt = df.annotation(c)
wsim = WaveSim(c, lt, 8, strip_forks=True)
compare_to_logic_sim(wsim)

Loading…
Cancel
Save