Browse Source

random sampling of delays

devel
Stefan Holst 2 years ago
parent
commit
44b0c887d7
  1. 4
      src/kyupy/__init__.py
  2. 4
      src/kyupy/circuit.py
  3. 50
      src/kyupy/sdf.py
  4. 241
      src/kyupy/wave_sim.py
  5. 4
      tests/conftest.py
  6. 112
      tests/test_wave_sim.py

4
src/kyupy/__init__.py

@ -17,6 +17,10 @@ import numpy as np
_pop_count_lut = np.asarray([bin(x).count('1') for x in range(256)]) _pop_count_lut = np.asarray([bin(x).count('1') for x in range(256)])
def cdiv(x, y):
    """Returns ``x`` divided by ``y``, rounded up (ceiling division).

    Floor-dividing by the negated divisor and negating the result rounds towards
    positive infinity, which keeps integer operands exact.
    """
    floored_negative = x // -y
    return -floored_negative
def popcount(a):
    """Returns the number of 1-bits in a given packed numpy array.

    Every element of ``a`` is used as an index into the 256-entry table ``_pop_count_lut``,
    and the looked-up bit counts are summed.
    """
    bits_per_element = _pop_count_lut[a]
    return np.sum(bits_per_element)

4
src/kyupy/circuit.py

@ -228,10 +228,10 @@ class Circuit:
Usually, nodes in the io_nodes list without any lines in their :py:attr:`Node.ins` list are primary inputs, Usually, nodes in the io_nodes list without any lines in their :py:attr:`Node.ins` list are primary inputs,
and nodes without any lines in their :py:attr:`Node.outs` list are regarded as primary outputs. and nodes without any lines in their :py:attr:`Node.outs` list are regarded as primary outputs.
""" """
self.cells = {} self.cells : dict[str, Node] = {}
"""A dictionary to access cells by name. """A dictionary to access cells by name.
""" """
self.forks = {} self.forks : dict[str, Node] = {}
"""A dictionary to access forks by name. """A dictionary to access forks by name.
""" """

50
src/kyupy/sdf.py

@ -15,6 +15,7 @@ import numpy as np
from lark import Lark, Transformer from lark import Lark, Transformer
from . import log, readtext from . import log, readtext
from .circuit import Circuit
from .techlib import TechLib from .techlib import TechLib
@ -27,17 +28,48 @@ class DelayFile:
""" """
def __init__(self, name, cells):
    """Stores the name and splits ``cells`` into interconnect data and per-cell data.

    The entry under the key ``None`` (if any) becomes ``self.interconnects``;
    every entry with a truthy key is kept in ``self.cells``.
    """
    self.name = name
    self.interconnects = cells.get(None)
    self.cells = {cell_name: entries for cell_name, entries in cells.items() if cell_name}
def __repr__(self):
    """Returns one ``name: data`` line per cell, followed by one line per interconnect entry.

    ``self.interconnects`` is ``None`` when the delay file has no interconnect data;
    it is then treated like an empty list instead of raising a ``TypeError``.
    """
    cell_text = '\n'.join(f'{n}: {l}' for n, l in self.cells.items())
    interconnect_text = '\n'.join(str(i) for i in self.interconnects or ())
    return cell_text + '\n' + interconnect_text
def iopaths(self, circuit:Circuit, tlib=TechLib()):
    """Builds an ndarray holding all IOPATH delays of this delay file for ``circuit``.

    Every IOPATH delay of a node `n` is stored at the line connected to the input pin named in the IOPATH.

    * Axis 0: dataset (usually 3 datasets per SDF-file)
    * Axis 1: line index (e.g. `n.ins[0]`, `n.ins[1]`)
    * Axis 2: polarity of the transition at the IOPATH-input (e.g. at `n.ins[0]` or `n.ins[1]`),
      0='rising/posedge', 1='falling/negedge'
    * Axis 3: polarity of the transition at the IOPATH-output (at `n.outs[0]`),
      0='rising/posedge', 1='falling/negedge'

    Cell names that cannot be found in the circuit and input pins without a connected line
    are reported with ``log.warn`` and skipped.
    """
    def find_cell(name:str):
        # Retry the lookup with backslashes removed, then with brackets replaced by underscores.
        if name not in circuit.cells: name = name.replace('\\', '')
        if name not in circuit.cells: name = name.replace('[', '_').replace(']', '_')
        return circuit.cells.get(name, None)

    # The dataset axis is kept last while filling and moved to the front on return.
    delays = np.zeros((len(circuit.lines), 2, 2, 3))

    for name, iopaths in self.cells.items():
        cell = find_cell(name)
        if not cell:
            log.warn(f'Name from SDF not found in circuit: {name}')
            continue
        for i_pin_spec, o_pin_spec, *dels in iopaths:
            # An edge qualifier restricts the delay to one input polarity; otherwise it applies to both.
            if i_pin_spec.startswith('(posedge '):
                i_pol_idxs = [0]
            elif i_pin_spec.startswith('(negedge '):
                i_pol_idxs = [1]
            else:
                i_pol_idxs = [0, 1]
            # Strip the edge qualifier so that only the pin name remains.
            i_pin_spec = re.sub(r'\((neg|pos)edge ([^)]+)\)', r'\2', i_pin_spec)
            line = cell.ins[tlib.pin_index(cell.kind, i_pin_spec)]
            if line:
                # Empty delay entries are filled with zeros for all three datasets.
                delays[line, i_pol_idxs] = [d if len(d) > 0 else [0, 0, 0] for d in dels]
            else:
                log.warn(f'No line to annotate in circuit: {i_pin_spec} for {cell}')

    return np.moveaxis(delays, -1, 0)
def annotation(self, circuit:Circuit, tlib=TechLib(), dataset=1, interconnect=True, ffdelays=True):
"""Constructs an 3-dimensional ndarray with timing data for each line in ``circuit``. """Constructs an 3-dimensional ndarray with timing data for each line in ``circuit``.
An IOPATH delay for a node is annotated to the line connected to the input pin specified in the IOPATH. An IOPATH delay for a node is annotated to the line connected to the input pin specified in the IOPATH.
@ -75,11 +107,9 @@ class DelayFile:
return sum(_delvals[idx][d] for d in dataset) / len(dataset) return sum(_delvals[idx][d] for d in dataset) / len(dataset)
return _delvals[idx][dataset] return _delvals[idx][dataset]
def find_cell(name:str):
    # Look the cell up under its given name first, then with backslashes removed,
    # then with brackets replaced by underscores. Returns None if nothing matches.
    if name not in circuit.cells: name = name.replace('\\', '')
    if name not in circuit.cells: name = name.replace('[', '_').replace(']', '_')
    return circuit.cells.get(name)

241
src/kyupy/wave_sim.py

@ -47,7 +47,7 @@ class WaveSim(sim.SimOps):
:param keep_waveforms: If disabled, memory of intermediate signal waveforms will be re-used. This greatly reduces :param keep_waveforms: If disabled, memory of intermediate signal waveforms will be re-used. This greatly reduces
memory footprint, but intermediate signal waveforms become unaccessible after a propagation. memory footprint, but intermediate signal waveforms become unaccessible after a propagation.
""" """
def __init__(self, circuit, timing, sims=8, c_caps=16, c_reuse=False, strip_forks=False): def __init__(self, circuit, delays, sims=8, c_caps=16, c_reuse=False, strip_forks=False):
assert c_caps > 0 and c_caps % 4 == 0 assert c_caps > 0 and c_caps % 4 == 0
super().__init__(circuit, c_caps=c_caps//4, c_reuse=c_reuse, strip_forks=strip_forks) super().__init__(circuit, c_caps=c_caps//4, c_reuse=c_reuse, strip_forks=strip_forks)
self.sims = sims self.sims = sims
@ -56,8 +56,8 @@ class WaveSim(sim.SimOps):
self.c_locs[...] *= 4 self.c_locs[...] *= 4
self.c_caps[...] *= 4 self.c_caps[...] *= 4
self.timing = np.zeros((self.c_locs_len, 2, 2)) self.delays = np.zeros((len(delays), self.c_locs_len, 2, 2), dtype=delays.dtype)
self.timing[:len(timing)] = timing self.delays[:, :delays.shape[1]] = delays
self.c = np.zeros((self.c_len, sims), dtype=np.float32) + TMAX self.c = np.zeros((self.c_len, sims), dtype=np.float32) + TMAX
self.s = np.zeros((11, self.s_len, sims), dtype=np.float32) self.s = np.zeros((11, self.s_len, sims), dtype=np.float32)
@ -128,7 +128,7 @@ class WaveSim(sim.SimOps):
sims = min(sims or self.sims, self.sims) sims = min(sims or self.sims, self.sims)
for op_start, op_stop in zip(self.level_starts, self.level_stops): for op_start, op_stop in zip(self.level_starts, self.level_stops):
level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, 0, sims, level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, 0, sims,
self.timing, self.params, sd, seed) self.delays, self.params, sd, seed)
def c_to_s(self, time=TMAX, sd=0.0, seed=1): def c_to_s(self, time=TMAX, sd=0.0, seed=1):
"""Simulates a capture operation at all sequential elements and primary outputs. """Simulates a capture operation at all sequential elements and primary outputs.
@ -173,7 +173,7 @@ def rand_gauss_cpu(seed, sd):
@numba.njit @numba.njit
def wave_eval_cpu(op, cbuf, c_locs, c_caps, st_idx, line_times, param, sd=0.0, seed=0): def wave_eval_cpu_old(op, cbuf, c_locs, c_caps, st_idx, line_times, param, sd=0.0, seed=0):
lut, z_idx, a_idx, b_idx, c_idx, d_idx = op lut, z_idx, a_idx, b_idx, c_idx, d_idx = op
# >>> same code as wave_eval_cpu (except rand_gauss_*pu()-calls) >>> # >>> same code as wave_eval_cpu (except rand_gauss_*pu()-calls) >>>
@ -191,7 +191,7 @@ def wave_eval_cpu(op, cbuf, c_locs, c_caps, st_idx, line_times, param, sd=0.0, s
a_cur = int(0) a_cur = int(0)
b_cur = int(0) b_cur = int(0)
c_cur = int(0) c_cur = int(0)
d_cur = int(0) d_cur = int(0)
z_cur = lut & 1 z_cur = lut & 1
if z_cur == 1: if z_cur == 1:
cbuf[z_mem, st_idx] = TMIN cbuf[z_mem, st_idx] = TMIN
@ -276,17 +276,116 @@ def wave_eval_cpu(op, cbuf, c_locs, c_caps, st_idx, line_times, param, sd=0.0, s
current_t = min(a, b, c, d) current_t = min(a, b, c, d)
# generate overflow flag or propagate from input # generate or propagate overflow flag
cbuf[z_mem + z_cur, st_idx] = TMAX_OVL if overflows > 0 else max(a, b, c, d) cbuf[z_mem + z_cur, st_idx] = TMAX_OVL if overflows > 0 else max(a, b, c, d)
@numba.njit
def wave_eval_cpu(op, cbuf, c_locs, c_caps, st_idx, delays, param, sd=0.0, seed=0):
    """Evaluates one cell operation for one column of ``cbuf`` and writes the output waveform.

    ``op`` unpacks into the look-up table ``lut``, the output line index and four input line indices.
    ``c_locs`` and ``c_caps`` give, per line index, the first row and the capacity of that line's
    waveform in ``cbuf``; ``st_idx`` selects the column.

    ``delays`` is read as ``delays[variant][line, 0, z_val]`` where ``z_val`` is the current output value.
    If more than one variant is given, one is picked pseudo-randomly from ``seed``, the output line index
    and ``st_idx``; otherwise variant 0 is used.

    ``param`` and ``sd`` are accepted but not read by this implementation.
    """
    lut, z_idx, a_idx, b_idx, c_idx, d_idx = op

    # >>> the code below is duplicated in wave_eval_gpu; keep both copies in sync >>>
    overflows = int(0)

    if len(delays) > 1:
        # Several delay variants: scramble seed, output line and column, then pick one variant.
        rnd = (seed << 4) + (z_idx << 20) + (st_idx << 1)
        for _ in range(4):
            rnd = int(0xDEECE66D) * rnd + 0xB
        delays = delays[rnd % len(delays)]
    else:
        delays = delays[0]

    a_mem = c_locs[a_idx]
    b_mem = c_locs[b_idx]
    c_mem = c_locs[c_idx]
    d_mem = c_locs[d_idx]
    z_mem = c_locs[z_idx]
    z_cap = c_caps[z_idx]

    # Read cursors into the four input waveforms.
    a_cur = int(0)
    b_cur = int(0)
    c_cur = int(0)
    d_cur = int(0)

    # Bit 0 of the LUT is the output for all-zero inputs; an initial 1 is recorded as an edge at TMIN.
    z_cur = lut & 1
    if z_cur == 1:
        cbuf[z_mem, st_idx] = TMIN

    z_val = z_cur

    # Next pending transition of each input, delayed according to the current output value.
    a = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val]
    b = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val]
    c = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val]
    d = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val]

    previous_t = TMIN
    current_t = min(a, b, c, d)
    inputs = int(0)

    while current_t < TMAX:
        # Consume the earliest pending transition and toggle the matching input bit.
        if a == current_t:
            a_cur += 1
            inputs ^= 1
            thresh = delays[a_idx, 0, z_val]
            edge_t = cbuf[a_mem + a_cur, st_idx]
            a = edge_t + thresh
            next_t = edge_t + delays[a_idx, 0, z_val ^ 1]
        elif b == current_t:
            b_cur += 1
            inputs ^= 2
            thresh = delays[b_idx, 0, z_val]
            edge_t = cbuf[b_mem + b_cur, st_idx]
            b = edge_t + thresh
            next_t = edge_t + delays[b_idx, 0, z_val ^ 1]
        elif c == current_t:
            c_cur += 1
            inputs ^= 4
            thresh = delays[c_idx, 0, z_val]
            edge_t = cbuf[c_mem + c_cur, st_idx]
            c = edge_t + thresh
            next_t = edge_t + delays[c_idx, 0, z_val ^ 1]
        else:
            d_cur += 1
            inputs ^= 8
            thresh = delays[d_idx, 0, z_val]
            edge_t = cbuf[d_mem + d_cur, st_idx]
            d = edge_t + thresh
            next_t = edge_t + delays[d_idx, 0, z_val ^ 1]

        if (z_cur & 1) != ((lut >> inputs) & 1):
            # we generate an edge in z_mem, if ...
            if (z_cur == 0  # it is the first edge in z_mem ...
                    or next_t < current_t  # -OR- the next edge on SAME input is EARLIER (need current edge to filter BOTH in next iteration) ...
                    or (current_t - previous_t) > thresh  # -OR- the generated hazard is wider than pulse threshold.
                    ):
                if z_cur < (z_cap - 1):  # enough space in z_mem?
                    cbuf[z_mem + z_cur, st_idx] = current_t
                    previous_t = current_t
                    z_cur += 1
                else:
                    # No room left: count the overflow and drop the previously stored edge.
                    overflows += 1
                    previous_t = cbuf[z_mem + z_cur - 1, st_idx]
                    z_cur -= 1
            else:
                # The pulse is too short: remove the previously stored edge instead of adding one.
                z_cur -= 1
                previous_t = cbuf[z_mem + z_cur - 1, st_idx] if z_cur > 0 else TMIN

            # output value of cell changed. update all delayed inputs.
            z_val ^= 1
            a = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val]
            b = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val]
            c = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val]
            d = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val]

        current_t = min(a, b, c, d)

    # generate or propagate overflow flag
    cbuf[z_mem + z_cur, st_idx] = TMAX_OVL if overflows > 0 else max(a, b, c, d)
@numba.njit
def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, st_start, st_stop, delays, params, sd, seed):
    """Evaluates the operations ``ops[op_start:op_stop]`` for every column in ``range(st_start, st_stop)``.

    Each (operation, column) pair is passed to ``wave_eval_cpu`` together with the waveform buffer ``c``,
    its layout (``c_locs``, ``c_caps``), the delay data and the column's entry of ``params``.
    Results are written into ``c``; nothing is returned.
    """
    for op_idx in range(op_start, op_stop):
        op = ops[op_idx]
        for st_idx in range(st_start, st_stop):
            wave_eval_cpu(op, c, c_locs, c_caps, st_idx, delays, params[st_idx], sd, seed)
@numba.njit @numba.njit
@ -342,15 +441,15 @@ class WaveSimCuda(WaveSim):
All internal memories are mirrored into GPU memory upon construction. All internal memories are mirrored into GPU memory upon construction.
Some operations like access to single waveforms can involve large communication overheads. Some operations like access to single waveforms can involve large communication overheads.
""" """
def __init__(self, circuit, timing, sims=8, c_caps=16, c_reuse=False, strip_forks=False): def __init__(self, circuit, delays, sims=8, c_caps=16, c_reuse=False, strip_forks=False):
super().__init__(circuit, timing, sims, c_caps, c_reuse, strip_forks) super().__init__(circuit, delays, sims, c_caps, c_reuse, strip_forks)
self.c = cuda.to_device(self.c) self.c = cuda.to_device(self.c)
self.s = cuda.to_device(self.s) self.s = cuda.to_device(self.s)
self.ops = cuda.to_device(self.ops) self.ops = cuda.to_device(self.ops)
self.c_locs = cuda.to_device(self.c_locs) self.c_locs = cuda.to_device(self.c_locs)
self.c_caps = cuda.to_device(self.c_caps) self.c_caps = cuda.to_device(self.c_caps)
self.timing = cuda.to_device(self.timing) self.delays = cuda.to_device(self.delays)
self.params = cuda.to_device(self.params) self.params = cuda.to_device(self.params)
self._block_dim = (32, 16) self._block_dim = (32, 16)
@ -369,7 +468,7 @@ class WaveSimCuda(WaveSim):
for op_start, op_stop in zip(self.level_starts, self.level_stops): for op_start, op_stop in zip(self.level_starts, self.level_stops):
grid_dim = self._grid_dim(sims, op_stop - op_start) grid_dim = self._grid_dim(sims, op_stop - op_start)
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, int(0), wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, int(0),
sims, self.timing, self.params, sd, seed) sims, self.delays, self.params, sd, seed)
cuda.synchronize() cuda.synchronize()
def c_to_s(self, time=TMAX, sd=0.0, seed=1): def c_to_s(self, time=TMAX, sd=0.0, seed=1):
@ -423,7 +522,7 @@ def rand_gauss_gpu(seed, sd):
@cuda.jit() @cuda.jit()
def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, st_start, st_stop, line_times, param, sd, seed): def wave_eval_gpu_old(ops, op_start, op_stop, cbuf, c_locs, c_caps, st_start, st_stop, line_times, param, sd, seed):
x, y = cuda.grid(2) x, y = cuda.grid(2)
st_idx = st_start + x st_idx = st_start + x
op_idx = op_start + y op_idx = op_start + y
@ -539,7 +638,119 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, st_start, st_sto
current_t = min(a, b, c, d) current_t = min(a, b, c, d)
# generate overflow flag or propagate from input # generate or propagate overflow flag
cbuf[z_mem + z_cur, st_idx] = TMAX_OVL if overflows > 0 else max(a, b, c, d)
@cuda.jit()
def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, st_start, st_stop, delays, param, sd, seed):
    """CUDA kernel: each thread evaluates one cell operation for one column of ``cbuf``.

    The 2D thread position selects the column (``st_start + x``) and the operation (``op_start + y``);
    threads outside ``[st_start, st_stop)`` or ``[op_start, op_stop)`` return immediately.
    Each row of ``ops`` holds the look-up table, the output line index and four input line indices.

    ``delays`` is read as ``delays[variant][line, 0, z_val]`` where ``z_val`` is the current output value.
    If more than one variant is given, one is picked pseudo-randomly from ``seed``, the output line index
    and the column index; otherwise variant 0 is used.

    ``sd`` is accepted but not read; ``param`` is only narrowed to the column's entry.
    """
    x, y = cuda.grid(2)
    st_idx = st_start + x
    op_idx = op_start + y
    if st_idx >= st_stop or op_idx >= op_stop:
        return

    lut = ops[op_idx, 0]
    z_idx = ops[op_idx, 1]
    a_idx = ops[op_idx, 2]
    b_idx = ops[op_idx, 3]
    c_idx = ops[op_idx, 4]
    d_idx = ops[op_idx, 5]

    param = param[st_idx]

    # >>> the code below is duplicated in wave_eval_cpu; keep both copies in sync >>>
    overflows = int(0)

    if len(delays) > 1:
        # Several delay variants: scramble seed, output line and column, then pick one variant.
        rnd = (seed << 4) + (z_idx << 20) + (st_idx << 1)
        for _ in range(4):
            rnd = int(0xDEECE66D) * rnd + 0xB
        delays = delays[rnd % len(delays)]
    else:
        delays = delays[0]

    a_mem = c_locs[a_idx]
    b_mem = c_locs[b_idx]
    c_mem = c_locs[c_idx]
    d_mem = c_locs[d_idx]
    z_mem = c_locs[z_idx]
    z_cap = c_caps[z_idx]

    # Read cursors into the four input waveforms.
    a_cur = int(0)
    b_cur = int(0)
    c_cur = int(0)
    d_cur = int(0)

    # Bit 0 of the LUT is the output for all-zero inputs; an initial 1 is recorded as an edge at TMIN.
    z_cur = lut & 1
    if z_cur == 1:
        cbuf[z_mem, st_idx] = TMIN

    z_val = z_cur

    # Next pending transition of each input, delayed according to the current output value.
    a = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val]
    b = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val]
    c = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val]
    d = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val]

    previous_t = TMIN
    current_t = min(a, b, c, d)
    inputs = int(0)

    while current_t < TMAX:
        # Consume the earliest pending transition and toggle the matching input bit.
        if a == current_t:
            a_cur += 1
            inputs ^= 1
            thresh = delays[a_idx, 0, z_val]
            edge_t = cbuf[a_mem + a_cur, st_idx]
            a = edge_t + thresh
            next_t = edge_t + delays[a_idx, 0, z_val ^ 1]
        elif b == current_t:
            b_cur += 1
            inputs ^= 2
            thresh = delays[b_idx, 0, z_val]
            edge_t = cbuf[b_mem + b_cur, st_idx]
            b = edge_t + thresh
            next_t = edge_t + delays[b_idx, 0, z_val ^ 1]
        elif c == current_t:
            c_cur += 1
            inputs ^= 4
            thresh = delays[c_idx, 0, z_val]
            edge_t = cbuf[c_mem + c_cur, st_idx]
            c = edge_t + thresh
            next_t = edge_t + delays[c_idx, 0, z_val ^ 1]
        else:
            d_cur += 1
            inputs ^= 8
            thresh = delays[d_idx, 0, z_val]
            edge_t = cbuf[d_mem + d_cur, st_idx]
            d = edge_t + thresh
            next_t = edge_t + delays[d_idx, 0, z_val ^ 1]

        if (z_cur & 1) != ((lut >> inputs) & 1):
            # we generate an edge in z_mem, if ...
            if (z_cur == 0  # it is the first edge in z_mem ...
                    or next_t < current_t  # -OR- the next edge on SAME input is EARLIER (need current edge to filter BOTH in next iteration) ...
                    or (current_t - previous_t) > thresh  # -OR- the generated hazard is wider than pulse threshold.
                    ):
                if z_cur < (z_cap - 1):  # enough space in z_mem?
                    cbuf[z_mem + z_cur, st_idx] = current_t
                    previous_t = current_t
                    z_cur += 1
                else:
                    # No room left: count the overflow and drop the previously stored edge.
                    overflows += 1
                    previous_t = cbuf[z_mem + z_cur - 1, st_idx]
                    z_cur -= 1
            else:
                # The pulse is too short: remove the previously stored edge instead of adding one.
                z_cur -= 1
                previous_t = cbuf[z_mem + z_cur - 1, st_idx] if z_cur > 0 else TMIN

            # output value of cell changed. update all delayed inputs.
            z_val ^= 1
            a = cbuf[a_mem + a_cur, st_idx] + delays[a_idx, 0, z_val]
            b = cbuf[b_mem + b_cur, st_idx] + delays[b_idx, 0, z_val]
            c = cbuf[c_mem + c_cur, st_idx] + delays[c_idx, 0, z_val]
            d = cbuf[d_mem + d_cur, st_idx] + delays[d_idx, 0, z_val]

        current_t = min(a, b, c, d)

    # generate or propagate overflow flag
    cbuf[z_mem + z_cur, st_idx] = TMAX_OVL if overflows > 0 else max(a, b, c, d)

4
tests/conftest.py

@ -13,6 +13,6 @@ def b14_circuit(mydir):
return verilog.load(mydir / 'b14.v.gz', branchforks=True) return verilog.load(mydir / 'b14.v.gz', branchforks=True)
@pytest.fixture(scope='session')
def b14_delays(mydir, b14_circuit):
    """Session fixture: IOPATH delays of ``b14.sdf.gz`` for the b14 circuit, restricted to dataset 1.

    The slice ``[1:2]`` keeps the dataset axis, so the result still has a leading axis of length one.
    """
    from kyupy import sdf
    delay_file = sdf.load(mydir / 'b14.sdf.gz')
    all_datasets = delay_file.iopaths(b14_circuit)
    return all_datasets[1:2]

112
tests/test_wave_sim.py

@ -11,28 +11,28 @@ def test_nand_delays():
c = np.full((5*16, 1), TMAX) # 5 waveforms of capacity 16 c = np.full((5*16, 1), TMAX) # 5 waveforms of capacity 16
c_locs = np.zeros((5,), dtype='int') c_locs = np.zeros((5,), dtype='int')
c_caps = np.zeros((5,), dtype='int') c_caps = np.zeros((5,), dtype='int')
for i in range(5): c_locs[i], c_caps[i] = i*16, 16 # 1:1 mapping for i in range(5): c_locs[i], c_caps[i] = i*16, 16 # 1:1 mapping
# SDF specifies IOPATH delays with respect to output polarity # SDF specifies IOPATH delays with respect to output polarity
# SDF pulse rejection value is determined by IOPATH causing last transition and polarity of last transition # SDF pulse rejection value is determined by IOPATH causing last transition and polarity of last transition
line_times = np.zeros((5, 2, 2)) delays = np.zeros((1, 5, 2, 2))
line_times[0, 0, 0] = 0.1 # A -> Z rise delay delays[0, 0, 0, 0] = 0.1 # A -> Z rise delay
line_times[0, 0, 1] = 0.2 # A -> Z fall delay delays[0, 0, 0, 1] = 0.2 # A -> Z fall delay
line_times[0, 1, 0] = 0.1 # A -> Z negative pulse limit (terminate in rising Z) delays[0, 0, 1, 0] = 0.1 # A -> Z negative pulse limit (terminate in rising Z)
line_times[0, 1, 1] = 0.2 # A -> Z positive pulse limit delays[0, 0, 1, 1] = 0.2 # A -> Z positive pulse limit
line_times[1, :, 0] = 0.3 # as above for B -> Z delays[0, 1, :, 0] = 0.3 # as above for B -> Z
line_times[1, :, 1] = 0.4 delays[0, 1, :, 1] = 0.4
line_times[2, :, 0] = 0.5 # as above for C -> Z delays[0, 2, :, 0] = 0.5 # as above for C -> Z
line_times[2, :, 1] = 0.6 delays[0, 2, :, 1] = 0.6
line_times[3, :, 0] = 0.7 # as above for D -> Z delays[0, 3, :, 0] = 0.7 # as above for D -> Z
line_times[3, :, 1] = 0.8 delays[0, 3, :, 1] = 0.8
sdata = np.asarray([1, -1, 0, 0], dtype='float32') sdata = np.asarray([1, -1, 0, 0], dtype='float32')
def wave_assert(inputs, output): def wave_assert(inputs, output):
for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i
wave_eval_cpu(op, c, c_locs, c_caps, 0, line_times, sdata) wave_eval_cpu(op, c, c_locs, c_caps, 0, delays, sdata)
for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[4,i], v) for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[4,i], v)
wave_assert([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1 wave_assert([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1
@ -50,75 +50,74 @@ def test_nand_delays():
def test_tiny_circuit(): def test_tiny_circuit():
c = bench.parse('input(x, y) output(a, o, n) a=and(x,y) o=or(x,y) n=not(x)') c = bench.parse('input(x, y) output(a, o, n) a=and(x,y) o=or(x,y) n=not(x)')
lt = np.zeros((len(c.lines), 2, 2)) delays = np.full((1, len(c.lines), 2, 2), 1.0) # unit delay for all lines
lt[:,0,:] = 1.0 # unit delay for all lines wsim = WaveSim(c, delays)
wsim = WaveSim(c, lt)
assert wsim.s.shape[1] == 5 assert wsim.s.shape[1] == 5
# values for x # values for x
wsim.s[:3,0,0] = 0, 0.1, 0 wsim.s[:3,0,0] = 0, 10, 0
wsim.s[:3,0,1] = 0, 0.2, 1 wsim.s[:3,0,1] = 0, 20, 1
wsim.s[:3,0,2] = 1, 0.3, 0 wsim.s[:3,0,2] = 1, 30, 0
wsim.s[:3,0,3] = 1, 0.4, 1 wsim.s[:3,0,3] = 1, 40, 1
# values for y # values for y
wsim.s[:3,1,0] = 1, 0.5, 0 wsim.s[:3,1,0] = 1, 50, 0
wsim.s[:3,1,1] = 1, 0.6, 0 wsim.s[:3,1,1] = 1, 60, 0
wsim.s[:3,1,2] = 1, 0.7, 0 wsim.s[:3,1,2] = 1, 70, 0
wsim.s[:3,1,3] = 0, 0.8, 1 wsim.s[:3,1,3] = 0, 80, 1
wsim.s_to_c() wsim.s_to_c()
x_c_loc = wsim.c_locs[wsim.ppi_offset+0] # check x waveforms x_c_loc = wsim.c_locs[wsim.ppi_offset+0] # check x waveforms
np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 0], [TMAX, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 0], [TMAX, TMAX, TMAX])
np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 1], [0.2, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 1], [20, TMAX, TMAX])
np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 2], [TMIN, 0.3, TMAX]) np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 2], [TMIN, 30, TMAX])
np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 3], [TMIN, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 3], [TMIN, TMAX, TMAX])
y_c_loc = wsim.c_locs[wsim.ppi_offset+1] # check y waveforms y_c_loc = wsim.c_locs[wsim.ppi_offset+1] # check y waveforms
np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 0], [TMIN, 0.5, TMAX]) np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 0], [TMIN, 50, TMAX])
np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 1], [TMIN, 0.6, TMAX]) np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 1], [TMIN, 60, TMAX])
np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 2], [TMIN, 0.7, TMAX]) np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 2], [TMIN, 70, TMAX])
np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 3], [0.8, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 3], [80, TMAX, TMAX])
wsim.c_prop() wsim.c_prop()
a_c_loc = wsim.c_locs[wsim.ppo_offset+2] # check a waveforms a_c_loc = wsim.c_locs[wsim.ppo_offset+2] # check a waveforms
np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 0], [TMAX, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 0], [TMAX, TMAX, TMAX])
np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 1], [1.2, 1.6, TMAX]) np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 1], [21, 61, TMAX])
np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 2], [TMIN, 1.3, TMAX]) np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 2], [TMIN, 31, TMAX])
np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 3], [1.8, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 3], [81, TMAX, TMAX])
o_c_loc = wsim.c_locs[wsim.ppo_offset+3] # check o waveforms o_c_loc = wsim.c_locs[wsim.ppo_offset+3] # check o waveforms
np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 0], [TMIN, 1.5, TMAX]) np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 0], [TMIN, 51, TMAX])
np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 1], [TMIN, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 1], [TMIN, TMAX, TMAX])
np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 2], [TMIN, 1.7, TMAX]) np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 2], [TMIN, 71, TMAX])
np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 3], [TMIN, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 3], [TMIN, TMAX, TMAX])
n_c_loc = wsim.c_locs[wsim.ppo_offset+4] # check n waveforms n_c_loc = wsim.c_locs[wsim.ppo_offset+4] # check n waveforms
np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 0], [TMIN, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 0], [TMIN, TMAX, TMAX])
np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 1], [TMIN, 1.2, TMAX]) np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 1], [TMIN, 21, TMAX])
np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 2], [1.3, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 2], [31, TMAX, TMAX])
np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 3], [TMAX, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 3], [TMAX, TMAX, TMAX])
wsim.c_to_s() wsim.c_to_s()
# check a captures # check a captures
np.testing.assert_allclose(wsim.s[3:7, 2, 0], [0, TMAX, TMIN, 0]) np.testing.assert_allclose(wsim.s[3:7, 2, 0], [0, TMAX, TMIN, 0])
np.testing.assert_allclose(wsim.s[3:7, 2, 1], [0, 1.2, 1.6, 0]) np.testing.assert_allclose(wsim.s[3:7, 2, 1], [0, 21, 61, 0])
np.testing.assert_allclose(wsim.s[3:7, 2, 2], [1, 1.3, 1.3, 0]) np.testing.assert_allclose(wsim.s[3:7, 2, 2], [1, 31, 31, 0])
np.testing.assert_allclose(wsim.s[3:7, 2, 3], [0, 1.8, 1.8, 1]) np.testing.assert_allclose(wsim.s[3:7, 2, 3], [0, 81, 81, 1])
# check o captures # check o captures
np.testing.assert_allclose(wsim.s[3:7, 3, 0], [1, 1.5, 1.5, 0]) np.testing.assert_allclose(wsim.s[3:7, 3, 0], [1, 51, 51, 0])
np.testing.assert_allclose(wsim.s[3:7, 3, 1], [1, TMAX, TMIN, 1]) np.testing.assert_allclose(wsim.s[3:7, 3, 1], [1, TMAX, TMIN, 1])
np.testing.assert_allclose(wsim.s[3:7, 3, 2], [1, 1.7, 1.7, 0]) np.testing.assert_allclose(wsim.s[3:7, 3, 2], [1, 71, 71, 0])
np.testing.assert_allclose(wsim.s[3:7, 3, 3], [1, TMAX, TMIN, 1]) np.testing.assert_allclose(wsim.s[3:7, 3, 3], [1, TMAX, TMIN, 1])
# check o captures # check o captures
np.testing.assert_allclose(wsim.s[3:7, 4, 0], [1, TMAX, TMIN, 1]) np.testing.assert_allclose(wsim.s[3:7, 4, 0], [1, TMAX, TMIN, 1])
np.testing.assert_allclose(wsim.s[3:7, 4, 1], [1, 1.2, 1.2, 0]) np.testing.assert_allclose(wsim.s[3:7, 4, 1], [1, 21, 21, 0])
np.testing.assert_allclose(wsim.s[3:7, 4, 2], [0, 1.3, 1.3, 1]) np.testing.assert_allclose(wsim.s[3:7, 4, 2], [0, 31, 31, 1])
np.testing.assert_allclose(wsim.s[3:7, 4, 3], [0, TMAX, TMIN, 0]) np.testing.assert_allclose(wsim.s[3:7, 4, 3], [0, TMAX, TMIN, 0])
@ -157,13 +156,16 @@ def compare_to_logic_sim(wsim: WaveSim):
np.testing.assert_allclose(resp, exp) np.testing.assert_allclose(resp, exp)
def test_b14(b14_circuit, b14_delays):
    """Checks a ``WaveSim`` of the b14 circuit (8 simulation slots) against the logic simulator."""
    simulator = WaveSim(b14_circuit, b14_delays, 8)
    compare_to_logic_sim(simulator)
def test_b14_strip_forks(b14_circuit, b14_delays):
    """Same comparison as ``test_b14``, with ``strip_forks=True`` passed to ``WaveSim``."""
    simulator = WaveSim(b14_circuit, b14_delays, 8, strip_forks=True)
    compare_to_logic_sim(simulator)
def test_b14_cuda(b14_circuit, b14_delays):
    """Checks a ``WaveSimCuda`` of the b14 circuit (8 slots, ``strip_forks=True``) against the logic simulator."""
    simulator = WaveSimCuda(b14_circuit, b14_delays, 8, strip_forks=True)
    compare_to_logic_sim(simulator)


# Running this file as a script executes only the NAND delay test.
if __name__ == '__main__':
    test_nand_delays()
Loading…
Cancel
Save