Browse Source

simprim, vat refactor, batchrange

devel
Stefan Holst 2 years ago
parent
commit
5566b80e52
  1. 5
      src/kyupy/__init__.py
  2. 64
      src/kyupy/logic_sim.py
  3. 226
      src/kyupy/sim.py
  4. 148
      src/kyupy/wave_sim.py
  5. 88
      tests/test_wave_sim.py

5
src/kyupy/__init__.py

@ -76,6 +76,11 @@ def hr_time(seconds):
return s return s
def batchrange(nitems, maxsize):
    """Splits ``nitems`` items into consecutive batches of at most ``maxsize`` items.

    Every batch except possibly the last one has exactly ``maxsize`` items;
    the last batch holds the remainder. Nothing is yielded if ``nitems`` is
    zero or negative.

    :param nitems: Total number of items to cover.
    :param maxsize: Maximum number of items per batch. Must be positive.
    :return: A generator of ``(offset, size)`` tuples, where ``offset`` is the
        index of the first item of the batch and ``size`` is its item count.
    :raises ValueError: If ``maxsize`` is zero or negative.
    """
    # A negative step would make range() empty and silently drop all items,
    # a zero step only fails with an unrelated-looking range() error.
    if maxsize <= 0:
        raise ValueError(f'maxsize must be positive, got {maxsize}')
    for offset in range(0, nitems, maxsize):
        yield offset, min(nitems - offset, maxsize)
class Timer: class Timer:
def __init__(self, s=0): self.s = s def __init__(self, s=0): self.s = s
def __enter__(self): self.start_time = time.perf_counter(); return self def __enter__(self): self.start_time = time.perf_counter(); return self

64
src/kyupy/logic_sim.py

@ -11,10 +11,8 @@ import math
import numpy as np import numpy as np
from . import numba, logic, hr_bytes, sim from . import numba, logic, hr_bytes, sim
from .sim import SimOps, SimPrim
class LogicSim(sim.SimOps):
class LogicSim(SimOps):
"""A bit-parallel naïve combinational simulator for 2-, 4-, or 8-valued logic. """A bit-parallel naïve combinational simulator for 2-, 4-, or 8-valued logic.
:param circuit: The circuit to simulate. :param circuit: The circuit to simulate.
@ -36,17 +34,17 @@ class LogicSim(SimOps):
self.s = np.zeros((2, self.s_len, 3, nbytes), dtype=np.uint8) self.s = np.zeros((2, self.s_len, 3, nbytes), dtype=np.uint8)
self.s[:,:,1,:] = 255 # unassigned self.s[:,:,1,:] = 255 # unassigned
self.pi_s_locs = np.flatnonzero(self.vat[self.ppi_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0) self.pi_s_locs = np.flatnonzero(self.c_locs[self.ppi_offset+np.arange(len(self.circuit.io_nodes))] >= 0)
self.po_s_locs = np.flatnonzero(self.vat[self.ppo_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0) self.po_s_locs = np.flatnonzero(self.c_locs[self.ppo_offset+np.arange(len(self.circuit.io_nodes))] >= 0)
self.ppio_s_locs = np.arange(len(self.circuit.io_nodes), len(self.s_nodes)) self.ppio_s_locs = np.arange(len(self.circuit.io_nodes), len(self.s_nodes))
self.pippi_s_locs = np.concatenate([self.pi_s_locs, self.ppio_s_locs]) self.pippi_s_locs = np.concatenate([self.pi_s_locs, self.ppio_s_locs])
self.poppo_s_locs = np.concatenate([self.po_s_locs, self.ppio_s_locs]) self.poppo_s_locs = np.concatenate([self.po_s_locs, self.ppio_s_locs])
self.pi_c_locs = self.vat[self.ppi_offset+self.pi_s_locs, 0] self.pi_c_locs = self.c_locs[self.ppi_offset+self.pi_s_locs]
self.po_c_locs = self.vat[self.ppo_offset+self.po_s_locs, 0] self.po_c_locs = self.c_locs[self.ppo_offset+self.po_s_locs]
self.ppi_c_locs = self.vat[self.ppi_offset+self.ppio_s_locs, 0] self.ppi_c_locs = self.c_locs[self.ppi_offset+self.ppio_s_locs]
self.ppo_c_locs = self.vat[self.ppo_offset+self.ppio_s_locs, 0] self.ppo_c_locs = self.c_locs[self.ppo_offset+self.ppio_s_locs]
self.pippi_c_locs = np.concatenate([self.pi_c_locs, self.ppi_c_locs]) self.pippi_c_locs = np.concatenate([self.pi_c_locs, self.ppi_c_locs])
self.poppo_c_locs = np.concatenate([self.po_c_locs, self.ppo_c_locs]) self.poppo_c_locs = np.concatenate([self.po_c_locs, self.ppo_c_locs])
@ -103,34 +101,34 @@ class LogicSim(SimOps):
nbytes = (sims - 1) // 8 + 1 nbytes = (sims - 1) // 8 + 1
if self.m == 2: if self.m == 2:
if inject_cb is None: if inject_cb is None:
_prop_cpu(self.ops, self.vat, self.c[...,:nbytes]) _prop_cpu(self.ops, self.c_locs, self.c[...,:nbytes])
else: else:
for op, o0, i0, i1, i2, i3 in self.ops: for op, o0, i0, i1, i2, i3 in self.ops:
o0, i0, i1, i2, i3 = [self.vat[x,0] for x in (o0, i0, i1, i2, i3)] o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)]
if op == SimPrim.BUF1: self.c[o0]=self.c[i0] if op == sim.BUF1: self.c[o0]=self.c[i0]
elif op == SimPrim.INV1: self.c[o0] = ~self.c[i0] elif op == sim.INV1: self.c[o0] = ~self.c[i0]
elif op == SimPrim.AND2: self.c[o0] = self.c[i0] & self.c[i1] elif op == sim.AND2: self.c[o0] = self.c[i0] & self.c[i1]
elif op == SimPrim.NAND2: self.c[o0] = ~(self.c[i0] & self.c[i1]) elif op == sim.NAND2: self.c[o0] = ~(self.c[i0] & self.c[i1])
elif op == SimPrim.OR2: self.c[o0] = self.c[i0] | self.c[i1] elif op == sim.OR2: self.c[o0] = self.c[i0] | self.c[i1]
elif op == SimPrim.NOR2: self.c[o0] = ~(self.c[i0] | self.c[i1]) elif op == sim.NOR2: self.c[o0] = ~(self.c[i0] | self.c[i1])
elif op == SimPrim.XOR2: self.c[o0] = self.c[i0] ^ self.c[i1] elif op == sim.XOR2: self.c[o0] = self.c[i0] ^ self.c[i1]
elif op == SimPrim.XNOR2: self.c[o0] = ~(self.c[i0] ^ self.c[i1]) elif op == sim.XNOR2: self.c[o0] = ~(self.c[i0] ^ self.c[i1])
else: print(f'unknown SimPrim {op}') else: print(f'unknown sim {op}')
inject_cb(o0, self.s[o0]) inject_cb(o0, self.s[o0])
elif self.m == 4: elif self.m == 4:
pass pass
else: else:
for op, o0, i0, i1, i2, i3 in self.ops: for op, o0, i0, i1, i2, i3 in self.ops:
o0, i0, i1, i2, i3 = [self.vat[x,0] for x in (o0, i0, i1, i2, i3)] o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)]
if op == SimPrim.BUF1: self.c[o0]=self.c[i0] if op == sim.BUF1: self.c[o0]=self.c[i0]
elif op == SimPrim.INV1: logic.bp_not(self.c[o0], self.c[i0]) elif op == sim.INV1: logic.bp_not(self.c[o0], self.c[i0])
elif op == SimPrim.AND2: logic.bp_and(self.c[o0], self.c[i0], self.c[i1]) elif op == sim.AND2: logic.bp_and(self.c[o0], self.c[i0], self.c[i1])
elif op == SimPrim.NAND2: logic.bp_and(self.c[o0], self.c[i0], self.c[i1]); logic.bp_not(self.c[o0], self.c[o0]) elif op == sim.NAND2: logic.bp_and(self.c[o0], self.c[i0], self.c[i1]); logic.bp_not(self.c[o0], self.c[o0])
elif op == SimPrim.OR2: logic.bp_or(self.c[o0], self.c[i0], self.c[i1]) elif op == sim.OR2: logic.bp_or(self.c[o0], self.c[i0], self.c[i1])
elif op == SimPrim.NOR2: logic.bp_or(self.c[o0], self.c[i0], self.c[i1]); logic.bp_not(self.c[o0], self.c[o0]) elif op == sim.NOR2: logic.bp_or(self.c[o0], self.c[i0], self.c[i1]); logic.bp_not(self.c[o0], self.c[o0])
elif op == SimPrim.XOR2: logic.bp_xor(self.c[o0], self.c[i0], self.c[i1]) elif op == sim.XOR2: logic.bp_xor(self.c[o0], self.c[i0], self.c[i1])
elif op == SimPrim.XNOR2: logic.bp_xor(self.c[o0], self.c[i0], self.c[i1]); logic.bp_not(self.c[o0], self.c[o0]) elif op == sim.XNOR2: logic.bp_xor(self.c[o0], self.c[i0], self.c[i1]); logic.bp_not(self.c[o0], self.c[o0])
else: print(f'unknown SimPrim {op}') else: print(f'unknown sim {op}')
if inject_cb is not None: inject_cb(o0, self.s[o0]) if inject_cb is not None: inject_cb(o0, self.s[o0])
def s_ppo_to_ppi(self): def s_ppo_to_ppi(self):
@ -159,9 +157,9 @@ class LogicSim(SimOps):
@numba.njit @numba.njit
def _prop_cpu(ops, vat, c): def _prop_cpu(ops, c_locs, c):
for op, o0, i0, i1, i2, i3 in ops: for op, o0, i0, i1, i2, i3 in ops:
o0, i0, i1, i2, i3 = [vat[x,0] for x in (o0, i0, i1, i2, i3)] o0, i0, i1, i2, i3 = [c_locs[x] for x in (o0, i0, i1, i2, i3)]
if op == sim.BUF1: c[o0]=c[i0] if op == sim.BUF1: c[o0]=c[i0]
elif op == sim.INV1: c[o0] = ~c[i0] elif op == sim.INV1: c[o0] = ~c[i0]
elif op == sim.AND2: c[o0] = c[i0] & c[i1] elif op == sim.AND2: c[o0] = c[i0] & c[i1]
@ -170,4 +168,4 @@ def _prop_cpu(ops, vat, c):
elif op == sim.NOR2: c[o0] = ~(c[i0] | c[i1]) elif op == sim.NOR2: c[o0] = ~(c[i0] | c[i1])
elif op == sim.XOR2: c[o0] = c[i0] ^ c[i1] elif op == sim.XOR2: c[o0] = c[i0] ^ c[i1]
elif op == sim.XNOR2: c[o0] = ~(c[i0] ^ c[i1]) elif op == sim.XNOR2: c[o0] = ~(c[i0] ^ c[i1])
else: print(f'unknown SimPrim {op}') else: print(f'unknown sim {op}')

226
src/kyupy/sim.py

@ -4,117 +4,70 @@ from bisect import bisect, insort_left
import numpy as np import numpy as np
BUF1 = 0b1010_1010_1010_1010 BUF1 = np.uint16(0b1010_1010_1010_1010)
INV1 = 0b0101_0101_0101_0101 INV1 = ~BUF1
NAND4 = 0b0111_1111_1111_1111 AND2 = np.uint16(0b1000_1000_1000_1000)
NAND3 = 0b0111_1111_0111_1111 AND3 = np.uint16(0b1000_0000_1000_0000)
NAND2 = 0b0111_0111_0111_0111 AND4 = np.uint16(0b1000_0000_0000_0000)
NOR4 = 0b0000_0000_0000_0001 NAND2, NAND3, NAND4 = ~AND2, ~AND3, ~AND4
NOR3 = 0b0000_0001_0000_0001
NOR2 = 0b0001_0001_0001_0001 OR2 = np.uint16(0b1110_1110_1110_1110)
OR3 = np.uint16(0b1111_1110_1111_1110)
AND4 = 0b1000_0000_0000_0000 OR4 = np.uint16(0b1111_1111_1111_1110)
AND3 = 0b1000_0000_1000_0000
AND2 = 0b1000_1000_1000_1000 NOR2, NOR3, NOR4 = ~OR2, ~OR3, ~OR4
OR4 = 0b1111_1111_1111_1110 XOR2 = np.uint16(0b0110_0110_0110_0110)
OR3 = 0b1111_1110_1111_1110 XOR3 = np.uint16(0b1001_0110_1001_0110)
OR2 = 0b1110_1110_1110_1110 XOR4 = np.uint16(0b0110_1001_1001_0110)
XOR4 = 0b0110_1001_1001_0110 XNOR2, XNOR3, XNOR4 = ~XOR2, ~XOR3, ~XOR4
XOR3 = 0b1001_0110_1001_0110
XOR2 = 0b0110_0110_0110_0110 AO21 = np.uint16(0b1110_1010_1110_1010)
AO22 = np.uint16(0b1111_1000_1000_1000)
XNOR4 = 0b1001_0110_0110_1001 OA21 = np.uint16(0b1010_1000_1010_1000)
XNOR3 = 0b0110_1001_0110_1001 OA22 = np.uint16(0b1110_1110_1110_0000)
XNOR2 = 0b1001_1001_1001_1001
AOI21, AOI22, OAI21, OAI22 = ~AO21, ~AO22, ~OA21, ~OA22
AO22 = 0b1111_1000_1000_1000
AOI22 = 0b0000_0111_0111_0111 MUX21 = np.uint16(0b1110_0100_1110_0100)
AO21 = 0b1110_1010_1110_1010
AOI21 = 0b0001_0101_0001_0101 names = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16)])
OA22 = 0b1110_1110_1110_0000
OAI22 = 0b0001_0001_0001_1111 kind_prefixes = {
OA21 = 0b1010_1000_1010_1000 'nand': (NAND4, NAND3, NAND2),
OAI21 = 0b0101_0111_0101_0111 'nor': (NOR4, NOR3, NOR2),
MUX21 = 0b1110_0100_1110_0100 'and': (AND4, AND3, AND2),
'or': (OR4, OR3, OR2),
class SimPrim: 'xor': (XOR4, XOR3, XOR2),
BUF1 = 0b1010_1010_1010_1010 'xnor': (XNOR4, XNOR3, XNOR2),
INV1 = 0b0101_0101_0101_0101
'not': (INV1, INV1, INV1),
NAND4 = 0b0111_1111_1111_1111 'inv': (INV1, INV1, INV1),
NAND3 = 0b0111_1111_0111_1111 'ibuf': (INV1, INV1, INV1),
NAND2 = 0b0111_0111_0111_0111 '__const1__': (INV1, INV1, INV1),
'tieh': (INV1, INV1, INV1),
NOR4 = 0b0000_0000_0000_0001
NOR3 = 0b0000_0001_0000_0001 'buf': (BUF1, BUF1, BUF1),
NOR2 = 0b0001_0001_0001_0001 'nbuf': (BUF1, BUF1, BUF1),
'delln': (BUF1, BUF1, BUF1),
AND4 = 0b1000_0000_0000_0000 '__const0__': (BUF1, BUF1, BUF1),
AND3 = 0b1000_0000_1000_0000 'tiel': (BUF1, BUF1, BUF1),
AND2 = 0b1000_1000_1000_1000
'ao22': (AO22, AO22, AO22),
OR4 = 0b1111_1111_1111_1110 'aoi22': (AOI22, AOI22, AOI22),
OR3 = 0b1111_1110_1111_1110 'ao21': (AO21, AO21, AO21),
OR2 = 0b1110_1110_1110_1110 'aoi21': (AOI21, AOI21, AOI21),
XOR4 = 0b0110_1001_1001_0110 'oa22': (OA22, OA22, OA22),
XOR3 = 0b1001_0110_1001_0110 'oai22': (OAI22, OAI22, OAI22),
XOR2 = 0b0110_0110_0110_0110 'oa21': (OA21, OA21, OA21),
'oai21': (OAI21, OAI21, OAI21),
XNOR4 = 0b1001_0110_0110_1001
XNOR3 = 0b0110_1001_0110_1001 'mux21': (MUX21, MUX21, MUX21),
XNOR2 = 0b1001_1001_1001_1001 }
AO22 = 0b1111_1000_1000_1000
AOI22 = 0b0000_0111_0111_0111
AO21 = 0b1110_1010_1110_1010
AOI21 = 0b0001_0101_0001_0101
OA22 = 0b1110_1110_1110_0000
OAI22 = 0b0001_0001_0001_1111
OA21 = 0b1010_1000_1010_1000
OAI21 = 0b0101_0111_0101_0111
MUX21 = 0b1110_0100_1110_0100
kind_prefixes = {
'nand': (NAND4, NAND3, NAND2),
'nor': (NOR4, NOR3, NOR2),
'and': (AND4, AND3, AND2),
'or': (OR4, OR3, OR2),
'xor': (XOR4, XOR3, XOR2),
'xnor': (XNOR4, XNOR3, XNOR2),
'not': (INV1, INV1, INV1),
'inv': (INV1, INV1, INV1),
'ibuf': (INV1, INV1, INV1),
'__const1__': (INV1, INV1, INV1),
'tieh': (INV1, INV1, INV1),
'buf': (BUF1, BUF1, BUF1),
'nbuf': (BUF1, BUF1, BUF1),
'delln': (BUF1, BUF1, BUF1),
'__const0__': (BUF1, BUF1, BUF1),
'tiel': (BUF1, BUF1, BUF1),
'ao22': (AO22, AO22, AO22),
'aoi22': (AOI22, AOI22, AOI22),
'ao21': (AO21, AO21, AO21),
'aoi21': (AOI21, AOI21, AOI21),
'oa22': (OA22, OA22, OA22),
'oai22': (OAI22, OAI22, OAI22),
'oa21': (OA21, OA21, OA21),
'oai21': (OAI21, OAI21, OAI21),
'mux21': (MUX21, MUX21, MUX21),
}
@classmethod
def names(cls):
return dict([(v, k) for k, v in cls.__dict__.items() if isinstance(v, int)])
class Heap: class Heap:
def __init__(self): def __init__(self):
@ -184,7 +137,7 @@ class Heap:
class SimOps: class SimOps:
"""A static scheduler that translates a Circuit into a topologically sorted list of basic logic operations (self.ops) and """A static scheduler that translates a Circuit into a topologically sorted list of basic logic operations (self.ops) and
a value allocation table (self.vat) for use in simulators. a memory mapping (self.c_locs, self.c_caps) for use in simulators.
:param circuit: The circuit to create a schedule for. :param circuit: The circuit to create a schedule for.
:param strip_forks: If enabled, the scheduler will not include fork nodes to save simulation time. :param strip_forks: If enabled, the scheduler will not include fork nodes to save simulation time.
@ -203,12 +156,12 @@ class SimOps:
if isinstance(c_caps, int): if isinstance(c_caps, int):
c_caps = [c_caps] * len(circuit.lines) c_caps = [c_caps] * len(circuit.lines)
# indices for state allocation table (sat) # special locations and offsets in c_locs/c_caps
self.zero_idx = len(circuit.lines) self.zero_idx = len(circuit.lines)
self.tmp_idx = self.zero_idx + 1 self.tmp_idx = self.zero_idx + 1
self.ppi_offset = self.tmp_idx + 1 self.ppi_offset = self.tmp_idx + 1
self.ppo_offset = self.ppi_offset + len(self.s_nodes) self.ppo_offset = self.ppi_offset + len(self.s_nodes)
self.vat_len = self.ppo_offset + len(self.s_nodes) self.c_locs_len = self.ppo_offset + len(self.s_nodes)
# translate circuit structure into self.ops # translate circuit structure into self.ops
ops = [] ops = []
@ -217,14 +170,14 @@ class SimOps:
if n in interface_dict: if n in interface_dict:
inp_idx = self.ppi_offset + interface_dict[n] inp_idx = self.ppi_offset + interface_dict[n]
if len(n.outs) > 0 and n.outs[0] is not None: # first output of a PI/PPI if len(n.outs) > 0 and n.outs[0] is not None: # first output of a PI/PPI
ops.append((SimPrim.BUF1, n.outs[0].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx)) ops.append((BUF1, n.outs[0].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx))
if 'dff' in n.kind.lower(): # second output of DFF is inverted if 'dff' in n.kind.lower(): # second output of DFF is inverted
if len(n.outs) > 1 and n.outs[1] is not None: if len(n.outs) > 1 and n.outs[1] is not None:
ops.append((SimPrim.INV1, n.outs[1].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx)) ops.append((INV1, n.outs[1].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx))
else: # if not DFF, no output is inverted. else: # if not DFF, no output is inverted.
for o_line in n.outs[1:]: for o_line in n.outs[1:]:
if o_line is not None: if o_line is not None:
ops.append((SimPrim.BUF1, o_line.index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx)) ops.append((BUF1, o_line.index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx))
continue continue
# regular node, not PI/PPI or PO/PPO # regular node, not PI/PPI or PO/PPO
o0_idx = n.outs[0].index if len(n.outs) > 0 and n.outs[0] is not None else self.tmp_idx o0_idx = n.outs[0].index if len(n.outs) > 0 and n.outs[0] is not None else self.tmp_idx
@ -237,10 +190,10 @@ class SimOps:
if not strip_forks: if not strip_forks:
for o_line in n.outs: for o_line in n.outs:
if o_line is not None: if o_line is not None:
ops.append((SimPrim.BUF1, o_line.index, i0_idx, i1_idx, i2_idx, i3_idx)) ops.append((BUF1, o_line.index, i0_idx, i1_idx, i2_idx, i3_idx))
continue continue
sp = None sp = None
for prefix, prims in SimPrim.kind_prefixes.items(): for prefix, prims in kind_prefixes.items():
if kind.startswith(prefix): if kind.startswith(prefix):
sp = prims[0] sp = prims[0]
if i3_idx == self.zero_idx: if i3_idx == self.zero_idx:
@ -256,7 +209,7 @@ class SimOps:
self.ops = np.asarray(ops, dtype='int32') self.ops = np.asarray(ops, dtype='int32')
# create a map from fanout lines to stem lines for fork stripping # create a map from fanout lines to stem lines for fork stripping
stems = np.zeros(self.vat_len, dtype='int32') - 1 # default to -1: 'no fanout line' stems = np.zeros(self.c_locs_len, dtype='int32') - 1 # default to -1: 'no fanout line'
if strip_forks: if strip_forks:
for f in circuit.forks.values(): for f in circuit.forks.values():
prev_line = f.ins[0] prev_line = f.ins[0]
@ -267,8 +220,8 @@ class SimOps:
stems[ol] = stem_idx stems[ol] = stem_idx
# calculate level (distance from PI/PPI) and reference count for each line # calculate level (distance from PI/PPI) and reference count for each line
levels = np.zeros(self.vat_len, dtype='int32') levels = np.zeros(self.c_locs_len, dtype='int32')
ref_count = np.zeros(self.vat_len, dtype='int32') ref_count = np.zeros(self.c_locs_len, dtype='int32')
level_starts = [0] level_starts = [0]
current_level = 1 current_level = 1
for i, op in enumerate(self.ops): for i, op in enumerate(self.ops):
@ -289,21 +242,21 @@ class SimOps:
self.level_stops = np.asarray(level_starts[1:] + [len(self.ops)], dtype='int32') self.level_stops = np.asarray(level_starts[1:] + [len(self.ops)], dtype='int32')
# state allocation table. maps line and interface indices to self.state memory locations # state allocation table. maps line and interface indices to self.state memory locations
self.vat = np.zeros((self.vat_len, 3), dtype='int') self.c_locs = np.full((self.c_locs_len,), -1, dtype=np.int32)
self.vat[:, 0] = -1 self.c_caps = np.zeros((self.c_locs_len,), dtype=np.int32)
h = Heap() h = Heap()
# allocate and keep memory for special fields # allocate and keep memory for special fields
self.vat[self.zero_idx] = h.alloc(1), 1, 0 self.c_locs[self.zero_idx], self.c_caps[self.zero_idx] = h.alloc(1), 1
self.vat[self.tmp_idx] = h.alloc(1), 1, 0 self.c_locs[self.tmp_idx], self.c_caps[self.tmp_idx] = h.alloc(1), 1
ref_count[self.zero_idx] += 1 ref_count[self.zero_idx] += 1
ref_count[self.tmp_idx] += 1 ref_count[self.tmp_idx] += 1
# allocate and keep memory for PI/PPI, keep memory for PO/PPO (allocated later) # allocate and keep memory for PI/PPI, keep memory for PO/PPO (allocated later)
for i, n in enumerate(self.s_nodes): for i, n in enumerate(self.s_nodes):
if len(n.outs) > 0: if len(n.outs) > 0:
self.vat[self.ppi_offset + i] = h.alloc(1), 1, 0 self.c_locs[self.ppi_offset + i], self.c_caps[self.ppi_offset + i] = h.alloc(1), 1
ref_count[self.ppi_offset + i] += 1 ref_count[self.ppi_offset + i] += 1
if len(n.ins) > 0: if len(n.ins) > 0:
i0_idx = stems[n.ins[0]] if stems[n.ins[0]] >= 0 else n.ins[0] i0_idx = stems[n.ins[0]] if stems[n.ins[0]] >= 0 else n.ins[0]
@ -322,13 +275,13 @@ class SimOps:
ref_count[i1_idx] -= 1 ref_count[i1_idx] -= 1
ref_count[i2_idx] -= 1 ref_count[i2_idx] -= 1
ref_count[i3_idx] -= 1 ref_count[i3_idx] -= 1
if ref_count[i0_idx] <= 0: free_list.append(self.vat[i0_idx, 0]) if ref_count[i0_idx] <= 0: free_list.append(self.c_locs[i0_idx])
if ref_count[i1_idx] <= 0: free_list.append(self.vat[i1_idx, 0]) if ref_count[i1_idx] <= 0: free_list.append(self.c_locs[i1_idx])
if ref_count[i2_idx] <= 0: free_list.append(self.vat[i2_idx, 0]) if ref_count[i2_idx] <= 0: free_list.append(self.c_locs[i2_idx])
if ref_count[i3_idx] <= 0: free_list.append(self.vat[i3_idx, 0]) if ref_count[i3_idx] <= 0: free_list.append(self.c_locs[i3_idx])
o_idx = op[1] o_idx = op[1]
cap = c_caps[o_idx] cap = c_caps[o_idx]
self.vat[o_idx] = h.alloc(cap), cap, 0 self.c_locs[o_idx], self.c_caps[o_idx] = h.alloc(cap), cap
if not keep_signals: if not keep_signals:
for loc in free_list: for loc in free_list:
h.free(loc) h.free(loc)
@ -336,16 +289,15 @@ class SimOps:
# copy memory location and capacity from stems to fanout lines # copy memory location and capacity from stems to fanout lines
for lidx, stem in enumerate(stems): for lidx, stem in enumerate(stems):
if stem >= 0: # if at a fanout line if stem >= 0: # if at a fanout line
self.vat[lidx] = self.vat[stem] self.c_locs[lidx], self.c_caps[lidx] = self.c_locs[stem], self.c_caps[stem]
# copy memory location to PO/PPO area # copy memory location to PO/PPO area
for i, n in enumerate(self.s_nodes): for i, n in enumerate(self.s_nodes):
if len(n.ins) > 0: if len(n.ins) > 0:
self.vat[self.ppo_offset + i] = self.vat[n.ins[0]] self.c_locs[self.ppo_offset + i], self.c_caps[self.ppo_offset + i] = self.c_locs[n.ins[0]], self.c_caps[n.ins[0]]
self.c_len = h.max_size self.c_len = h.max_size
from collections import defaultdict from collections import defaultdict
self.prim_counts = defaultdict(int) self.prim_counts = defaultdict(int)
names_dict = SimPrim.names() for op, _, _, _, _, _ in self.ops: self.prim_counts[names[op]] += 1
for op, _, _, _, _, _ in self.ops: self.prim_counts[names_dict[op]] += 1

148
src/kyupy/wave_sim.py

@ -16,8 +16,7 @@ import math
import numpy as np import numpy as np
from . import numba, cuda, hr_bytes from . import numba, cuda, hr_bytes, sim
from .sim import SimOps
TMAX = np.float32(2 ** 127) TMAX = np.float32(2 ** 127)
@ -29,7 +28,7 @@ TMIN = np.float32(-2 ** 127)
"""A large negative 32-bit floating point value used at the beginning of waveforms that start with logic-1.""" """A large negative 32-bit floating point value used at the beginning of waveforms that start with logic-1."""
class WaveSim(SimOps): class WaveSim(sim.SimOps):
"""A waveform-based combinational logic timing simulator running on CPU. """A waveform-based combinational logic timing simulator running on CPU.
:param circuit: The circuit to simulate. :param circuit: The circuit to simulate.
@ -54,30 +53,31 @@ class WaveSim(SimOps):
self.sims = sims self.sims = sims
self.c_len *= 4 self.c_len *= 4
self.vat[...,0:2] *= 4 self.c_locs[...] *= 4
self.c_caps[...] *= 4
self.timing = np.zeros((self.vat_len, 2, 2)) self.timing = np.zeros((self.c_locs_len, 2, 2))
self.timing[:len(timing)] = timing self.timing[:len(timing)] = timing
self.c = np.zeros((self.c_len, sims), dtype=np.float32) + TMAX self.c = np.zeros((self.c_len, sims), dtype=np.float32) + TMAX
self.s = np.zeros((len(self.s_nodes), sims, 11), dtype=np.float32) self.s = np.zeros((11, self.s_len, sims), dtype=np.float32)
"""Information about the logic values and transitions around the sequential elements (flip-flops) and ports. """Information about the logic values and transitions around the sequential elements (flip-flops) and ports.
The first 3 values are read by ``s_to_c()``. The first 3 values are read by ``s_to_c()``.
The remaining values are written by ``c_to_s()``. The remaining values are written by ``c_to_s()``.
The elements are as follows: The elements are as follows:
* ``s[..., 0]`` (P)PI initial value * ``s[0]`` (P)PI initial value
* ``s[..., 1]`` (P)PI transition time * ``s[1]`` (P)PI transition time
* ``s[..., 2]`` (P)PI final value * ``s[2]`` (P)PI final value
* ``s[..., 3]`` (P)PO initial value * ``s[3]`` (P)PO initial value
* ``s[..., 4]`` (P)PO earliest arrival time (EAT): The time at which the output transitioned from its initial value. * ``s[4]`` (P)PO earliest arrival time (EAT): The time at which the output transitioned from its initial value.
* ``s[..., 5]`` (P)PO latest stabilization time (LST): The time at which the output settled to its final value. * ``s[5]`` (P)PO latest stabilization time (LST): The time at which the output settled to its final value.
* ``s[..., 6]`` (P)PO final value * ``s[6]`` (P)PO final value
* ``s[..., 7]`` (P)PO capture value: probability of capturing a 1 at a given capture time * ``s[7]`` (P)PO capture value: probability of capturing a 1 at a given capture time
* ``s[..., 8]`` (P)PO sampled capture value: decided by random sampling according to a given seed. * ``s[8]`` (P)PO sampled capture value: decided by random sampling according to a given seed.
* ``s[..., 9]`` (P)PO sampled capture slack: (capture time - LST) - decided by random sampling according to a given seed. * ``s[9]`` (P)PO sampled capture slack: (capture time - LST) - decided by random sampling according to a given seed.
* ``s[..., 10]`` Overflow indicator: If non-zero, some signals in the input cone of this output had more * ``s[10]`` Overflow indicator: If non-zero, some signals in the input cone of this output had more
transitions than specified in ``c_caps``. Some transitions have been discarded, the transitions than specified in ``c_caps``. Some transitions have been discarded, the
final values in the waveforms are still valid. final values in the waveforms are still valid.
""" """
@ -85,19 +85,19 @@ class WaveSim(SimOps):
self.params = np.zeros((sims, 4), dtype=np.float32) self.params = np.zeros((sims, 4), dtype=np.float32)
self.params[...,0] = 1.0 self.params[...,0] = 1.0
self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.vat, self.ops, self.params)]) self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.c_locs, self.c_caps, self.ops, self.params)])
self.pi_s_locs = np.flatnonzero(self.vat[self.ppi_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0) self.pi_s_locs = np.flatnonzero(self.c_locs[self.ppi_offset+np.arange(len(self.circuit.io_nodes))] >= 0)
self.po_s_locs = np.flatnonzero(self.vat[self.ppo_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0) self.po_s_locs = np.flatnonzero(self.c_locs[self.ppo_offset+np.arange(len(self.circuit.io_nodes))] >= 0)
self.ppio_s_locs = np.arange(len(self.circuit.io_nodes), len(self.s_nodes)) self.ppio_s_locs = np.arange(len(self.circuit.io_nodes), len(self.s_nodes))
self.pippi_s_locs = np.concatenate([self.pi_s_locs, self.ppio_s_locs]) self.pippi_s_locs = np.concatenate([self.pi_s_locs, self.ppio_s_locs])
self.poppo_s_locs = np.concatenate([self.po_s_locs, self.ppio_s_locs]) self.poppo_s_locs = np.concatenate([self.po_s_locs, self.ppio_s_locs])
self.pi_c_locs = self.vat[self.ppi_offset+self.pi_s_locs, 0] self.pi_c_locs = self.c_locs[self.ppi_offset+self.pi_s_locs]
self.po_c_locs = self.vat[self.ppo_offset+self.po_s_locs, 0] self.po_c_locs = self.c_locs[self.ppo_offset+self.po_s_locs]
self.ppi_c_locs = self.vat[self.ppi_offset+self.ppio_s_locs, 0] self.ppi_c_locs = self.c_locs[self.ppi_offset+self.ppio_s_locs]
self.ppo_c_locs = self.vat[self.ppo_offset+self.ppio_s_locs, 0] self.ppo_c_locs = self.c_locs[self.ppo_offset+self.ppio_s_locs]
self.pippi_c_locs = np.concatenate([self.pi_c_locs, self.ppi_c_locs]) self.pippi_c_locs = np.concatenate([self.pi_c_locs, self.ppi_c_locs])
self.poppo_c_locs = np.concatenate([self.po_c_locs, self.ppo_c_locs]) self.poppo_c_locs = np.concatenate([self.po_c_locs, self.ppo_c_locs])
@ -112,7 +112,7 @@ class WaveSim(SimOps):
Based on the data in ``self.s``, waveforms are generated on the input lines of the circuit. Based on the data in ``self.s``, waveforms are generated on the input lines of the circuit.
It modifies ``self.c``. It modifies ``self.c``.
""" """
sins = np.moveaxis(self.s[self.pippi_s_locs], -1, 0) sins = self.s[:, self.pippi_s_locs]
cond = (sins[2] != 0) + 2*(sins[0] != 0) # choices order: 0 R F 1 cond = (sins[2] != 0) + 2*(sins[0] != 0) # choices order: 0 R F 1
self.c[self.pippi_c_locs] = np.choose(cond, [TMAX, sins[1], TMIN, TMIN]) self.c[self.pippi_c_locs] = np.choose(cond, [TMAX, sins[1], TMIN, TMIN])
self.c[self.pippi_c_locs+1] = np.choose(cond, [TMAX, TMAX, sins[1], TMAX]) self.c[self.pippi_c_locs+1] = np.choose(cond, [TMAX, TMAX, sins[1], TMAX])
@ -127,7 +127,7 @@ class WaveSim(SimOps):
""" """
sims = min(sims or self.sims, self.sims) sims = min(sims or self.sims, self.sims)
for op_start, op_stop in zip(self.level_starts, self.level_stops): for op_start, op_stop in zip(self.level_starts, self.level_stops):
level_eval_cpu(self.ops, op_start, op_stop, self.c, self.vat, 0, sims, level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, 0, sims,
self.timing, self.params, sd, seed) self.timing, self.params, sd, seed)
def c_to_s(self, time=TMAX, sd=0.0, seed=1): def c_to_s(self, time=TMAX, sd=0.0, seed=1):
@ -140,9 +140,9 @@ class WaveSim(SimOps):
:param sd: A standard deviation for uncertainty in the actual capture time. :param sd: A standard deviation for uncertainty in the actual capture time.
:param seed: The random seed for a capture with uncertainty. :param seed: The random seed for a capture with uncertainty.
""" """
for s_loc, (c_loc, c_len, _) in zip(self.poppo_s_locs, self.vat[self.ppo_offset+self.poppo_s_locs]): for s_loc, c_loc, c_len in zip(self.poppo_s_locs, self.c_locs[self.ppo_offset+self.poppo_s_locs], self.c_caps[self.ppo_offset+self.poppo_s_locs]):
for vector in range(self.sims): for vector in range(self.sims):
self.s[s_loc, vector, 3:] = wave_capture_cpu(self.c, c_loc, c_len, vector, time=time, sd=sd, seed=seed) self.s[3:, s_loc, vector] = wave_capture_cpu(self.c, c_loc, c_len, vector, time=time, sd=sd, seed=seed)
def s_ppo_to_ppi(self, time=0.0): def s_ppo_to_ppi(self, time=0.0):
"""Re-assigns the last sampled capture to the appropriate pseudo-primary inputs (PPI). """Re-assigns the last sampled capture to the appropriate pseudo-primary inputs (PPI).
@ -151,9 +151,9 @@ class WaveSim(SimOps):
:param time: The transition time at the inputs (usually 0.0). :param time: The transition time at the inputs (usually 0.0).
""" """
self.s[self.ppio_s_locs, :, 0] = self.s[self.ppio_s_locs, :, 2] self.s[0, self.ppio_s_locs] = self.s[2, self.ppio_s_locs]
self.s[self.ppio_s_locs, :, 1] = time self.s[1, self.ppio_s_locs] = time
self.s[self.ppio_s_locs, :, 2] = self.s[self.ppio_s_locs, :, 8] self.s[2, self.ppio_s_locs] = self.s[8, self.ppio_s_locs]
@numba.njit @numba.njit
@ -173,7 +173,7 @@ def rand_gauss_cpu(seed, sd):
@numba.njit @numba.njit
def wave_eval_cpu(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0): def wave_eval_cpu(op, cbuf, c_locs, c_caps, st_idx, line_times, param, sd=0.0, seed=0):
lut, z_idx, a_idx, b_idx, c_idx, d_idx = op lut, z_idx, a_idx, b_idx, c_idx, d_idx = op
# >>> same code as wave_eval_cpu (except rand_gauss_*pu()-calls) >>> # >>> same code as wave_eval_cpu (except rand_gauss_*pu()-calls) >>>
@ -181,11 +181,12 @@ def wave_eval_cpu(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0):
_seed = (seed << 4) + (z_idx << 20) + (st_idx << 1) _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1)
a_mem = vat[a_idx, 0] a_mem = c_locs[a_idx]
b_mem = vat[b_idx, 0] b_mem = c_locs[b_idx]
c_mem = vat[c_idx, 0] c_mem = c_locs[c_idx]
d_mem = vat[d_idx, 0] d_mem = c_locs[d_idx]
z_mem, z_cap, _ = vat[z_idx] z_mem = c_locs[z_idx]
z_cap = c_caps[z_idx]
a_cur = int(0) a_cur = int(0)
b_cur = int(0) b_cur = int(0)
@ -280,12 +281,12 @@ def wave_eval_cpu(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0):
@numba.njit @numba.njit
def level_eval_cpu(ops, op_start, op_stop, c, vat, st_start, st_stop, line_times, params, sd, seed): def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, st_start, st_stop, line_times, params, sd, seed):
overflows = 0 overflows = 0
for op_idx in range(op_start, op_stop): for op_idx in range(op_start, op_stop):
op = ops[op_idx] op = ops[op_idx]
for st_idx in range(st_start, st_stop): for st_idx in range(st_start, st_stop):
wave_eval_cpu(op, c, vat, st_idx, line_times, params[st_idx], sd, seed) wave_eval_cpu(op, c, c_locs, c_caps, st_idx, line_times, params[st_idx], sd, seed)
@numba.njit @numba.njit
@ -347,7 +348,8 @@ class WaveSimCuda(WaveSim):
self.c = cuda.to_device(self.c) self.c = cuda.to_device(self.c)
self.s = cuda.to_device(self.s) self.s = cuda.to_device(self.s)
self.ops = cuda.to_device(self.ops) self.ops = cuda.to_device(self.ops)
self.vat = cuda.to_device(self.vat) self.c_locs = cuda.to_device(self.c_locs)
self.c_caps = cuda.to_device(self.c_caps)
self.timing = cuda.to_device(self.timing) self.timing = cuda.to_device(self.timing)
self.params = cuda.to_device(self.params) self.params = cuda.to_device(self.params)
@ -355,7 +357,7 @@ class WaveSimCuda(WaveSim):
def s_to_c(self): def s_to_c(self):
grid_dim = self._grid_dim(self.sims, self.s_len) grid_dim = self._grid_dim(self.sims, self.s_len)
wave_assign_gpu[grid_dim, self._block_dim](self.c, self.s, self.vat, self.ppi_offset) wave_assign_gpu[grid_dim, self._block_dim](self.c, self.s, self.c_locs, self.ppi_offset)
def _grid_dim(self, x, y): def _grid_dim(self, x, y):
gx = math.ceil(x / self._block_dim[0]) gx = math.ceil(x / self._block_dim[0])
@ -366,29 +368,29 @@ class WaveSimCuda(WaveSim):
sims = min(sims or self.sims, self.sims) sims = min(sims or self.sims, self.sims)
for op_start, op_stop in zip(self.level_starts, self.level_stops): for op_start, op_stop in zip(self.level_starts, self.level_stops):
grid_dim = self._grid_dim(sims, op_stop - op_start) grid_dim = self._grid_dim(sims, op_stop - op_start)
wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.vat, int(0), wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, int(0),
sims, self.timing, self.params, sd, seed) sims, self.timing, self.params, sd, seed)
cuda.synchronize() cuda.synchronize()
def c_to_s(self, time=TMAX, sd=0.0, seed=1):
    """Launch the ``wave_capture_gpu`` kernel on this simulator's ``c`` and ``s`` arrays.

    The launch grid is derived from ``self.sims`` and ``self.s_len``.

    :param time: Forwarded unchanged to the kernel (defaults to ``TMAX``).
    :param sd: Multiplied by ``sqrt(2)`` before it is handed to the kernel.
    :param seed: Forwarded unchanged to the kernel.
    """
    kernel = wave_capture_gpu[self._grid_dim(self.sims, self.s_len), self._block_dim]
    scaled_sd = sd * math.sqrt(2)
    kernel(self.c, self.s, self.c_locs, self.c_caps, self.ppo_offset, time, scaled_sd, seed)
def s_ppo_to_ppi(self, time=0.0):
    """Launch the ``ppo_to_ppi_gpu`` kernel on this simulator's ``s`` array.

    The launch grid is derived from ``self.sims`` and ``self.s_len``.

    :param time: Forwarded unchanged to the kernel (defaults to ``0.0``).
    """
    kernel = ppo_to_ppi_gpu[self._grid_dim(self.sims, self.s_len), self._block_dim]
    kernel(self.s, self.c_locs, time, self.ppi_offset, self.ppo_offset)
@cuda.jit() @cuda.jit()
def wave_assign_gpu(c, s, vat, ppi_offset): def wave_assign_gpu(c, s, c_locs, ppi_offset):
x, y = cuda.grid(2) x, y = cuda.grid(2)
if y >= len(s): return if y >= s.shape[1]: return
c_loc, c_len, _ = vat[ppi_offset + y] c_loc = c_locs[ppi_offset + y]
if c_loc < 0: return if c_loc < 0: return
if x >= c.shape[-1]: return if x >= c.shape[-1]: return
value = int(s[y, x, 2] >= 0.5) | (2*int(s[y, x, 0] >= 0.5)) value = int(s[2, y, x] >= 0.5) | (2*int(s[0, y, x] >= 0.5))
ttime = s[y, x, 1] ttime = s[1, y, x]
if value == 0: if value == 0:
c[c_loc, x] = TMAX c[c_loc, x] = TMAX
c[c_loc+1, x] = TMAX c[c_loc+1, x] = TMAX
@ -421,7 +423,7 @@ def rand_gauss_gpu(seed, sd):
@cuda.jit() @cuda.jit()
def wave_eval_gpu(ops, op_start, op_stop, cbuf, vat, st_start, st_stop, line_times, param, sd, seed): def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, st_start, st_stop, line_times, param, sd, seed):
x, y = cuda.grid(2) x, y = cuda.grid(2)
st_idx = st_start + x st_idx = st_start + x
op_idx = op_start + y op_idx = op_start + y
@ -442,11 +444,12 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, vat, st_start, st_stop, line_tim
_seed = (seed << 4) + (z_idx << 20) + (st_idx << 1) _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1)
a_mem = vat[a_idx, 0] a_mem = c_locs[a_idx]
b_mem = vat[b_idx, 0] b_mem = c_locs[b_idx]
c_mem = vat[c_idx, 0] c_mem = c_locs[c_idx]
d_mem = vat[d_idx, 0] d_mem = c_locs[d_idx]
z_mem, z_cap, _ = vat[z_idx] z_mem = c_locs[z_idx]
z_cap = c_caps[z_idx]
a_cur = int(0) a_cur = int(0)
b_cur = int(0) b_cur = int(0)
@ -541,10 +544,11 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, vat, st_start, st_stop, line_tim
@cuda.jit() @cuda.jit()
def wave_capture_gpu(c, s, vat, ppo_offset, time, s_sqrt2, seed): def wave_capture_gpu(c, s, c_locs, c_caps, ppo_offset, time, s_sqrt2, seed):
x, y = cuda.grid(2) x, y = cuda.grid(2)
if ppo_offset + y >= len(vat): return if ppo_offset + y >= len(c_locs): return
line, tdim, _ = vat[ppo_offset + y] line = c_locs[ppo_offset + y]
tdim = c_caps[ppo_offset + y]
if line < 0: return if line < 0: return
if x >= c.shape[-1]: return if x >= c.shape[-1]: return
vector = x vector = x
@ -588,25 +592,25 @@ def wave_capture_gpu(c, s, vat, ppo_offset, time, s_sqrt2, seed):
else: else:
acc = val acc = val
s[y, vector, 3] = (c[line, vector] <= TMIN) s[3, y, vector] = (c[line, vector] <= TMIN)
s[y, vector, 4] = eat s[4, y, vector] = eat
s[y, vector, 5] = lst s[5, y, vector] = lst
s[y, vector, 6] = final s[6, y, vector] = final
s[y, vector, 7] = acc s[7, y, vector] = acc
s[y, vector, 8] = val s[8, y, vector] = val
s[y, vector, 9] = 0 # TODO s[9, y, vector] = 0 # TODO
s[y, vector, 10] = ovl s[10, y, vector] = ovl
@cuda.jit()
def ppo_to_ppi_gpu(s, c_locs, time, ppi_offset, ppo_offset):
    """Per-thread copy of values within ``s`` for one ``(x, y)`` grid position.

    For every position that passes the checks below, slot 2 is copied to
    slot 0, ``time`` is written to slot 1, and slot 8 is copied to slot 2.

    :param s: 3-D array indexed as ``s[slot, y, x]``.
    :param c_locs: 1-D array; positions whose entry at ``ppi_offset + y`` or
        ``ppo_offset + y`` is negative are left untouched.
    :param time: Value stored into slot 1.
    :param ppi_offset: Offset added to ``y`` for the first ``c_locs`` lookup.
    :param ppo_offset: Offset added to ``y`` for the second ``c_locs`` lookup.
    """
    x, y = cuda.grid(2)
    # s is indexed as s[slot, y, x] below, so y is bounded by axis 1 and x by
    # axis 2. Checking axes 0 and 1 (the previous layout) would stop at the
    # number of slots and skip valid positions or run out of bounds.
    if y >= s.shape[1]: return
    if x >= s.shape[2]: return

    if c_locs[ppi_offset + y] < 0: return
    if c_locs[ppo_offset + y] < 0: return

    s[0, y, x] = s[2, y, x]
    s[1, y, x] = time
    s[2, y, x] = s[8, y, x]

88
tests/test_wave_sim.py

@ -2,17 +2,17 @@ import numpy as np
from kyupy.wave_sim import WaveSim, WaveSimCuda, wave_eval_cpu, TMIN, TMAX from kyupy.wave_sim import WaveSim, WaveSimCuda, wave_eval_cpu, TMIN, TMAX
from kyupy.logic_sim import LogicSim from kyupy.logic_sim import LogicSim
from kyupy import verilog, sdf, logic, bench from kyupy import logic, bench, sim
from kyupy.logic import mvarray from kyupy.logic import mvarray
from kyupy.sim import SimPrim
def test_nand_delays(): def test_nand_delays():
op = (SimPrim.NAND4, 4, 0, 1, 2, 3) op = (sim.NAND4, 4, 0, 1, 2, 3)
#op = (0b0111, 4, 0, 1) #op = (0b0111, 4, 0, 1)
c = np.full((5*16, 1), TMAX) # 5 waveforms of capacity 16 c = np.full((5*16, 1), TMAX) # 5 waveforms of capacity 16
vat = np.zeros((5, 3), dtype='int') c_locs = np.zeros((5,), dtype='int')
for i in range(5): vat[i] = i*16, 16, 0 # 1:1 mapping c_caps = np.zeros((5,), dtype='int')
for i in range(5): c_locs[i], c_caps[i] = i*16, 16 # 1:1 mapping
# SDF specifies IOPATH delays with respect to output polarity # SDF specifies IOPATH delays with respect to output polarity
# SDF pulse rejection value is determined by IOPATH causing last transition and polarity of last transition # SDF pulse rejection value is determined by IOPATH causing last transition and polarity of last transition
@ -32,7 +32,7 @@ def test_nand_delays():
def wave_assert(inputs, output): def wave_assert(inputs, output):
for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i
wave_eval_cpu(op, c, vat, 0, line_times, sdata) wave_eval_cpu(op, c, c_locs, c_caps, 0, line_times, sdata)
for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[4,i], v) for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[4,i], v)
wave_assert([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1 wave_assert([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1
@ -53,29 +53,29 @@ def test_tiny_circuit():
lt = np.zeros((len(c.lines), 2, 2)) lt = np.zeros((len(c.lines), 2, 2))
lt[:,0,:] = 1.0 # unit delay for all lines lt[:,0,:] = 1.0 # unit delay for all lines
wsim = WaveSim(c, lt) wsim = WaveSim(c, lt)
assert len(wsim.s) == 5 assert wsim.s.shape[1] == 5
# values for x # values for x
wsim.s[0,0,:3] = 0, 0.1, 0 wsim.s[:3,0,0] = 0, 0.1, 0
wsim.s[0,1,:3] = 0, 0.2, 1 wsim.s[:3,0,1] = 0, 0.2, 1
wsim.s[0,2,:3] = 1, 0.3, 0 wsim.s[:3,0,2] = 1, 0.3, 0
wsim.s[0,3,:3] = 1, 0.4, 1 wsim.s[:3,0,3] = 1, 0.4, 1
# values for y # values for y
wsim.s[1,0,:3] = 1, 0.5, 0 wsim.s[:3,1,0] = 1, 0.5, 0
wsim.s[1,1,:3] = 1, 0.6, 0 wsim.s[:3,1,1] = 1, 0.6, 0
wsim.s[1,2,:3] = 1, 0.7, 0 wsim.s[:3,1,2] = 1, 0.7, 0
wsim.s[1,3,:3] = 0, 0.8, 1 wsim.s[:3,1,3] = 0, 0.8, 1
wsim.s_to_c() wsim.s_to_c()
x_c_loc = wsim.vat[wsim.ppi_offset+0, 0] # check x waveforms x_c_loc = wsim.c_locs[wsim.ppi_offset+0] # check x waveforms
np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 0], [TMAX, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 0], [TMAX, TMAX, TMAX])
np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 1], [0.2, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 1], [0.2, TMAX, TMAX])
np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 2], [TMIN, 0.3, TMAX]) np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 2], [TMIN, 0.3, TMAX])
np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 3], [TMIN, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[x_c_loc:x_c_loc+3, 3], [TMIN, TMAX, TMAX])
y_c_loc = wsim.vat[wsim.ppi_offset+1, 0] # check y waveforms y_c_loc = wsim.c_locs[wsim.ppi_offset+1] # check y waveforms
np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 0], [TMIN, 0.5, TMAX]) np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 0], [TMIN, 0.5, TMAX])
np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 1], [TMIN, 0.6, TMAX]) np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 1], [TMIN, 0.6, TMAX])
np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 2], [TMIN, 0.7, TMAX]) np.testing.assert_allclose(wsim.c[y_c_loc:y_c_loc+3, 2], [TMIN, 0.7, TMAX])
@ -83,19 +83,19 @@ def test_tiny_circuit():
wsim.c_prop() wsim.c_prop()
a_c_loc = wsim.vat[wsim.ppo_offset+2, 0] # check a waveforms a_c_loc = wsim.c_locs[wsim.ppo_offset+2] # check a waveforms
np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 0], [TMAX, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 0], [TMAX, TMAX, TMAX])
np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 1], [1.2, 1.6, TMAX]) np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 1], [1.2, 1.6, TMAX])
np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 2], [TMIN, 1.3, TMAX]) np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 2], [TMIN, 1.3, TMAX])
np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 3], [1.8, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[a_c_loc:a_c_loc+3, 3], [1.8, TMAX, TMAX])
o_c_loc = wsim.vat[wsim.ppo_offset+3, 0] # check o waveforms o_c_loc = wsim.c_locs[wsim.ppo_offset+3] # check o waveforms
np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 0], [TMIN, 1.5, TMAX]) np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 0], [TMIN, 1.5, TMAX])
np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 1], [TMIN, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 1], [TMIN, TMAX, TMAX])
np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 2], [TMIN, 1.7, TMAX]) np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 2], [TMIN, 1.7, TMAX])
np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 3], [TMIN, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[o_c_loc:o_c_loc+3, 3], [TMIN, TMAX, TMAX])
n_c_loc = wsim.vat[wsim.ppo_offset+4, 0] # check n waveforms n_c_loc = wsim.c_locs[wsim.ppo_offset+4] # check n waveforms
np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 0], [TMIN, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 0], [TMIN, TMAX, TMAX])
np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 1], [TMIN, 1.2, TMAX]) np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 1], [TMIN, 1.2, TMAX])
np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 2], [1.3, TMAX, TMAX]) np.testing.assert_allclose(wsim.c[n_c_loc:n_c_loc+3, 2], [1.3, TMAX, TMAX])
@ -104,22 +104,22 @@ def test_tiny_circuit():
wsim.c_to_s() wsim.c_to_s()
# check a captures # check a captures
np.testing.assert_allclose(wsim.s[2, 0, 3:7], [0, TMAX, TMIN, 0]) np.testing.assert_allclose(wsim.s[3:7, 2, 0], [0, TMAX, TMIN, 0])
np.testing.assert_allclose(wsim.s[2, 1, 3:7], [0, 1.2, 1.6, 0]) np.testing.assert_allclose(wsim.s[3:7, 2, 1], [0, 1.2, 1.6, 0])
np.testing.assert_allclose(wsim.s[2, 2, 3:7], [1, 1.3, 1.3, 0]) np.testing.assert_allclose(wsim.s[3:7, 2, 2], [1, 1.3, 1.3, 0])
np.testing.assert_allclose(wsim.s[2, 3, 3:7], [0, 1.8, 1.8, 1]) np.testing.assert_allclose(wsim.s[3:7, 2, 3], [0, 1.8, 1.8, 1])
# check o captures # check o captures
np.testing.assert_allclose(wsim.s[3, 0, 3:7], [1, 1.5, 1.5, 0]) np.testing.assert_allclose(wsim.s[3:7, 3, 0], [1, 1.5, 1.5, 0])
np.testing.assert_allclose(wsim.s[3, 1, 3:7], [1, TMAX, TMIN, 1]) np.testing.assert_allclose(wsim.s[3:7, 3, 1], [1, TMAX, TMIN, 1])
np.testing.assert_allclose(wsim.s[3, 2, 3:7], [1, 1.7, 1.7, 0]) np.testing.assert_allclose(wsim.s[3:7, 3, 2], [1, 1.7, 1.7, 0])
np.testing.assert_allclose(wsim.s[3, 3, 3:7], [1, TMAX, TMIN, 1]) np.testing.assert_allclose(wsim.s[3:7, 3, 3], [1, TMAX, TMIN, 1])
# check n captures
np.testing.assert_allclose(wsim.s[4, 0, 3:7], [1, TMAX, TMIN, 1]) np.testing.assert_allclose(wsim.s[3:7, 4, 0], [1, TMAX, TMIN, 1])
np.testing.assert_allclose(wsim.s[4, 1, 3:7], [1, 1.2, 1.2, 0]) np.testing.assert_allclose(wsim.s[3:7, 4, 1], [1, 1.2, 1.2, 0])
np.testing.assert_allclose(wsim.s[4, 2, 3:7], [0, 1.3, 1.3, 1]) np.testing.assert_allclose(wsim.s[3:7, 4, 2], [0, 1.3, 1.3, 1])
np.testing.assert_allclose(wsim.s[4, 3, 3:7], [0, TMAX, TMIN, 0]) np.testing.assert_allclose(wsim.s[3:7, 4, 3], [0, TMAX, TMIN, 0])
def compare_to_logic_sim(wsim: WaveSim): def compare_to_logic_sim(wsim: WaveSim):
@ -127,17 +127,17 @@ def compare_to_logic_sim(wsim: WaveSim):
rng = np.random.default_rng(10) rng = np.random.default_rng(10)
tests = rng.choice(choices, (wsim.s_len, wsim.sims)) tests = rng.choice(choices, (wsim.s_len, wsim.sims))
wsim.s[:, :, 0] = (tests & 2) >> 1 wsim.s[0] = (tests & 2) >> 1
wsim.s[:, :, 3] = (tests & 2) >> 1 wsim.s[3] = (tests & 2) >> 1
wsim.s[:, :, 1] = 0.0 wsim.s[1] = 0.0
wsim.s[:, :, 2] = tests & 1 wsim.s[2] = tests & 1
wsim.s[:, :, 6] = tests & 1 wsim.s[6] = tests & 1
wsim.s_to_c() wsim.s_to_c()
wsim.c_prop() wsim.c_prop()
wsim.c_to_s() wsim.c_to_s()
resp = np.array(wsim.s[:, :, 6], dtype=np.uint8) | (np.array(wsim.s[:, :, 3], dtype=np.uint8)<<1) resp = np.array(wsim.s[6], dtype=np.uint8) | (np.array(wsim.s[3], dtype=np.uint8)<<1)
resp |= ((resp ^ (resp >> 1)) & 1) << 2 # transitions resp |= ((resp ^ (resp >> 1)) & 1) << 2 # transitions
resp[wsim.pi_s_locs] = logic.UNASSIGNED resp[wsim.pi_s_locs] = logic.UNASSIGNED

Loading…
Cancel
Save