new wave sim

3 years ago · f1ebe1487c
4 changed files with 552 additions and 1125 deletions
--- a/src/kyupy/schedule.py
+++ b/src/kyupy/schedule.py
@ -145,7 +145,7 @@ class Heap:
        return "\n".join(r)
-class Schedule:
+class SimOps:
    """A static scheduler that translates a Circuit into a topologically sorted list of basic logic operations (self.ops) and
    a value allocation table (self.vat) for use in simulators.
@ -155,23 +155,24 @@ class Schedule:
    :param c_reuse: If enabled, memory of intermediate signal waveforms will be re-used. This greatly reduces
        memory footprint, but intermediate signal waveforms become inaccessible after a propagation.
    """
-    def __init__(self, circuit, strip_forks=False, keep_signals=True, signal_caps=1):
+    def __init__(self, circuit, c_caps=1, c_reuse=False, strip_forks=False):
        self.circuit = circuit
-        self.interface = list(circuit.io_nodes) + [n for n in circuit.nodes if 'dff' in n.kind.lower()]
+        self.s_nodes = list(circuit.io_nodes) + [n for n in circuit.nodes if 'dff' in n.kind.lower()]
        keep_signals = not c_reuse
-        if isinstance(signal_caps, int):
+        if isinstance(c_caps, int):
-            signal_caps = [signal_caps] * len(circuit.lines)
+            c_caps = [c_caps] * len(circuit.lines)
        # indices for value allocation table (vat)
        self.zero_idx = len(circuit.lines)
        self.tmp_idx = self.zero_idx + 1
        self.ppi_offset = self.tmp_idx + 1
-        self.ppo_offset = self.ppi_offset + len(self.interface)
+        self.ppo_offset = self.ppi_offset + len(self.s_nodes)
-        self.vat_length = self.ppo_offset + len(self.interface)
+        self.vat_length = self.ppo_offset + len(self.s_nodes)
        # translate circuit structure into self.ops
        ops = []
-        interface_dict = dict((n, i) for i, n in enumerate(self.interface))
+        interface_dict = dict((n, i) for i, n in enumerate(self.s_nodes))
        for n in circuit.topological_order():
            if n in interface_dict:
                inp_idx = self.ppi_offset + interface_dict[n]
@ -260,7 +261,7 @@ class Schedule:
        ref_count[self.tmp_idx] += 1
        # allocate and keep memory for PI/PPI, keep memory for PO/PPO (allocated later)
-        for i, n in enumerate(self.interface):
+        for i, n in enumerate(self.s_nodes):
            if len(n.outs) > 0:
                self.vat[self.ppi_offset + i] = h.alloc(1), 1, 0
                ref_count[self.ppi_offset + i] += 1
@ -286,7 +287,7 @@ class Schedule:
                if ref_count[i2_idx] <= 0: free_list.append(self.vat[i2_idx, 0])
                if ref_count[i3_idx] <= 0: free_list.append(self.vat[i3_idx, 0])
                o_idx = op[1]
-                cap = signal_caps[o_idx]
+                cap = c_caps[o_idx]
                self.vat[o_idx] = h.alloc(cap), cap, 0
            if not keep_signals:
                for loc in free_list:
@ -298,11 +299,11 @@ class Schedule:
                self.vat[lidx] = self.vat[stem]
        # copy memory location to PO/PPO area
-        for i, n in enumerate(self.interface):
+        for i, n in enumerate(self.s_nodes):
            if len(n.ins) > 0:
                self.vat[self.ppo_offset + i] = self.vat[n.ins[0]]
-        self.state_length = h.max_size
+        self.c_len = h.max_size
        from collections import defaultdict
        self.prim_counts = defaultdict(int)
--- a/src/kyupy/wave_sim4.py
+++ b/src/kyupy/wave_sim4.py
@ -0,0 +1,365 @@
 """High-throughput combinational logic timing simulators.
 These simulators work similarly to :py:class:`~kyupy.logic_sim.LogicSim`.
 They propagate values through the combinational circuit from (pseudo) primary inputs to (pseudo) primary outputs.
 Instead of propagating logic values, these simulators propagate signal histories (waveforms).
 They are designed to run many simulations in parallel and while their latencies are quite high, they can achieve
 high throughput.
 The simulators are not event-based and are not capable of simulating sequential circuits directly.
 """
 import math
 from bisect import bisect, insort_left
 import numpy as np
 from . import numba, cuda, hr_bytes
 from .sim import SimOps
 # Sentinel time stamps used inside waveforms.
 # TMAX_OVL is larger than TMAX, so a single ``t >= TMAX`` comparison detects either terminator.
 TMAX = np.float32(2 ** 127)
 """A large 32-bit floating point value used to mark the end of a waveform."""
 TMAX_OVL = np.float32(1.1 * 2 ** 127)
 """A large 32-bit floating point value used to mark the end of a waveform that
 may be incomplete due to an overflow."""
 TMIN = np.float32(-2 ** 127)
 """A large negative 32-bit floating point value used at the beginning of waveforms that start with logic-1."""
 class WaveSim(SimOps):
    """A waveform-based combinational logic timing simulator running on CPU.

    :param circuit: The circuit to simulate.
    :param timing: The timing annotation of the circuit (see :py:func:`kyupy.sdf.DelayFile.annotation` for details)
    :param sims: The number of parallel simulations.
    :param c_caps: The number of floats available in each waveform. Values must be positive and a multiple of 4.
        Waveforms encode the signal switching history by storing transition times.
        The waveform capacity roughly corresponds to the number of transitions
        that can be stored. A capacity of ``n`` can store at least ``n-2`` transitions. If more transitions are
        generated during simulation, the latest glitch is removed (freeing up two transition times) and an overflow
        flag is set. If an integer is given, all waveforms are set to that same capacity. With an array of length
        ``len(circuit.lines)`` the capacity is set for each intermediate waveform individually.
    :param c_reuse: If enabled, memory of intermediate signal waveforms will be re-used. This greatly reduces
        memory footprint, but intermediate signal waveforms become inaccessible after a propagation.
    :param strip_forks: If enabled, the simulator will not evaluate fork nodes explicitly. This saves simulation time
        by reducing the number of nodes to simulate, but (interconnect) delay annotations of lines read by fork nodes
        are ignored.
    :raises ValueError: If ``c_caps`` contains a value that is not a positive integer multiple of 4.
    """

    def __init__(self, circuit, timing, sims=8, c_caps=16, c_reuse=False, strip_forks=False):
        # Validate scalar and per-line capacities alike; a bare ``assert`` would both vanish under ``-O``
        # and fail with an ambiguous-truth-value error for the documented array form.
        caps = np.asarray(c_caps)
        if (not np.issubdtype(caps.dtype, np.integer)
                or np.any(caps <= 0) or np.any(caps % 4 != 0)):
            raise ValueError(f'c_caps must be positive integer multiples of 4, got {c_caps!r}')
        # The parent class is handed capacities in units of 4 floats. ``tolist()`` yields plain Python
        # ints (a single int for scalars), which keeps the parent's ``isinstance(c_caps, int)`` test working.
        block_caps = (caps // 4).tolist()
        super().__init__(circuit, c_caps=block_caps, c_reuse=c_reuse, strip_forks=strip_forks)
        self.sims = sims

        # Convert lengths, locations and capacities from blocks of 4 back to individual floats.
        self.c_len *= 4
        self.vat[...,0:2] *= 4

        self.timing = np.zeros((self.c_len, 2, 2))
        self.timing[:len(timing)] = timing

        # Waveform memory: one column per simulation, initialized with the end-of-waveform marker.
        self.c = np.zeros((self.c_len, sims), dtype=np.float32) + TMAX

        self.s = np.zeros((len(self.s_nodes), sims, 11), dtype=np.float32)
        """Information about the logic values and transitions around the sequential elements (flip-flops) and ports.
        The first 3 values are read by ``s_to_c()``.
        The remaining values are written by ``c_to_s()``.
        The elements are as follows:
        * ``s[..., 0]`` (P)PI initial value
        * ``s[..., 1]`` (P)PI transition time
        * ``s[..., 2]`` (P)PI final value
        * ``s[..., 3]`` (P)PO initial value
        * ``s[..., 4]`` (P)PO earliest arrival time (EAT): The time at which the output transitioned from its initial value.
        * ``s[..., 5]`` (P)PO latest stabilization time (LST): The time at which the output settled to its final value.
        * ``s[..., 6]`` (P)PO final value
        * ``s[..., 7]`` (P)PO capture value: probability of capturing a 1 at a given capture time
        * ``s[..., 8]`` (P)PO sampled capture value: decided by random sampling according to a given seed.
        * ``s[..., 9]`` (P)PO sampled capture slack: (capture time - LST) - decided by random sampling according to a given seed.
        * ``s[..., 10]`` Overflow indicator: If non-zero, some signals in the input cone of this output had more
          transitions than specified in ``c_caps``. Some transitions have been discarded, the
          final values in the waveforms are still valid.
        """

        # Per-simulation parameters consumed by ``wave_eval``: column 0 is a factor applied to all delays,
        # column 1 is compared against line indices to select a line that receives the extra delays
        # stored in columns 2 and 3.
        self.params = np.zeros((sims, 4), dtype=np.float32)
        self.params[...,0] = 1.0

        m1 = np.array([2 ** x for x in range(7, -1, -1)], dtype=np.uint8)
        m0 = ~m1
        self.mask = np.rollaxis(np.vstack((m0, m1)), 1)
        self.overflows = 0
        self.lst_eat_valid = False

        # Index sets into ``self.s`` (``*_s_locs``) and start locations in ``self.c`` (``*_c_locs``)
        # for primary inputs/outputs (entries with an allocated location) and for the remaining s_nodes.
        self.pi_s_locs = np.flatnonzero(self.vat[self.ppi_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0)
        self.po_s_locs = np.flatnonzero(self.vat[self.ppo_offset+np.arange(len(self.circuit.io_nodes)), 0] >= 0)
        self.ppio_s_locs = np.arange(len(self.circuit.io_nodes), len(self.s_nodes))
        self.pippi_s_locs = np.concatenate([self.pi_s_locs, self.ppio_s_locs])
        self.poppo_s_locs = np.concatenate([self.po_s_locs, self.ppio_s_locs])
        self.pi_c_locs = self.vat[self.ppi_offset+self.pi_s_locs, 0]
        self.po_c_locs = self.vat[self.ppo_offset+self.po_s_locs, 0]
        self.ppi_c_locs = self.vat[self.ppi_offset+self.ppio_s_locs, 0]
        self.ppo_c_locs = self.vat[self.ppo_offset+self.ppio_s_locs, 0]
        self.pippi_c_locs = np.concatenate([self.pi_c_locs, self.ppi_c_locs])
        self.poppo_c_locs = np.concatenate([self.po_c_locs, self.ppo_c_locs])

        # Shadow the static method with a compiled version on this instance.
        self.wave_capture = numba.njit(WaveSim.wave_capture)

    def __repr__(self):
        total_mem = self.c.nbytes + self.vat.nbytes + self.ops.nbytes + self.s.nbytes
        return f'<WaveSim {self.circuit.name} sims={self.sims} ops={len(self.ops)} ' + \
               f'levels={len(self.level_starts)} mem={hr_bytes(total_mem)}>'

    def get_line_delay(self, line, polarity):
        """Returns the current delay of the given ``line`` and ``polarity`` in the simulation model."""
        return self.timing[line, 0, polarity]

    def set_line_delay(self, line, polarity, delay):
        """Sets a new ``delay`` for the given ``line`` and ``polarity`` in the simulation model."""
        self.timing[line, 0, polarity] = delay

    def s_to_c(self):
        """Transfers values of sequential elements and primary inputs to the combinational portion.

        Based on the data in ``self.s``, waveforms are generated on the input lines of the circuit.
        It modifies ``self.c``.
        """
        sins = np.moveaxis(self.s[self.pippi_s_locs], -1, 0)
        cond = (sins[2] != 0) + 2*(sins[0] != 0)  # choices order: 0 R F 1
        self.c[self.pippi_c_locs] = np.choose(cond, [TMAX, sins[1], TMIN, TMIN])
        self.c[self.pippi_c_locs+1] = np.choose(cond, [TMAX, TMAX, sins[1], TMAX])
        self.c[self.pippi_c_locs+2] = TMAX

    def c_prop(self, sims=None, sd=0.0, seed=1):
        """Propagates all waveforms from the (pseudo) primary inputs to the (pseudo) primary outputs.

        :param sims: Number of parallel simulations to execute. If None, all available simulations are performed.
        :param sd: Standard deviation for injection of random delay variation. Active, if value is positive.
        :param seed: Random seed for delay variations.
        """
        sims = min(sims or self.sims, self.sims)
        for op_start, op_stop in zip(self.level_starts, self.level_stops):
            self.overflows += level_eval(self.ops, op_start, op_stop, self.c, self.vat, 0, sims,
                                         self.timing, self.params, sd, seed)
        self.lst_eat_valid = False

    def c_to_s(self, time=TMAX, sd=0.0, seed=1):
        """Simulates a capture operation at all sequential elements and primary outputs.

        Propagated waveforms in ``self.c`` at and around the given capture time are analyzed and
        the results are stored in ``self.s``.

        :param time: The desired capture time. By default, a capture of the settled value is performed.
        :param sd: A standard deviation for uncertainty in the actual capture time.
        :param seed: The random seed for a capture with uncertainty.
        """
        for s_loc, (c_loc, c_len, _) in zip(self.poppo_s_locs, self.vat[self.ppo_offset+self.poppo_s_locs]):
            for vector in range(self.sims):
                self.s[s_loc, vector, 3:] = self.wave_capture(self.c, c_loc, c_len, vector, time=time, sd=sd, seed=seed)

    def s_ppo_to_ppi(self, time=0.0):
        """Re-assigns the last sampled capture to the appropriate pseudo-primary inputs (PPI).

        Each PPI transition is constructed from its previous final value, the
        given time, and the sampled captured value of its PPO. Reads and modifies ``self.s``.

        :param time: The transition time at the inputs (usually 0.0).
        """
        self.s[self.ppio_s_locs, :, 0] = self.s[self.ppio_s_locs, :, 2]
        self.s[self.ppio_s_locs, :, 1] = time
        self.s[self.ppio_s_locs, :, 2] = self.s[self.ppio_s_locs, :, 8]

    @staticmethod
    def wave_capture(c, c_loc, c_len, vector, time=TMAX, sd=0.0, seed=1):
        """Analyzes one waveform and returns the 8 values that ``c_to_s()`` stores in ``s[..., 3:]``.

        :param c: The waveform memory.
        :param c_loc: Start location of the waveform in ``c``.
        :param c_len: Number of entries of the waveform.
        :param vector: Index of the simulation (column of ``c``).
        :param time: The capture time.
        :param sd: Standard deviation of the capture time; a positive value enables sampled capture.
        :param seed: Random seed for the sampled capture.
        :return: A tuple of initial value, earliest transition time, latest transition time, final value,
            capture value (probability of a 1 if ``sd`` is positive), sampled capture value, a constant 0,
            and the overflow flag.
        """
        s_sqrt2 = sd * math.sqrt(2)
        m = 0.5
        acc = 0.0
        eat = TMAX
        lst = TMIN
        ovl = 0
        val = int(0)
        final = int(0)
        w = c[c_loc:c_loc+c_len, vector]
        for t in w:
            if t >= TMAX:
                # end of waveform; the larger terminator marks a waveform that lost transitions
                if t == TMAX_OVL:
                    ovl = 1
                break
            m = -m
            final ^= 1
            if t < time:
                val ^= 1
            # a leading TMIN only encodes an initial logic-1, it is not a real transition
            if t <= TMIN: continue
            if s_sqrt2 > 0:
                acc += m * (1 + math.erf((t - time) / s_sqrt2))
            eat = min(eat, t)
            lst = max(lst, t)
        if s_sqrt2 > 0:
            if m < 0:
                acc += 1
            if acc >= 0.99:
                val = 1
            elif acc > 0.01:
                # uncertain capture: draw a pseudo-random number in [0, 1) and compare it to the probability
                seed = (seed << 4) + (vector << 20) + c_loc
                seed = int(0xDEECE66D) * seed + 0xB
                seed = int(0xDEECE66D) * seed + 0xB
                rnd = float((seed >> 8) & 0xffffff) / float(1 << 24)
                val = rnd < acc
            else:
                val = 0
        else:
            acc = val
        return (w[0] <= TMIN), eat, lst, final, acc, val, 0, ovl
@numba.njit
def level_eval(ops, op_start, op_stop, c, vat, st_start, st_stop, line_times, params, sd, seed):
    """Evaluates the operations ``ops[op_start:op_stop]`` for the simulations ``st_start`` to ``st_stop - 1``.

    Every operation is passed to ``wave_eval`` once per simulation together with that
    simulation's row of ``params``.

    :return: The sum of the overflow counts reported by ``wave_eval``.
    """
    n_overflows = 0
    for idx in range(op_start, op_stop):
        current_op = ops[idx]
        for sim in range(st_start, st_stop):
            sim_param = params[sim]
            n_overflows += wave_eval(current_op, c, vat, sim, line_times, sim_param, sd, seed)
    return n_overflows
@numba.njit
def rand_gauss(seed, sd):
    """Returns a pseudo-random factor around 1.0 derived deterministically from ``seed``.

    For a non-positive ``sd`` the factor is exactly 1.0. Otherwise, twelve uniform pseudo-random
    numbers from a linear congruential sequence are summed, centered around zero and scaled by ``sd``.
    Draws whose magnitude exceeds 0.5 are rejected and repeated with the advanced seed, so the
    result lies within [0.5, 1.5].
    """
    if sd <= 0.0:
        return 1.0
    clamp = 0.5
    deviation = clamp + 1.0  # out of range, forces at least one draw
    # The negated ``<=`` keeps looping on NaN as well.
    while not (abs(deviation) <= clamp):
        total = -6.0
        for _ in range(12):
            seed = int(0xDEECE66D) * seed + 0xB
            total += float((seed >> 8) & 0xffffff) / float(1 << 24)
        deviation = total * sd
    return deviation + 1.0
@numba.njit
 def wave_eval(op, cbuf, vat, st_idx, line_times, param, sd=0.0, seed=0):
    """Computes the output waveform of one operation with up to four inputs for one simulation.

    :param op: A tuple ``(lut, z_idx, a_idx, b_idx, c_idx, d_idx)``: a look-up table with one output bit per
        input combination, the index of the output and the indices of the four inputs.
    :param cbuf: The waveform memory; column ``st_idx`` is read and written.
    :param vat: Table that gives for each index the start location in ``cbuf`` (column 0) and, for the
        output, the capacity (column 1).
    :param st_idx: The simulation (column of ``cbuf``) to evaluate.
    :param line_times: ``line_times[idx, 0, p]`` is added as delay to transitions of input ``idx``,
        ``line_times[idx, 1, p]`` is the minimum pulse width; ``p`` is derived from the current output value.
    :param param: ``param[0]`` is a factor for all delays and thresholds. If ``int(param[1])`` equals an
        input index, ``param[2]`` or ``param[3]`` is added to the delays and thresholds of that input.
    :param sd: Passed to ``rand_gauss`` to randomize delays and thresholds.
    :param seed: Seed for the delay randomization.
    :return: The number of transitions that did not fit into the output waveform.
    """
    lut, z_idx, a_idx, b_idx, c_idx, d_idx = op
    overflows = int(0)
    _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1)
    # start locations of the input and output waveforms in cbuf
    a_mem = vat[a_idx, 0]
    b_mem = vat[b_idx, 0]
    c_mem = vat[c_idx, 0]
    d_mem = vat[d_idx, 0]
    z_mem, z_cap, _ = vat[z_idx]
    # read cursors into the input waveforms
    a_cur = int(0)
    b_cur = int(0)
    c_cur = int(0)
    d_cur = int(0)
    # z_cur is the write cursor of the output; its lowest bit is also the current output value.
    # Bit 0 of the LUT is the output for all inputs at 0. A waveform starting at 1 begins with TMIN.
    z_cur = lut & 1
    if z_cur == 1:
        cbuf[z_mem, st_idx] = TMIN
    # a, b, c, d hold the time of the next pending (delayed) transition of each input
    a = cbuf[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss(_seed ^ a_mem ^ z_cur, sd) * param[0]
    if int(param[1]) == a_idx: a += param[2+z_cur]
    b = cbuf[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss(_seed ^ b_mem ^ z_cur, sd) * param[0]
    if int(param[1]) == b_idx: b += param[2+z_cur]
    c = cbuf[c_mem, st_idx] + line_times[c_idx, 0, z_cur] * rand_gauss(_seed ^ c_mem ^ z_cur, sd) * param[0]
    if int(param[1]) == c_idx: c += param[2+z_cur]
    d = cbuf[d_mem, st_idx] + line_times[d_idx, 0, z_cur] * rand_gauss(_seed ^ d_mem ^ z_cur, sd) * param[0]
    if int(param[1]) == d_idx: d += param[2+z_cur]
    previous_t = TMIN
    current_t = min(a, b, c, d)
    # bit mask of the current input values, used as index into the LUT
    inputs = int(0)
    # process input transitions in order of time until all inputs have reached their end marker
    while current_t < TMAX:
        z_val = z_cur & 1
        # consume the earliest transition: fetch the following transition of that input,
        # compute the pulse width threshold and toggle the input's bit
        if a == current_t:
            a_cur += 1
            a = cbuf[a_mem + a_cur, st_idx]
            a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ a_mem ^ z_val ^ 1, sd) * param[0]
            thresh = line_times[a_idx, 1, z_val] * rand_gauss(_seed ^ a_mem ^ z_val, sd) * param[0]
            if int(param[1]) == a_idx:
                a += param[2+(z_val^1)]
                thresh += param[2+z_val]
            inputs ^= 1
            next_t = a
        elif b == current_t:
            b_cur += 1
            b = cbuf[b_mem + b_cur, st_idx]
            b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ b_mem ^ z_val ^ 1, sd) * param[0]
            thresh = line_times[b_idx, 1, z_val] * rand_gauss(_seed ^ b_mem ^ z_val, sd) * param[0]
            if int(param[1]) == b_idx:
                b += param[2+(z_val^1)]
                thresh += param[2+z_val]
            inputs ^= 2
            next_t = b
        elif c == current_t:
            c_cur += 1
            c = cbuf[c_mem + c_cur, st_idx]
            c += line_times[c_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ c_mem ^ z_val ^ 1, sd) * param[0]
            thresh = line_times[c_idx, 1, z_val] * rand_gauss(_seed ^ c_mem ^ z_val, sd) * param[0]
            if int(param[1]) == c_idx:
                c += param[2+(z_val^1)]
                thresh += param[2+z_val]
            inputs ^= 4
            next_t = c
        else:
            d_cur += 1
            d = cbuf[d_mem + d_cur, st_idx]
            d += line_times[d_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ d_mem ^ z_val ^ 1, sd) * param[0]
            thresh = line_times[d_idx, 1, z_val] * rand_gauss(_seed ^ d_mem ^ z_val, sd) * param[0]
            if int(param[1]) == d_idx:
                d += param[2+(z_val^1)]
                thresh += param[2+z_val]
            inputs ^= 8
            next_t = d
        # the output only changes if the LUT entry for the new input values differs from its current value
        if (z_cur & 1) != ((lut >> inputs) & 1):
            # we generate a toggle in z_mem, if:
            #   ( it is the first toggle in z_mem OR
            #   following toggle is earlier OR
            #   pulse is wide enough ) AND enough space in z_mem.
            if z_cur == 0 or next_t < current_t or (current_t - previous_t) > thresh:
                if z_cur < (z_cap - 1):
                    cbuf[z_mem + z_cur, st_idx] = current_t
                    previous_t = current_t
                    z_cur += 1
                else:
                    # no space left: count an overflow and drop the last stored transition instead
                    overflows += 1
                    previous_t = cbuf[z_mem + z_cur - 1, st_idx]
                    z_cur -= 1
            else:
                # pulse too short: remove the previously stored transition instead of adding a new one
                z_cur -= 1
                if z_cur > 0:
                    previous_t = cbuf[z_mem + z_cur - 1, st_idx]
                else:
                    previous_t = TMIN
        current_t = min(a, b, c, d)
    # terminate the output waveform
    if overflows > 0:
        cbuf[z_mem + z_cur, st_idx] = TMAX_OVL
    else:
        # propagate overflow flags by storing biggest TMAX from input
        cbuf[z_mem + z_cur, st_idx] = a if a == max(a, b, c, d) else b if b == max(a, b, c, d) else c if c == max(a, b, c, d) else d
    return overflows
--- a/src/kyupy/wave_sim_4ig.py
+++ b/src/kyupy/wave_sim_4ig.py
--- a/tests/test_wave_sim4.py
+++ b/tests/test_wave_sim4.py
@ -0,0 +1,174 @@
 import numpy as np
 from kyupy.wave_sim4 import WaveSim, wave_eval, TMIN, TMAX
 from kyupy.logic_sim import LogicSim
 from kyupy import verilog, sdf, logic, bench
 from kyupy.logic import MVArray, BPArray
 from kyupy.sim import SimPrim
 def test_nand_delays():
    """Checks delays and pulse filtering of ``wave_eval`` for a single 4-input NAND."""
    cap = 16
    nand_op = (SimPrim.NAND4, 4, 0, 1, 2, 3)
    c = np.full((5*cap, 1), TMAX)  # 5 waveforms of capacity 16
    waves = c.reshape(-1, cap)  # view on c: one row per waveform
    vat = np.array([(i*cap, cap, 0) for i in range(5)], dtype='int')  # 1:1 mapping

    # SDF specifies IOPATH delays with respect to output polarity
    # SDF pulse rejection value is determined by IOPATH causing last transition and polarity of last transition
    # For every input X -> Z: the first value is used for [X, :, 0] (rise delay and negative pulse limit,
    # terminating in rising Z), the second one for [X, :, 1] (fall delay and positive pulse limit).
    line_times = np.zeros((5, 2, 2))
    for line, (t0, t1) in enumerate([(0.1, 0.2), (0.3, 0.4), (0.5, 0.6), (0.7, 0.8)]):
        line_times[line, :, 0] = t0
        line_times[line, :, 1] = t1

    param = np.asarray([1, -1, 0, 0], dtype='float32')

    def check(inputs, output):
        # Inputs that are not given keep the waveform of the previous call.
        for stimulus, row in zip(inputs, waves):
            row[:len(stimulus)] = stimulus
        wave_eval(nand_op, c, vat, 0, line_times, param)
        for pos, expected in enumerate(output):
            np.testing.assert_allclose(waves[4, pos], expected)

    check([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1
    check([[TMIN,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(1,0,1,1) => 1
    check([[TMIN,TMAX],[TMIN,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMAX])      # NAND(1,1,1,1) => 0

    # Keep inputs C=1 and D=1.
    check([[1,TMAX],[2,TMAX]], [TMIN,2.4,TMAX])              # _/⎺⎺⎺ NAND __/⎺⎺ => ⎺⎺⎺\___ (B->Z fall delay)
    check([[TMIN,TMAX],[TMIN,2,TMAX]],  [2.3,TMAX])          # ⎺⎺⎺⎺⎺ NAND ⎺⎺\__ => ___/⎺⎺⎺ (B->Z rise delay)
    check([[TMIN,TMAX],[TMIN,2,2.35,TMAX]], [2.3,2.75,TMAX]) # ⎺⎺⎺⎺⎺ NAND ⎺\_/⎺ => __/⎺⎺\_ (pos pulse, .35@B -> .45@Z)
    check([[TMIN,TMAX],[TMIN,2,2.25,TMAX]], [TMAX])          # ⎺⎺⎺⎺⎺ NAND ⎺\_/⎺ => _______ (pos pulse, .25@B -> .35@Z, filtered)
    check([[TMIN,TMAX],[2,2.45,TMAX]], [TMIN,2.4,2.75,TMAX]) # ⎺⎺⎺⎺⎺ NAND _/⎺\_ => ⎺⎺\_/⎺⎺ (neg pulse, .45@B -> .35@Z)
    check([[TMIN,TMAX],[2,2.35,TMAX]], [TMIN,TMAX])          # ⎺⎺⎺⎺⎺ NAND _/⎺\_ => ⎺⎺⎺⎺⎺⎺⎺ (neg pulse, .35@B -> .25@Z, filtered)
 def test_tiny_circuit():
    """Simulates a tiny AND/OR/NOT circuit with unit delays and checks waveforms and captures."""
    c = bench.parse('input(x, y) output(a, o, n) a=and(x,y) o=or(x,y) n=not(x)')
    lt = np.zeros((len(c.lines), 2, 2))
    lt[:,0,:] = 1.0  # unit delay for all lines
    wsim = WaveSim(c, lt)
    print(wsim.prim_counts)
    assert len(wsim.s) == 5

    def check_waves(vat_idx, expected):
        # compares the first three entries of a waveform in simulations 0..3
        loc = wsim.vat[vat_idx, 0]
        for sim, wave in enumerate(expected):
            np.testing.assert_allclose(wsim.c[loc:loc+3, sim], wave)

    def check_captures(s_idx, expected):
        # compares initial value, EAT, LST and final value in simulations 0..3
        for sim, capture in enumerate(expected):
            np.testing.assert_allclose(wsim.s[s_idx, sim, 3:7], capture)

    # (initial value, transition time, final value) for simulations 0..3
    stimuli = [
        [(0, 0.1, 0), (0, 0.2, 1), (1, 0.3, 0), (1, 0.4, 1)],  # values for x
        [(1, 0.5, 0), (1, 0.6, 0), (1, 0.7, 0), (0, 0.8, 1)],  # values for y
    ]
    for s_idx, rows in enumerate(stimuli):
        for sim, row in enumerate(rows):
            wsim.s[s_idx, sim, :3] = row

    wsim.s_to_c()

    # check x waveforms
    check_waves(wsim.ppi_offset+0, [
        [TMAX, TMAX, TMAX],
        [0.2, TMAX, TMAX],
        [TMIN, 0.3, TMAX],
        [TMIN, TMAX, TMAX],
    ])
    # check y waveforms
    check_waves(wsim.ppi_offset+1, [
        [TMIN, 0.5, TMAX],
        [TMIN, 0.6, TMAX],
        [TMIN, 0.7, TMAX],
        [0.8, TMAX, TMAX],
    ])

    wsim.c_prop()

    # check a waveforms
    check_waves(wsim.ppo_offset+2, [
        [TMAX, TMAX, TMAX],
        [1.2, 1.6, TMAX],
        [TMIN, 1.3, TMAX],
        [1.8, TMAX, TMAX],
    ])
    # check o waveforms
    check_waves(wsim.ppo_offset+3, [
        [TMIN, 1.5, TMAX],
        [TMIN, TMAX, TMAX],
        [TMIN, 1.7, TMAX],
        [TMIN, TMAX, TMAX],
    ])
    # check n waveforms
    check_waves(wsim.ppo_offset+4, [
        [TMIN, TMAX, TMAX],
        [TMIN, 1.2, TMAX],
        [1.3, TMAX, TMAX],
        [TMAX, TMAX, TMAX],
    ])

    wsim.c_to_s()

    # check a captures
    check_captures(2, [
        [0, TMAX, TMIN, 0],
        [0, 1.2, 1.6, 0],
        [1, 1.3, 1.3, 0],
        [0, 1.8, 1.8, 1],
    ])
    # check o captures
    check_captures(3, [
        [1, 1.5, 1.5, 0],
        [1, TMAX, TMIN, 1],
        [1, 1.7, 1.7, 0],
        [1, TMAX, TMIN, 1],
    ])
    # check n captures
    check_captures(4, [
        [1, TMAX, TMIN, 1],
        [1, 1.2, 1.2, 0],
        [0, 1.3, 1.3, 1],
        [0, TMAX, TMIN, 0],
    ])
 def compare_to_logic_sim(wsim: WaveSim):
    """Applies random stimuli to ``wsim`` and checks its responses against ``LogicSim``.

    Every s_node of every simulation is assigned one of ZERO, ONE, RISE or FALL at random (fixed seed).
    The initial and final values captured by the waveform simulation are then compared with the
    response of a logic simulation of the same circuit for all ``wsim.sims`` vectors.

    :param wsim: The waveform simulator to check. Its ``s`` and ``c`` arrays are overwritten.
    """
    tests = MVArray((len(wsim.s_nodes), wsim.sims))
    choices = np.asarray([logic.ZERO, logic.ONE, logic.RISE, logic.FALL], dtype=np.uint8)
    rng = np.random.default_rng(10)
    tests.data[...] = rng.choice(choices, tests.data.shape)

    # Bit 1 of the logic value is used as initial value, bit 0 as final value. The output fields (3 and 6)
    # are pre-set to the same values, so entries that c_to_s() does not write mirror the stimulus.
    wsim.s[..., 0] = (tests.data & 2) >> 1
    wsim.s[..., 3] = (tests.data & 2) >> 1
    wsim.s[..., 1] = 0.0
    wsim.s[..., 2] = tests.data & 1
    wsim.s[..., 6] = tests.data & 1

    wsim.s_to_c()
    wsim.c_prop()
    wsim.c_to_s()

    # Re-assemble logic values from the captured initial and final values.
    resp = MVArray(tests)
    resp.data[...] = wsim.s[..., 6].astype(np.uint8) | (wsim.s[..., 3].astype(np.uint8)<<1)
    resp.data |= ((resp.data ^ (resp.data >> 1)) & 1) << 2  # transitions

    # Reference responses from the logic simulator.
    tests_bp = BPArray(tests)
    lsim = LogicSim(wsim.circuit, len(tests_bp))
    lsim.assign(tests_bp)
    lsim.propagate()
    exp_bp = BPArray(tests_bp)
    lsim.capture(exp_bp)
    exp = MVArray(exp_bp)

    # Compare every simulated vector instead of a hard-coded count of 8.
    # NOTE(review): assumes MVArray is indexed by vector, as the previous ``range(8)`` with sims=8 did.
    for i in range(wsim.sims):
        exp_str = exp[i].replace('P', '0').replace('N', '1')
        res_str = resp[i].replace('P', '0').replace('N', '1')
        assert res_str == exp_str
 def test_b14(mydir):
    """Compares WaveSim against LogicSim on the b14 benchmark with SDF delays."""
    circuit = verilog.load(mydir / 'b14.v.gz', branchforks=True)
    delay_file = sdf.load(mydir / 'b14.sdf.gz')
    line_delays = delay_file.annotation(circuit)
    compare_to_logic_sim(WaveSim(circuit, line_delays, sims=8))
 def test_b14_strip_forks(mydir):
    """Compares WaveSim with ``strip_forks`` enabled against LogicSim on the b14 benchmark."""
    circuit = verilog.load(mydir / 'b14.v.gz', branchforks=True)
    delay_file = sdf.load(mydir / 'b14.sdf.gz')
    line_delays = delay_file.annotation(circuit)
    compare_to_logic_sim(WaveSim(circuit, line_delays, sims=8, strip_forks=True))