Circuit pickle, STIL/SDF/techlib fixes, sdata

- Fix pin indices for various SC lib variants
- SDF annotation improvements
- STIL loading improvements
- Support for per-simulation parameters in WaveSim
- Circuit is now picklable and comparable
4 years ago · 840b816804
16 changed files with 371 additions and 110 deletions
--- a/Demo.ipynb
+++ b/Demo.ipynb
@ -1009,7 +1009,7 @@
				@@ -1009,7 +1009,7 @@
    {
     "data": {
      "text/plain": [
-       "119676"
+       "120628"
      ]
     },
     "execution_count": 36,
@ -1195,7 +1195,7 @@
				@@ -1195,7 +1195,7 @@
    {
     "data": {
      "text/plain": [
-       "2.0610005855560303"
+       "2.17240047454834"
      ]
     },
     "execution_count": 42,
@ -1222,7 +1222,7 @@
				@@ -1222,7 +1222,7 @@
    {
     "data": {
      "text/plain": [
-       "0.0"
+       "2.0"
      ]
     },
     "execution_count": 43,
@ -1286,13 +1286,17 @@
				@@ -1286,13 +1286,17 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Found 1 CUDA devices\n",
-      "id 0              b'TITAN V'                              [SUPPORTED]\n",
+      "Found 2 CUDA devices\n",
+      "id 0    b'NVIDIA GeForce RTX 3090'                              [SUPPORTED]\n",
+      "                      compute capability: 8.6\n",
+      "                           pci device id: 0\n",
+      "                              pci bus id: 3\n",
+      "id 1       b'NVIDIA TITAN V'                              [SUPPORTED]\n",
      "                      compute capability: 7.0\n",
      "                           pci device id: 0\n",
      "                              pci bus id: 2\n",
      "Summary:\n",
-      "\t1/1 devices are supported\n"
+      "\t2/2 devices are supported\n"
     ]
    },
    {
@ -1322,9 +1326,9 @@
				@@ -1322,9 +1326,9 @@
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "worker",
   "language": "python",
-   "name": "python3"
+   "name": "worker"
  },
  "language_info": {
   "codemirror_mode": {
@ -1336,7 +1340,7 @@
				@@ -1336,7 +1340,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.6.8"
+   "version": "3.6.13"
  }
 },
 "nbformat": 4,
--- a/LICENSE.txt
+++ b/LICENSE.txt
@ -1,6 +1,6 @@
				@@ -1,6 +1,6 @@
 MIT License

-Copyright (c) 2020-2021 Stefan Holst
+Copyright (c) 2020-2022 Stefan Holst

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
--- a/setup.py
+++ b/setup.py
@ -5,7 +5,7 @@ with open('README.rst', 'r') as f:
				@@ -5,7 +5,7 @@ with open('README.rst', 'r') as f:

 setup(
    name='kyupy',
-    version='0.0.3',
+    version='0.0.4',
    description='High-performance processing and analysis of non-hierarchical VLSI designs',
    long_description=long_description,
    long_description_content_type='text/x-rst',
--- a/src/kyupy/init.py
+++ b/src/kyupy/init.py
@ -78,6 +78,7 @@ class Log:
				@@ -78,6 +78,7 @@ class Log:
    """A very simple logger that formats the messages with the number of seconds since
    program start.
    """
+
    def __init__(self):
        self.start = time.perf_counter()
        self.logfile = None
@ -85,6 +86,13 @@ class Log:
				@@ -85,6 +86,13 @@ class Log:
        After each write, ``flush()`` is called as well.
        """

+    def __getstate__(self):
+        return {'elapsed': time.perf_counter() - self.start}
+
+    def __setstate__(self, state):
+        self.logfile = None
+        self.start = time.perf_counter() - state['elapsed']
+
    def log(self, level, message):
        t = time.perf_counter() - self.start
        if self.logfile is None:
@ -120,7 +128,8 @@ class Log:
				@@ -120,7 +128,8 @@ class Log:
                elapsed_time = current_time - start_time
                total_time = elapsed_time / done
                rem_time = total_time - elapsed_time
-                self.log(':', f'{done*100:.0f}% done {hr_time(elapsed_time)} elapsed {hr_time(rem_time)} remaining')
+                self.log(
+                    ':', f'{done*100:.0f}% done {hr_time(elapsed_time)} elapsed {hr_time(rem_time)} remaining')
                log_interval = min(600, int(log_interval*1.5))
                lastlog_time = current_time

@ -167,8 +176,10 @@ class MockCuda:
				@@ -167,8 +176,10 @@ class MockCuda:
                            for grid_y in range(grid_dim[1]):
                                for block_x in range(block_dim[0]):
                                    for block_y in range(block_dim[1]):
-                                        outer.x = grid_x * block_dim[0] + block_x
-                                        outer.y = grid_y * block_dim[1] + block_y
+                                        outer.x = grid_x * \
+                                            block_dim[0] + block_x
+                                        outer.y = grid_y * \
+                                            block_dim[1] + block_y
                                        self.func(*args, **kwargs)
                    return inner
            return Launcher(func)
--- a/src/kyupy/circuit.py
+++ b/src/kyupy/circuit.py
@ -99,6 +99,16 @@ class Node:
				@@ -99,6 +99,16 @@ class Node:
                del self.circuit.cells[self.name]
            self.circuit = None

+    def __eq__(self, other):
+        """Checks equality of node name and kind. Does not check pin connections.
+
+        This is ok, because (name, kind) is unique within a circuit.
+        """
+        return self.name == other.name and self.kind == other.kind
+
+    def __hash__(self):
+        return hash((self.name, self.kind))
+

 class Line:
    """A line is a directional 1:1 connection between two nodes.
@ -172,6 +182,13 @@ class Line:
				@@ -172,6 +182,13 @@ class Line:
    def __lt__(self, other):
        return self.index < other.index

+    def __eq__(self, other):
+        return self.driver == other.driver and self.driver_pin == other.driver_pin and \
+               self.reader == other.reader and self.reader_pin == other.reader_pin
+
+    def __hash__(self):
+        return hash((self.driver, self.driver_pin, self.reader, self.reader_pin))
+

 class Circuit:
    """A Circuit is a container for interconnected nodes and lines.
@ -238,6 +255,32 @@ class Circuit:
				@@ -238,6 +255,32 @@ class Circuit:
            c.interface.append(n)
        return c

+    def __getstate__(self):
+        nodes = [(node.name, node.kind) for node in self.nodes]
+        lines = [(line.driver.index, line.driver_pin, line.reader.index, line.reader_pin) for line in self.lines]
+        interface = [n.index for n in self.interface]
+        return {'name': self.name,
+                'nodes': nodes,
+                'lines': lines,
+                'interface': interface }
+
+    def __setstate__(self, state):
+        self.name = state['name']
+        self.nodes = IndexList()
+        self.lines = IndexList()
+        self.interface = GrowingList()
+        self.cells = {}
+        self.forks = {}
+        for s in state['nodes']:
+            Node(self, *s)
+        for driver, driver_pin, reader, reader_pin in state['lines']:
+            Line(self, (self.nodes[driver], driver_pin), (self.nodes[reader], reader_pin))
+        for n in state['interface']:
+            self.interface.append(self.nodes[n])
+
+    def __eq__(self, other):
+        return self.nodes == other.nodes and self.lines == other.lines and self.interface == other.interface
+
    def dump(self):
        """Returns a string representation of the circuit and all its nodes.
        """
@ -256,14 +299,14 @@ class Circuit:
				@@ -256,14 +299,14 @@ class Circuit:
        yielded first.
        """
        visit_count = [0] * len(self.nodes)
-        queue = deque(n for n in self.nodes if len(n.ins) == 0 or 'DFF' in n.kind)
+        queue = deque(n for n in self.nodes if len(n.ins) == 0 or 'dff' in n.kind.lower())
        while len(queue) > 0:
            n = queue.popleft()
            for line in n.outs:
                if line is None: continue
                succ = line.reader
                visit_count[succ] += 1
-                if visit_count[succ] == len(succ.ins) and 'DFF' not in succ.kind:
+                if visit_count[succ] == len(succ.ins) and 'dff' not in succ.kind.lower():
                    queue.append(succ)
            yield n

@ -282,13 +325,13 @@ class Circuit:
				@@ -282,13 +325,13 @@ class Circuit:
        yielded first.
        """
        visit_count = [0] * len(self.nodes)
-        queue = deque(n for n in self.nodes if len(n.outs) == 0 or 'DFF' in n.kind)
+        queue = deque(n for n in self.nodes if len(n.outs) == 0 or 'dff' in n.kind.lower())
        while len(queue) > 0:
            n = queue.popleft()
            for line in n.ins:
                pred = line.driver
                visit_count[pred] += 1
-                if visit_count[pred] == len(pred.outs) and 'DFF' not in pred.kind:
+                if visit_count[pred] == len(pred.outs) and 'dff' not in pred.kind.lower():
                    queue.append(pred)
            yield n

@ -310,21 +353,21 @@ class Circuit:
				@@ -310,21 +353,21 @@ class Circuit:

    def fanout_free_regions(self):
        for stem in self.reversed_topological_order():
-            if len(stem.outs) == 1 and 'DFF' not in stem.kind: continue
+            if len(stem.outs) == 1 and 'dff' not in stem.kind.lower(): continue
            region = []
-            if 'DFF' in stem.kind:
+            if 'dff' in stem.kind.lower():
                n = stem.ins[0]
-                if len(n.driver.outs) == 1 and 'DFF' not in n.driver.kind:
+                if len(n.driver.outs) == 1 and 'dff' not in n.driver.kind.lower():
                    queue = deque([n.driver])
                else:
                    queue = deque()
            else:
                queue = deque(n.driver for n in stem.ins
-                              if len(n.driver.outs) == 1 and 'DFF' not in n.driver.kind)
+                              if len(n.driver.outs) == 1 and 'dff' not in n.driver.kind.lower())
            while len(queue) > 0:
                n = queue.popleft()
                preds = [pred.driver for pred in n.ins
-                         if len(pred.driver.outs) == 1 and 'DFF' not in pred.driver.kind]
+                         if len(pred.driver.outs) == 1 and 'dff' not in pred.driver.kind.lower()]
                queue.extend(preds)
                region.append(n)
            yield stem, region
--- a/src/kyupy/logic.py
+++ b/src/kyupy/logic.py
@ -291,6 +291,23 @@ def mv_xor(x1, x2, out=None):
				@@ -291,6 +291,23 @@ def mv_xor(x1, x2, out=None):
    return out


+def mv_latch(d, t, q_prev, out=None):
+    """A latch that is transparent if `t` is high. `q_prev` has to be the output value from the previous clock cycle.
+    """
+    m = mv_getm(d, t, q_prev)
+    d, t, q_prev = mv_cast(d, t, q_prev, m=m)
+    out = out or MVArray(np.broadcast(d.data, t.data, q_prev).shape, m=m)
+    out.data[...] = t.data & d.data & 0b011
+    out.data[...] |= ~t.data & 0b010 & (q_prev.data << 1)
+    out.data[...] |= ~t.data & 0b001 & (out.data >> 1)
+    out.data[...] |= ((out.data << 1) ^ (out.data << 2)) & 0b100
+    unknown = (t.data == UNKNOWN) \
+              | (t.data == UNASSIGNED) \
+              | (((d.data == UNKNOWN) | (d.data == UNASSIGNED)) & (t.data != ZERO))
+    np.putmask(out.data, unknown, UNKNOWN)
+    return out
+
+
 def mv_transition(init, final, out=None):
    """Computes the logic transitions from the initial values of ``init`` to the final values of ``final``.
    Pulses in the input data are ignored. If any of the inputs are ``UNKNOWN``, the result is ``UNKNOWN``.
@ -460,3 +477,27 @@ def bp_xor(out, *ins):
				@@ -460,3 +477,27 @@ def bp_xor(out, *ins):
        out[..., 0, :] |= any_unknown
        out[..., 1, :] &= ~any_unknown
        out[..., 2, :] &= ~any_unknown
+
+
+def bp_latch(out, d, t, q_prev):
+    md = out.shape[-2]
+    assert md == d.shape[-2]
+    assert md == t.shape[-2]
+    assert md == q_prev.shape[-2]
+    if md == 1:
+        out[...] = (d & t) | (q_prev & ~t)
+    elif md == 2:
+        any_unknown = t[..., 0, :] ^ t[..., 1, :]
+        any_unknown |= (d[..., 0, :] ^ d[..., 1, :]) & (t[..., 0, :] | t[..., 1, :])
+        out[...] = (d & t) | (q_prev & ~t)
+        out[..., 0, :] |= any_unknown
+        out[..., 1, :] &= ~any_unknown
+    else:
+        any_unknown = (t[..., 0, :] ^ t[..., 1, :]) & ~t[..., 2, :]
+        any_unknown |= ((d[..., 0, :] ^ d[..., 1, :]) & ~d[..., 2, :]) & (t[..., 0, :] | t[..., 1, :] | t[..., 2, :])
+        out[..., 1, :] = (d[..., 1, :] & t[..., 1, :]) | (q_prev[..., 0, :] & ~t[..., 1, :])
+        out[..., 0, :] = (d[..., 0, :] & t[..., 0, :]) | (out[..., 1, :] & ~t[..., 0, :])
+        out[..., 2, :] = out[..., 1, :] ^ out[..., 0, :]
+        out[..., 0, :] |= any_unknown
+        out[..., 1, :] &= ~any_unknown
+        out[..., 2, :] &= ~any_unknown
--- a/src/kyupy/logic_sim.py
+++ b/src/kyupy/logic_sim.py
@ -30,15 +30,22 @@ class LogicSim:
				@@ -30,15 +30,22 @@ class LogicSim:
        self.circuit = circuit
        self.sims = sims
        nbytes = (sims - 1) // 8 + 1
-        self.interface = list(circuit.interface) + [n for n in circuit.nodes if 'dff' in n.kind.lower()]
+        dffs = [n for n in circuit.nodes if 'dff' in n.kind.lower()]
+        latches = [n for n in circuit.nodes if 'latch' in n.kind.lower()]
+        self.interface = list(circuit.interface) + dffs + latches
+
        self.width = len(self.interface)
        """The number of bits in the circuit state (number of ports + number of state-elements)."""
+
        self.state = np.zeros((len(circuit.lines), mdim, nbytes), dtype='uint8')
        self.state_epoch = np.zeros(len(circuit.nodes), dtype='int8') - 1
        self.tmp = np.zeros((5, mdim, nbytes), dtype='uint8')
        self.zero = np.zeros((mdim, nbytes), dtype='uint8')
        self.epoch = 0

+        self.latch_dict = dict((n.index, i) for i, n in enumerate(latches))
+        self.latch_state = np.zeros((len(latches), mdim, nbytes), dtype='uint8')
+
        known_fct = [(f[:-4], getattr(self, f)) for f in dir(self) if f.endswith('_fct')]
        self.node_fct = []
        for n in circuit.nodes:
@ -69,8 +76,11 @@ class LogicSim:
				@@ -69,8 +76,11 @@ class LogicSim:
        """
        for node, stim in zip(self.interface, stimuli.data if hasattr(stimuli, 'data') else stimuli):
            if len(node.outs) == 0: continue
-            outputs = [self.state[line] if line else self.tmp[3] for line in node.outs]
-            self.node_fct[node]([stim], outputs)
+            if node.index in self.latch_dict:
+                self.latch_state[self.latch_dict[node.index]] = stim
+            else:
+                outputs = [self.state[line] if line else self.tmp[3] for line in node.outs]
+                self.node_fct[node]([stim], outputs)
            for line in node.outs:
                if line is not None: self.state_epoch[line.reader] = self.epoch
        for n in self.circuit.nodes:
@ -83,13 +93,29 @@ class LogicSim:
				@@ -83,13 +93,29 @@ class LogicSim:

    def capture(self, responses):
        """Capture the current values at the primary outputs and in the state-elements (flip-flops).
+        For primary outputs, the logic value is stored unmodified in the given target array.
+        For flip-flops, the logic value is constructed from the previous state and the new state.

        :param responses: A bit-parallel storage target for the responses in a compatible shape.
        :type responses: :py:class:`~kyupy.logic.BPArray`
        :returns: The given responses object.
        """
        for node, resp in zip(self.interface, responses.data if hasattr(responses, 'data') else responses):
-            if len(node.ins) > 0: resp[...] = self.state[node.ins[0]]
+            if len(node.ins) == 0: continue
+            if node.index in self.latch_dict:
+                resp[...] = self.state[node.outs[0]]
+            else:
+                resp[...] = self.state[node.ins[0]]
+            # FIXME: unclear why we should use outs for DFFs
+            #if self.m > 2 and 'dff' in node.kind.lower() and len(node.outs) > 0:
+            #    if node.outs[0] is None:
+            #        resp[1, :] = ~self.state[node.outs[1], 0, :]  # assume QN is connected, take inverse of that.
+            #    else:
+            #        resp[1, :] = self.state[node.outs[0], 0, :]
+            #    if self.m > 4:
+            #        resp[..., 2, :] = resp[..., 0, :] ^ resp[..., 1, :]
+            #    # We don't handle X or - correctly.
+
        return responses

    def propagate(self, inject_cb=None):
@ -116,7 +142,8 @@ class LogicSim:
				@@ -116,7 +142,8 @@ class LogicSim:
            if self.state_epoch[node] != self.epoch: continue
            inputs = [self.state[line] if line else self.zero for line in node.ins]
            outputs = [self.state[line] if line else self.tmp[3] for line in node.outs]
-            # print('sim', node)
+            if node.index in self.latch_dict:
+                inputs.append(self.latch_state[self.latch_dict[node.index]])
            self.node_fct[node](inputs, outputs)
            for line in node.outs:
                if inject_cb is not None: inject_cb(line, self.state[line])
@ -137,59 +164,57 @@ class LogicSim:
				@@ -137,59 +164,57 @@ class LogicSim:
        self.propagate(inject_cb)
        return self.capture(state)

-    @staticmethod
-    def fork_fct(inputs, outputs):
+    def fork_fct(self, inputs, outputs):
        for o in outputs: o[...] = inputs[0]

-    @staticmethod
-    def const0_fct(_, outputs):
+    def const0_fct(self, _, outputs):
        for o in outputs: o[...] = 0

-    @staticmethod
-    def const1_fct(_, outputs):
+    def const1_fct(self, _, outputs):
        for o in outputs:
            o[...] = 0
            logic.bp_not(o, o)

-    @staticmethod
-    def not_fct(inputs, outputs):
+    def not_fct(self, inputs, outputs):
        logic.bp_not(outputs[0], inputs[0])

-    @staticmethod
-    def and_fct(inputs, outputs):
+    def and_fct(self, inputs, outputs):
        logic.bp_and(outputs[0], *inputs)

-    @staticmethod
-    def or_fct(inputs, outputs):
+    def or_fct(self, inputs, outputs):
        logic.bp_or(outputs[0], *inputs)

-    @staticmethod
-    def xor_fct(inputs, outputs):
+    def xor_fct(self, inputs, outputs):
        logic.bp_xor(outputs[0], *inputs)

-    @staticmethod
-    def sdff_fct(inputs, outputs):
+    def sdff_fct(self, inputs, outputs):
        logic.bp_buf(outputs[0], inputs[0])
        if len(outputs) > 1:
            logic.bp_not(outputs[1], inputs[0])

-    @staticmethod
-    def dff_fct(inputs, outputs):
+    def dff_fct(self, inputs, outputs):
        logic.bp_buf(outputs[0], inputs[0])
        if len(outputs) > 1:
            logic.bp_not(outputs[1], inputs[0])

-    @staticmethod
-    def nand_fct(inputs, outputs):
+    def latch_fct(self, inputs, outputs):
+        logic.bp_latch(outputs[0], inputs[0], inputs[1], inputs[2])
+        if len(outputs) > 1:
+            logic.bp_not(outputs[1], inputs[0])
+
+    def nand_fct(self, inputs, outputs):
        logic.bp_and(outputs[0], *inputs)
        logic.bp_not(outputs[0], outputs[0])

-    @staticmethod
-    def nor_fct(inputs, outputs):
+    def nor_fct(self, inputs, outputs):
        logic.bp_or(outputs[0], *inputs)
        logic.bp_not(outputs[0], outputs[0])

-    @staticmethod
-    def xnor_fct(inputs, outputs):
+    def xnor_fct(self, inputs, outputs):
        logic.bp_xor(outputs[0], *inputs)
        logic.bp_not(outputs[0], outputs[0])
+
+    def aoi21_fct(self, inputs, outputs):
+        logic.bp_and(self.tmp[0], inputs[0], inputs[1])
+        logic.bp_or(outputs[0], self.tmp[0], inputs[2])
+        logic.bp_not(outputs[0], outputs[0])
--- a/src/kyupy/sdf.py
+++ b/src/kyupy/sdf.py
@ -92,7 +92,7 @@ class DelayFile:
				@@ -92,7 +92,7 @@ class DelayFile:
                    continue
                cell = find_cell(cn)
                if cell is None:
-                    log.warn(f'Cell from SDF not found in circuit: {cn}')
+                    #log.warn(f'Cell from SDF not found in circuit: {cn}')
                    continue
                ipn = re.sub(r'\((neg|pos)edge ([^)]+)\)', r'\2', ipn)
                ipin = tlib.pin_index(cell.kind, ipn)
@ -111,12 +111,15 @@ class DelayFile:
				@@ -111,12 +111,15 @@ class DelayFile:
                    if ffdelays and (len(cell.outs) > opin):
                        add_delays(cell.outs[opin])
                else:
-                    if kind.startswith(('xor', 'xnor')):
-                        # print(ipn, ipin, times[cell.i_lines[ipin], 0, 0])
-                        take_avg = timing[cell.ins[ipin]].sum() > 0
-                    add_delays(cell.ins[ipin])
-                    if take_avg:
-                        timing[cell.ins[ipin]] /= 2
+                    if ipin < len(cell.ins):
+                        if kind.startswith(('xor', 'xnor')):
+                            # print(ipn, ipin, times[cell.i_lines[ipin], 0, 0])
+                            take_avg = timing[cell.ins[ipin]].sum() > 0
+                        add_delays(cell.ins[ipin])
+                        if take_avg:
+                            timing[cell.ins[ipin]] /= 2
+                    else:
+                        log.warn(f'No line to annotate pin {ipn} of {cell}')

        if not interconnect or self.interconnects is None:
            return timing
@ -139,14 +142,17 @@ class DelayFile:
				@@ -139,14 +142,17 @@ class DelayFile:
                cn2, pn2 = (n2, 'IN')
            c1 = find_cell(cn1)
            if c1 is None:
-                log.warn(f'Cell from SDF not found in circuit: {cn1}')
+                #log.warn(f'Cell from SDF not found in circuit: {cn1}')
                continue
            c2 = find_cell(cn2)
            if c2 is None:
-                log.warn(f'Cell from SDF not found in circuit: {cn2}')
+                #log.warn(f'Cell from SDF not found in circuit: {cn2}')
                continue
            p1, p2 = tlib.pin_index(c1.kind, pn1), tlib.pin_index(c2.kind, pn2)
            line = None
+            if len(c2.ins) <= p2:
+                log.warn(f'No line to annotate pin {pn2} of {c2}')
+                continue
            f1, f2 = c1.outs[p1].reader, c2.ins[p2].driver
            if f1 != f2:  # possible branchfork
                assert len(f2.ins) == 1
--- a/src/kyupy/stil.py
+++ b/src/kyupy/stil.py
@ -40,19 +40,19 @@ class StilFile:
				@@ -40,19 +40,19 @@ class StilFile:
                unload = {}
                for so_port in self.so_ports:
                    if so_port in call.parameters:
-                        unload[so_port] = call.parameters[so_port].replace('\n', '')
-                if len(launch) > 0:
+                        unload[so_port] = call.parameters[so_port].replace('\n', '').replace('N', '-')
+                if len(capture) > 0:
                    self.patterns.append(ScanPattern(sload, launch, capture, unload))
                    capture = {}
                    launch = {}
                sload = {}
                for si_port in self.si_ports:
                    if si_port in call.parameters:
-                        sload[si_port] = call.parameters[si_port].replace('\n', '')
+                        sload[si_port] = call.parameters[si_port].replace('\n', '').replace('N', '-')
            if call.name.endswith('_launch'):
-                launch = dict((k, v.replace('\n', '')) for k, v in call.parameters.items())
+                launch = dict((k, v.replace('\n', '').replace('N', '-')) for k, v in call.parameters.items())
            if call.name.endswith('_capture'):
-                capture = dict((k, v.replace('\n', '')) for k, v in call.parameters.items())
+                capture = dict((k, v.replace('\n', '').replace('N', '-')) for k, v in call.parameters.items())

    def _maps(self, c):
        interface = list(c.interface) + [n for n in c.nodes if 'DFF' in n.kind]
@ -96,15 +96,15 @@ class StilFile:
				@@ -96,15 +96,15 @@ class StilFile:
            for si_port in self.si_ports.keys():
                pattern = logic.mv_xor(p.load[si_port], scan_inversions[si_port])
                tests.data[scan_maps[si_port], i] = pattern.data[:, 0]
-            tests.data[pi_map, i] = logic.MVArray(p.launch['_pi']).data[:, 0]
+            tests.data[pi_map, i] = logic.MVArray(p.capture['_pi']).data[:, 0]
        return tests

    def tests_loc(self, circuit):
        """Assembles and returns a LoC scan test pattern set for given circuit.

        This function assumes a launch-on-capture (LoC) delay test.
-        It performs a logic simulation to obtain the first capture pattern (the one that launches the
-        delay test) and assembles the test pattern set from from pairs for initialization- and launch-patterns.
+        It performs a logic simulation to obtain the first capture pattern (the one that launches the delay
+        test) and assembles the test pattern set from from pairs for initialization- and launch-patterns.
        """
        interface, pi_map, po_map, scan_maps, scan_inversions = self._maps(circuit)
        init = logic.MVArray((len(interface), len(self.patterns)), m=4)
@ -114,7 +114,7 @@ class StilFile:
				@@ -114,7 +114,7 @@ class StilFile:
            for si_port in self.si_ports.keys():
                pattern = logic.mv_xor(p.load[si_port], scan_inversions[si_port])
                init.data[scan_maps[si_port], i] = pattern.data[:, 0]
-            init.data[pi_map, i] = logic.MVArray(p.launch['_pi']).data[:, 0]
+            init.data[pi_map, i] = logic.MVArray(p.launch['_pi'] if '_pi' in p.launch else p.capture['_pi']).data[:, 0]
        launch_bp = logic.BPArray(init)
        sim4v = LogicSim(circuit, len(init), m=4)
        sim4v.assign(launch_bp)
@ -122,8 +122,8 @@ class StilFile:
				@@ -122,8 +122,8 @@ class StilFile:
        sim4v.capture(launch_bp)
        launch = logic.MVArray(launch_bp)
        for i, p in enumerate(self.patterns):
-            # if there was no launch clock, then init = launch
-            if ('P' not in p.launch['_pi']) or ('P' not in p.capture['_pi']):
+            # if there was no launch cycle or launch clock, then init = launch
+            if '_pi' not in p.launch or 'P' not in p.launch['_pi'] or 'P' not in p.capture['_pi']:
                for si_port in self.si_ports.keys():
                    pattern = logic.mv_xor(p.load[si_port], scan_inversions[si_port])
                    launch.data[scan_maps[si_port], i] = pattern.data[:, 0]
--- a/src/kyupy/techlib.py
+++ b/src/kyupy/techlib.py
@ -30,22 +30,24 @@ class TechLib:
				@@ -30,22 +30,24 @@ class TechLib:
    def pin_index(kind, pin):
        """Returns a pin list position for a given node kind and pin name."""
        if kind[:3] in ('OAI', 'AOI'):
-            if pin[0] == 'A': return int(pin[1])
-            if pin[0] == 'B': return int(pin[1]) + int(kind[4])
+            if pin[0] == 'A': return int(pin[1]) - 1
+            if pin[0] == 'B': return int(pin[1]) + int(kind[4]) - 1
        for prefix, pins, index in [('HADD', ('B0', 'SO'), 1),
-                                    ('MUX21', ('S',), 2),
+                                    ('MUX21', ('S', 'S0'), 2),
                                    ('MX2', ('S0',), 2),
                                    ('TBUF', ('OE',), 1),
                                    ('TINV', ('OE',), 1),
-                                    ('DFF', ('QN',), 1),
+                                    ('LATCH', ('D',), 0),
+                                    ('LATCH', ('QN',), 1),
                                    ('DFF', ('D',), 0),
+                                    ('DFF', ('QN',), 1),
                                    ('SDFF', ('D',), 0),
                                    ('SDFF', ('QN',), 1),
                                    ('SDFF', ('CLK',), 3),
-                                    ('SDFF', ('RSTB',), 4),
+                                    ('SDFF', ('RSTB', 'RN'), 4),
                                    ('SDFF', ('SETB',), 5)]:
            if kind.startswith(prefix) and pin in pins: return index
-        for index, pins in enumerate([('A1', 'IN1', 'A', 'S', 'INP', 'Q', 'QN', 'Y', 'Z', 'ZN'),
+        for index, pins in enumerate([('A1', 'IN1', 'A', 'S', 'INP', 'I', 'Q', 'QN', 'Y', 'Z', 'ZN'),
                                      ('A2', 'IN2', 'B', 'CK', 'CLK', 'CO', 'SE'),
                                      ('A3', 'IN3', 'C', 'RN', 'RSTB', 'CI', 'SI'),
                                      ('A4', 'IN4', 'D', 'SN', 'SETB'),
--- a/src/kyupy/wave_sim.py
+++ b/src/kyupy/wave_sim.py
@ -122,6 +122,9 @@ class WaveSim:
				@@ -122,6 +122,9 @@ class WaveSim:
        self.lst_eat_valid = False

        self.cdata = np.zeros((len(self.interface), sims, 7), dtype='float32')
+                     
+        self.sdata = np.zeros((sims, 4), dtype='float32')
+        self.sdata[...,0] = 1.0

        if isinstance(wavecaps, int):
            wavecaps = [wavecaps] * len(circuit.lines)
@ -158,7 +161,8 @@ class WaveSim:
				@@ -158,7 +161,8 @@ class WaveSim:
                if kind == '__fork__':
                    if not strip_forks:
                        for o_line in n.outs:
-                            ops.append((0b1010, o_line.index, i0_idx, i1_idx))
+                            if o_line is not None:
+                                ops.append((0b1010, o_line.index, i0_idx, i1_idx))
                elif kind.startswith('nand'):
                    ops.append((0b0111, o0_idx, i0_idx, i1_idx))
                elif kind.startswith('nor'):
@ -328,7 +332,7 @@ class WaveSim:
				@@ -328,7 +332,7 @@ class WaveSim:
        sims = min(sims or self.sims, self.sims)
        for op_start, op_stop in zip(self.level_starts, self.level_stops):
            self.overflows += level_eval(self.ops, op_start, op_stop, self.state, self.sat, 0, sims,
-                                         self.timing, sd, seed)
+                                         self.timing, self.sdata, sd, seed)
        self.lst_eat_valid = False

    def wave(self, line, vector):
@ -521,12 +525,12 @@ class WaveSim:
				@@ -521,12 +525,12 @@ class WaveSim:


@numba.njit
-def level_eval(ops, op_start, op_stop, state, sat, st_start, st_stop, line_times, sd, seed):
+def level_eval(ops, op_start, op_stop, state, sat, st_start, st_stop, line_times, sdata, sd, seed):
    overflows = 0
    for op_idx in range(op_start, op_stop):
        op = ops[op_idx]
        for st_idx in range(st_start, st_stop):
-            overflows += wave_eval(op, state, sat, st_idx, line_times, sd, seed)
+            overflows += wave_eval(op, state, sat, st_idx, line_times, sdata[st_idx], sd, seed)
    return overflows


@ -547,7 +551,7 @@ def rand_gauss(seed, sd):
				@@ -547,7 +551,7 @@ def rand_gauss(seed, sd):


@numba.njit
-def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0):
+def wave_eval(op, state, sat, st_idx, line_times, sdata, sd=0.0, seed=0):
    lut, z_idx, a_idx, b_idx = op
    overflows = int(0)

@ -563,9 +567,11 @@ def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0):
				@@ -563,9 +567,11 @@ def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0):
    if z_cur == 1:
        state[z_mem, st_idx] = TMIN

-    a = state[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss(_seed ^ a_mem ^ z_cur, sd)
-    b = state[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss(_seed ^ b_mem ^ z_cur, sd)
-
+    a = state[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss(_seed ^ a_mem ^ z_cur, sd) * sdata[0]
+    if int(sdata[1]) == a_idx: a += sdata[2+z_cur]
+    b = state[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss(_seed ^ b_mem ^ z_cur, sd) * sdata[0]
+    if int(sdata[1]) == b_idx: b += sdata[2+z_cur]
+    
    previous_t = TMIN

    current_t = min(a, b)
@ -576,15 +582,21 @@ def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0):
				@@ -576,15 +582,21 @@ def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0):
        if b < a:
            b_cur += 1
            b = state[b_mem + b_cur, st_idx]
-            b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ b_mem ^ z_val ^ 1, sd)
-            thresh = line_times[b_idx, 1, z_val] * rand_gauss(_seed ^ b_mem ^ z_val, sd)
+            b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ b_mem ^ z_val ^ 1, sd) * sdata[0]
+            thresh = line_times[b_idx, 1, z_val] * rand_gauss(_seed ^ b_mem ^ z_val, sd) * sdata[0]
+            if int(sdata[1]) == b_idx:
+                b += sdata[2+(z_val^1)]
+                thresh += sdata[2+z_val]
            inputs ^= 2
            next_t = b
        else:
            a_cur += 1
            a = state[a_mem + a_cur, st_idx]
-            a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ a_mem ^ z_val ^ 1, sd)
-            thresh = line_times[a_idx, 1, z_val] * rand_gauss(_seed ^ a_mem ^ z_val, sd)
+            a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss(_seed ^ a_mem ^ z_val ^ 1, sd) * sdata[0]
+            thresh = line_times[a_idx, 1, z_val] * rand_gauss(_seed ^ a_mem ^ z_val, sd) * sdata[0]
+            if int(sdata[1]) == a_idx:
+                a += sdata[2+(z_val^1)]
+                thresh += sdata[2+z_val]
            inputs ^= 1
            next_t = a

@ -618,6 +630,7 @@ def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0):
				@@ -618,6 +630,7 @@ def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0):
    return overflows


+
 class WaveSimCuda(WaveSim):
    """A GPU-accelerated waveform-based combinational logic timing simulator.

@ -636,6 +649,7 @@ class WaveSimCuda(WaveSim):
				@@ -636,6 +649,7 @@ class WaveSimCuda(WaveSim):
        self.d_timing = cuda.to_device(self.timing)
        self.d_tdata = cuda.to_device(self.tdata)
        self.d_cdata = cuda.to_device(self.cdata)
+        self.d_sdata = cuda.to_device(self.sdata)

        self._block_dim = (32, 16)

@ -650,6 +664,9 @@ class WaveSimCuda(WaveSim):
				@@ -650,6 +664,9 @@ class WaveSimCuda(WaveSim):

    def set_line_delay(self, line, polarity, delay):
        self.d_timing[line, 0, polarity] = delay
+                     
+    def sdata_to_device(self):
+        cuda.to_device(self.sdata, to=self.d_sdata)

    def assign(self, vectors, time=0.0, offset=0):
        assert (offset % 8) == 0
@ -676,7 +693,7 @@ class WaveSimCuda(WaveSim):
				@@ -676,7 +693,7 @@ class WaveSimCuda(WaveSim):
        for op_start, op_stop in zip(self.level_starts, self.level_stops):
            grid_dim = self._grid_dim(sims, op_stop - op_start)
            wave_kernel[grid_dim, self._block_dim](self.d_ops, op_start, op_stop, self.d_state, self.sat, int(0),
-                                                   sims, self.d_timing, sd, seed)
+                                                   sims, self.d_timing, self.d_sdata, sd, seed)
        cuda.synchronize()
        self.lst_eat_valid = False

@ -858,7 +875,7 @@ def rand_gauss_dev(seed, sd):
				@@ -858,7 +875,7 @@ def rand_gauss_dev(seed, sd):


@cuda.jit()
-def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_times, sd, seed):
+def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_times, sdata, sd, seed):
    x, y = cuda.grid(2)
    st_idx = st_start + x
    op_idx = op_start + y
@ -869,6 +886,7 @@ def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_time
				@@ -869,6 +886,7 @@ def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_time
    a_idx = ops[op_idx, 2]
    b_idx = ops[op_idx, 3]
    overflows = int(0)
+    sdata = sdata[st_idx]

    _seed = (seed << 4) + (z_idx << 20) + (st_idx << 1)

@ -882,9 +900,11 @@ def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_time
				@@ -882,9 +900,11 @@ def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_time
    if z_cur == 1:
        state[z_mem, st_idx] = TMIN

-    a = state[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss_dev(_seed ^ a_mem ^ z_cur, sd)
-    b = state[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss_dev(_seed ^ b_mem ^ z_cur, sd)
-
+    a = state[a_mem, st_idx] + line_times[a_idx, 0, z_cur] * rand_gauss_dev(_seed ^ a_mem ^ z_cur, sd) * sdata[0]
+    if int(sdata[1]) == a_idx: a += sdata[2+z_cur]
+    b = state[b_mem, st_idx] + line_times[b_idx, 0, z_cur] * rand_gauss_dev(_seed ^ b_mem ^ z_cur, sd) * sdata[0]
+    if int(sdata[1]) == b_idx: b += sdata[2+z_cur]
+    
    previous_t = TMIN

    current_t = min(a, b)
@ -895,15 +915,21 @@ def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_time
				@@ -895,15 +915,21 @@ def wave_kernel(ops, op_start, op_stop, state, sat, st_start, st_stop, line_time
        if b < a:
            b_cur += 1
            b = state[b_mem + b_cur, st_idx]
-            b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss_dev(_seed ^ b_mem ^ z_val ^ 1, sd)
-            thresh = line_times[b_idx, 1, z_val] * rand_gauss_dev(_seed ^ b_mem ^ z_val, sd)
+            b += line_times[b_idx, 0, z_val ^ 1] * rand_gauss_dev(_seed ^ b_mem ^ z_val ^ 1, sd) * sdata[0]
+            thresh = line_times[b_idx, 1, z_val] * rand_gauss_dev(_seed ^ b_mem ^ z_val, sd) * sdata[0]
+            if int(sdata[1]) == b_idx:
+                b += sdata[2+(z_val^1)]
+                thresh += sdata[2+z_val]
            inputs ^= 2
            next_t = b
        else:
            a_cur += 1
            a = state[a_mem + a_cur, st_idx]
-            a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss_dev(_seed ^ a_mem ^ z_val ^ 1, sd)
-            thresh = line_times[a_idx, 1, z_val] * rand_gauss_dev(_seed ^ a_mem ^ z_val, sd)
+            a += line_times[a_idx, 0, z_val ^ 1] * rand_gauss_dev(_seed ^ a_mem ^ z_val ^ 1, sd) * sdata[0]
+            thresh = line_times[a_idx, 1, z_val] * rand_gauss_dev(_seed ^ a_mem ^ z_val, sd) * sdata[0]
+            if int(sdata[1]) == a_idx:
+                a += sdata[2+(z_val^1)]
+                thresh += sdata[2+z_val]
            inputs ^= 1
            next_t = a

--- a/tests/test_circuit.py
+++ b/tests/test_circuit.py
@ -1,5 +1,7 @@
				@@ -1,5 +1,7 @@
-from kyupy.circuit import Circuit, Node, Line
+import pickle

+from kyupy.circuit import Circuit, Node, Line
+from kyupy import verilog

 def test_lines():
    c = Circuit()
@ -99,3 +101,12 @@ def test_circuit():
				@@ -99,3 +101,12 @@ def test_circuit():

    for n in c.topological_order():
        repr(n)
+
+
+def test_pickle(mydir):
+    c = verilog.load(mydir / 'b14.v.gz')
+    assert c is not None
+    cs = pickle.dumps(c)
+    assert cs is not None
+    c2 = pickle.loads(cs)
+    assert c == c2
--- a/tests/test_logic.py
+++ b/tests/test_logic.py
@ -145,6 +145,20 @@ def test_mv_operations():
				@@ -145,6 +145,20 @@ def test_mv_operations():
    assert lg.mv_xor(x1_4v, x2_4v)[0] == '0XX1XXXXXXXX1XX0'
    assert lg.mv_xor(x1_8v, x2_8v)[0] == '0XX1PRFNXXXXXXXXXXXXXXXX1XX0NFRPPXXNPRFNRXXFRPNFFXXRFNPRNXXPNFRP'

+    x30_2v = lg.MVArray("0000", m=2)
+    x31_2v = lg.MVArray("1111", m=2)
+    x30_4v = lg.MVArray("0000000000000000", m=4)
+    x31_4v = lg.MVArray("1111111111111111", m=4)
+    x30_8v = lg.MVArray("0000000000000000000000000000000000000000000000000000000000000000", m=8)
+    x31_8v = lg.MVArray("1111111111111111111111111111111111111111111111111111111111111111", m=8)
+
+    assert lg.mv_latch(x1_2v, x2_2v, x30_2v)[0] == '0001'
+    assert lg.mv_latch(x1_2v, x2_2v, x31_2v)[0] == '1011'
+    assert lg.mv_latch(x1_4v, x2_4v, x30_4v)[0] == '0XX00XXX0XXX0XX1'
+    assert lg.mv_latch(x1_4v, x2_4v, x31_4v)[0] == '1XX01XXX1XXX1XX1'
+    assert lg.mv_latch(x1_8v, x2_8v, x30_8v)[0] == '0XX000000XXXXXXX0XXXXXXX0XX10R110XX000000XXR0R0R0XXF001F0XX10R11'
+    assert lg.mv_latch(x1_8v, x2_8v, x31_8v)[0] == '1XX01F001XXXXXXX1XXXXXXX1XX111111XX01F001XXR110R1XXF1F1F1XX11111'
+

 def test_bparray():

@ -212,3 +226,27 @@ def test_bparray():
				@@ -212,3 +226,27 @@ def test_bparray():
    assert lg.MVArray(out_2v)[0] == '0110'
    assert lg.MVArray(out_4v)[0] == '0XX1XXXXXXXX1XX0'
    assert lg.MVArray(out_8v)[0] == '0XX1PRFNXXXXXXXXXXXXXXXX1XX0NFRPPXXNPRFNRXXFRPNFFXXRFNPRNXXPNFRP'
+
+    x30_2v = lg.BPArray("0000", m=2)
+    x30_4v = lg.BPArray("0000000000000000", m=4)
+    x30_8v = lg.BPArray("0000000000000000000000000000000000000000000000000000000000000000", m=8)
+
+    lg.bp_latch(out_2v.data, x1_2v.data, x2_2v.data, x30_2v.data)
+    lg.bp_latch(out_4v.data, x1_4v.data, x2_4v.data, x30_4v.data)
+    lg.bp_latch(out_8v.data, x1_8v.data, x2_8v.data, x30_8v.data)
+
+    assert lg.MVArray(out_2v)[0] == '0001'
+    assert lg.MVArray(out_4v)[0] == '0XX00XXX0XXX0XX1'
+    assert lg.MVArray(out_8v)[0] == '0XX000000XXXXXXX0XXXXXXX0XX10R110XX000000XXR0R0R0XXF001F0XX10R11'
+
+    x31_2v = lg.BPArray("1111", m=2)
+    x31_4v = lg.BPArray("1111111111111111", m=4)
+    x31_8v = lg.BPArray("1111111111111111111111111111111111111111111111111111111111111111", m=8)
+
+    lg.bp_latch(out_2v.data, x1_2v.data, x2_2v.data, x31_2v.data)
+    lg.bp_latch(out_4v.data, x1_4v.data, x2_4v.data, x31_4v.data)
+    lg.bp_latch(out_8v.data, x1_8v.data, x2_8v.data, x31_8v.data)
+
+    assert lg.MVArray(out_2v)[0] == '1011'
+    assert lg.MVArray(out_4v)[0] == '1XX01XXX1XXX1XX1'
+    assert lg.MVArray(out_8v)[0] == '1XX01F001XXXXXXX1XXXXXXX1XX111111XX01F001XXR110R1XXF1F1F1XX11111'
--- a/tests/test_logic_sim.py
+++ b/tests/test_logic_sim.py
@ -73,6 +73,45 @@ def test_8v():
				@@ -73,6 +73,45 @@ def test_8v():
        assert resp[i] == mva[i]


+def test_loop():
+    c = bench.parse('q=dff(d) d=not(q)')
+    s = LogicSim(c, 4, m=8)
+    assert len(s.interface) == 1
+    mva = MVArray([['0'], ['1'], ['R'], ['F']], m=8)
+
+    s.assign(BPArray(mva))
+    s.propagate()
+    resp_bp = BPArray((len(s.interface), s.sims))
+    s.capture(resp_bp)
+    resp = MVArray(resp_bp)
+
+    assert resp[0] == '1'
+    assert resp[1] == '0'
+    assert resp[2] == 'F'
+    assert resp[3] == 'R'
+
+    resp_bp = s.cycle(resp_bp)
+    resp = MVArray(resp_bp)
+
+    assert resp[0] == '0'
+    assert resp[1] == '1'
+    assert resp[2] == 'R'
+    assert resp[3] == 'F'
+
+
+def test_latch():
+    c = bench.parse('input(d, t) output(q) q=latch(d, t)')
+    s = LogicSim(c, 8, m=8)
+    assert len(s.interface) == 4
+    mva = MVArray(['00-0', '00-1', '01-0', '01-1', '10-0', '10-1', '11-0', '11-1'], m=8)
+    exp = MVArray(['0000', '0011', '0100', '0100', '1000', '1011', '1111', '1111'], m=8)
+
+    resp = MVArray(s.cycle(BPArray(mva)))
+
+    for i in range(len(mva)):
+        assert resp[i] == exp[i]
+
+
 def test_b01(mydir):
    c = bench.load(mydir / 'b01.bench')

--- a/tests/test_stil.py
+++ b/tests/test_stil.py
@ -1,8 +1,21 @@
				@@ -1,8 +1,21 @@
-from kyupy import stil
+from kyupy import stil, verilog


 def test_b14(mydir):
+    b14 = verilog.load(mydir / 'b14.v.gz')
+    
    s = stil.load(mydir / 'b14.stuck.stil.gz')
    assert len(s.signal_groups) == 10
    assert len(s.scan_chains) == 1
    assert len(s.calls) == 2163
+    tests = s.tests(b14)
+    resp = s.responses(b14)
+    assert len(tests) > 0
+    assert len(resp) > 0
+    
+    s2 = stil.load(mydir / 'b14.transition.stil.gz')
+    tests = s2.tests_loc(b14)
+    resp = s2.responses(b14)
+    assert len(tests) > 0
+    assert len(resp) > 0
+
--- a/tests/test_wave_sim.py
+++ b/tests/test_wave_sim.py
@ -29,20 +29,22 @@ def test_wave_eval():
				@@ -29,20 +29,22 @@ def test_wave_eval():
    sat[1] = 16, 16, 0
    sat[2] = 32, 16, 0

-    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times)
+    sdata = np.asarray([1, -1, 0, 0], dtype='float32')
+
+    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata)
    assert z[0] == TMIN

    a[0] = TMIN
-    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times)
+    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata)
    assert z[0] == TMIN

    b[0] = TMIN
-    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times)
+    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata)
    assert z[0] == TMAX

    a[0] = 1  # A _/^^^
    b[0] = 2  # B __/^^
-    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times)
+    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata)
    assert z[0] == TMIN  # ^^^\___ B -> Z fall delay
    assert z[1] == 2.4
    assert z[2] == TMAX
@ -50,7 +52,7 @@ def test_wave_eval():
				@@ -50,7 +52,7 @@ def test_wave_eval():
    a[0] = TMIN  # A ^^^^^^
    b[0] = TMIN  # B ^^^\__
    b[1] = 2
-    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times)
+    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata)
    assert z[0] == 2.3  # ___/^^^ B -> Z rise delay
    assert z[1] == TMAX

@ -59,7 +61,7 @@ def test_wave_eval():
				@@ -59,7 +61,7 @@ def test_wave_eval():
    b[0] = TMIN
    b[1] = 2     # B ^^\__/^^
    b[2] = 2.35
-    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times)
+    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata)
    assert z[0] == 2.3  # __/^^\__
    assert z[1] == 2.75
    assert z[2] == TMAX
@ -69,7 +71,7 @@ def test_wave_eval():
				@@ -69,7 +71,7 @@ def test_wave_eval():
    b[0] = 2  # B __/^^\__
    b[1] = 2.45
    b[2] = TMAX
-    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times)
+    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata)
    assert z[0] == TMIN  # ^^\__/^^
    assert z[1] == 2.4
    assert z[2] == 2.75
@ -80,7 +82,7 @@ def test_wave_eval():
				@@ -80,7 +82,7 @@ def test_wave_eval():
    b[0] = 2  # B __/^^\__
    b[1] = 2.35
    b[2] = TMAX
-    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times)
+    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata)
    assert z[0] == TMIN  # ^^^^^^
    assert z[1] == TMAX

@ -89,7 +91,7 @@ def test_wave_eval():
				@@ -89,7 +91,7 @@ def test_wave_eval():
    b[0] = TMIN
    b[1] = 2  # B ^^\__/^^
    b[2] = 2.25
-    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times)
+    wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times, sdata)
    assert z[0] == TMAX  # ______