more robust matching and assign processing

support injection into specific sims
fanout generator
21 changed files with 915 additions and 322 deletions
--- a/docs/conf.py
+++ b/docs/conf.py
@ -24,7 +24,7 @@ copyright = '2020-2023, Stefan Holst'
				@@ -24,7 +24,7 @@ copyright = '2020-2023, Stefan Holst'
 author = 'Stefan Holst'

 # The full version, including alpha/beta/rc tags
-release = '0.0.4'
+release = '0.0.5'


 # -- General configuration ---------------------------------------------------
--- a/setup.py
+++ b/setup.py
@ -5,7 +5,7 @@ with open('README.rst', 'r') as f:
				@@ -5,7 +5,7 @@ with open('README.rst', 'r') as f:

 setup(
    name='kyupy',
-    version='0.0.4',
+    version='0.0.5',
    description='High-performance processing and analysis of non-hierarchical VLSI designs',
    long_description=long_description,
    long_description_content_type='text/x-rst',
--- a/src/kyupy/init.py
+++ b/src/kyupy/init.py
@ -57,6 +57,18 @@ def hr_bytes(nbytes):
				@@ -57,6 +57,18 @@ def hr_bytes(nbytes):
        multiplier += 1
    return f'{nbytes:.1f}{["", "ki", "Mi", "Gi", "Ti", "Pi"][multiplier]}B'

+def eng(number):
+    """Formats a given number using engineering notation."""
+    exponent = 0
+    if abs(number) < 1:
+        while abs(number) >= 1000:
+            number *= 1000
+            exponent -= 3
+    else:
+        while abs(number) >= 1000:
+            number /= 1000
+            exponent += 3
+    return f'{number:.0f}' + (f'e{exponent}' if exponent != 0 else '')

 def hr_time(seconds):
    """Formats a given time interval for human readability."""
@ -138,10 +150,10 @@ class Log:
				@@ -138,10 +150,10 @@ class Log:
        self._limit = limit

    def stop_limit(self):
+        self._limit = -1
        if self.filtered > 0:
-            log.info(f'{self.filtered} more messages (filtered).')
+            self.info(f'{self.filtered} more messages (filtered).')
            self.filtered = 0
-        self._limit = -1

    def __getstate__(self):
        return {'elapsed': time.perf_counter() - self.start}
@ -149,6 +161,8 @@ class Log:
				@@ -149,6 +161,8 @@ class Log:
    def __setstate__(self, state):
        self.logfile = sys.stdout
        self.indent = 0
+        self._limit = -1
+        self.filtered = 0
        self.start = time.perf_counter() - state['elapsed']

    def write(self, s, indent=0):
--- a/src/kyupy/circuit.py
+++ b/src/kyupy/circuit.py
@ -10,20 +10,40 @@ Circuit graphs also define an ordering of inputs, outputs and other nodes to eas
				@@ -10,20 +10,40 @@ Circuit graphs also define an ordering of inputs, outputs and other nodes to eas

 """

+from __future__ import annotations
+
 from collections import deque, defaultdict
 import re
+from typing import Union

 import numpy as np


 class GrowingList(list):
    def __setitem__(self, index, value):
-        if index >= len(self):
-            self.extend([None] * (index + 1 - len(self)))
+        if value is None: self.has_nones = True
+        if index == len(self): return super().append(value)
+        if index > len(self):
+            super().extend([None] * (index + 1 - len(self)))
+            self.has_nones = True
        super().__setitem__(index, value)

-    def free_index(self):
-        return next((i for i, x in enumerate(self) if x is None), len(self))
+    def __getitem__(self, index):
+        if isinstance(index, slice): return super().__getitem__(index)
+        return super().__getitem__(index) if index < len(self) else None
+
+    @property
+    def free_idx(self):
+        fi = len(self)
+        if hasattr(self, 'has_nones') and self.has_nones:
+            fi = next((i for i, x in enumerate(self) if x is None), len(self))
+            self.has_nones = fi < len(self)
+        return fi
+
+    def without_nones(self):
+        for item in self:
+            if item is not None:
+                yield item


 class IndexList(list):
@ -76,10 +96,10 @@ class Node:
				@@ -76,10 +96,10 @@ class Node:
        by allocating an array or list :code:`my_data` of length :code:`len(n.circuit.nodes)` and
        accessing it by :code:`my_data[n.index]` or simply by :code:`my_data[n]`.
        """
-        self.ins = GrowingList()
+        self.ins: GrowingList[Line] = GrowingList()
        """A list of input connections (:class:`Line` objects).
        """
-        self.outs = GrowingList()
+        self.outs: GrowingList[Line] = GrowingList()
        """A list of output connections (:class:`Line` objects).
        """

@ -135,7 +155,7 @@ class Line:
				@@ -135,7 +155,7 @@ class Line:
    Use the explicit case only if connections to specific pins are required.
    It may overwrite any previous line references in the connection list of the nodes.
    """
-    def __init__(self, circuit, driver, reader):
+    def __init__(self, circuit: Circuit, driver: Union[Node, tuple[Node, int]], reader: Union[Node, tuple[Node, int]]):
        self.circuit = circuit
        """The :class:`Circuit` object the line is part of.
        """
@ -147,7 +167,7 @@ class Line:
				@@ -147,7 +167,7 @@ class Line:
        by allocating an array or list :code:`my_data` of length :code:`len(l.circuit.lines)` and
        accessing it by :code:`my_data[l.index]` or simply by :code:`my_data[l]`.
        """
-        if not isinstance(driver, tuple): driver = (driver, driver.outs.free_index())
+        if not isinstance(driver, tuple): driver = (driver, driver.outs.free_idx)
        self.driver = driver[0]
        """The :class:`Node` object that drives this line.
        """
@ -157,7 +177,7 @@ class Line:
				@@ -157,7 +177,7 @@ class Line:
        This is the position in the list :py:attr:`Node.outs` of the driving node this line referenced from:
        :code:`self.driver.outs[self.driver_pin] == self`.
        """
-        if not isinstance(reader, tuple): reader = (reader, reader.ins.free_index())
+        if not isinstance(reader, tuple): reader = (reader, reader.ins.free_idx)
        self.reader = reader[0]
        """The :class:`Node` object that reads this line.
        """
@ -292,7 +312,7 @@ class Circuit:
				@@ -292,7 +312,7 @@ class Circuit:
    def _locs(self, prefix, nodes):
        d_top = dict()
        for i, n in enumerate(nodes):
-            if m := re.match(fr'({prefix}.*?)((?:[\d_\[\]])*$)', n.name):
+            if m := re.match(fr'({re.escape(prefix)}.*?)((?:[\d_\[\]])*$)', n.name):
                path = [m[1]] + [int(v) for v in re.split(r'[_\[\]]+', m[2]) if len(v) > 0]
                d = d_top
                for j in path[:-1]:
@ -334,15 +354,16 @@ class Circuit:
				@@ -334,15 +354,16 @@ class Circuit:
    def get_or_add_fork(self, name):
        return self.forks[name] if name in self.forks else Node(self, name)

-    def remove_dangling_nodes(self, root_node:Node):
+    def remove_dangling_nodes(self, root_node:Node, keep=[]):
        if len([l for l in root_node.outs if l is not None]) > 0: return
        lines = [l for l in root_node.ins if l is not None]
        drivers = [l.driver for l in lines]
+        if root_node in keep: return
        root_node.remove()
        for l in lines:
            l.remove()
        for d in drivers:
-            self.remove_dangling_nodes(d)
+            self.remove_dangling_nodes(d, keep=keep)

    def eliminate_1to1_forks(self):
        """Removes all forks that drive only one node.
@ -370,6 +391,21 @@ class Circuit:
				@@ -370,6 +391,21 @@ class Circuit:
            in_line.reader_pin = out_reader_pin
            in_line.reader.ins[in_line.reader_pin] = in_line

+    def remove_forks(self):
+        ios = set(self.io_nodes)
+        for n in list(self.forks.values()):
+            if n in ios: continue
+            d = None
+            if (l := n.ins[0]) is not None:
+                d = l.driver
+                l.remove()
+            for l in list(n.outs):
+                if l is None: continue
+                r, rp = l.reader, l.reader_pin
+                l.remove()
+                if d is not None: Line(self, d, (r, rp))
+            n.remove()
+
    def substitute(self, node, impl):
        """Replaces a given node with the given implementation circuit.

@ -428,7 +464,7 @@ class Circuit:
				@@ -428,7 +464,7 @@ class Circuit:
        for l, ll in zip(impl_out_lines, node_out_lines):  # connect outputs
            if ll is None:
                if l.driver in node_map:
-                    self.remove_dangling_nodes(node_map[l.driver])
+                    self.remove_dangling_nodes(node_map[l.driver], keep=ios)
                continue
            if len(l.reader.outs) > 0:  # output is also read by impl. circuit, connect to fork.
                ll.driver = node_map[l.reader]
@ -447,6 +483,21 @@ class Circuit:
				@@ -447,6 +483,21 @@ class Circuit:
            if n.kind in tlib.cells:
                self.substitute(n, tlib.cells[n.kind][0])

+    def remove_constants(self):
+        c1gen = None
+        for n in self.nodes:
+            if n.kind == '__const0__':  # just remove, unconnected inputs are defined 0.
+                for l in n.outs:
+                    l.remove()
+                n.remove()
+            elif n.kind == '__const1__':
+                if c1gen is None: c1gen = Node(self, '__const1gen__', 'INV1')  # one unique const 1 generator
+                for l in n.outs:
+                    r, rp = l.reader, l.reader_pin
+                    l.remove()
+                    Line(self, c1gen, (r, rp))
+                n.remove()
+
    def copy(self):
        """Returns a deep copy of the circuit.
        """
@ -501,14 +552,15 @@ class Circuit:
				@@ -501,14 +552,15 @@ class Circuit:
        substrings 'dff' or 'latch' are yielded first.
        """
        visit_count = np.zeros(len(self.nodes), dtype=np.uint32)
-        queue = deque(n for n in self.nodes if len(n.ins) == 0 or 'dff' in n.kind.lower() or 'latch' in n.kind.lower())
+        start = set(n for n in self.nodes if len(n.ins) == 0 or 'dff' in n.kind.lower() or 'latch' in n.kind.lower())
+        queue = deque(start)
        while len(queue) > 0:
            n = queue.popleft()
            for line in n.outs:
                if line is None: continue
                succ = line.reader
                visit_count[succ] += 1
-                if visit_count[succ] == len(succ.ins) and 'dff' not in succ.kind.lower() and 'latch' not in succ.kind.lower():
+                if visit_count[succ] == len(succ.ins) and succ not in start:
                    queue.append(succ)
            yield n

@ -563,6 +615,21 @@ class Circuit:
				@@ -563,6 +615,21 @@ class Circuit:
            if marks[n]:
                yield n

+    def fanout(self, origin_nodes):
+        """Generator function to iterate over the fan-out cone of a given list of origin nodes.
+
+        Nodes are yielded in topological order.
+        """
+        marks = [False] * len(self.nodes)
+        for n in origin_nodes:
+            marks[n] = True
+        for n in self.topological_order():
+            if not marks[n]:
+                for line in n.ins.without_nones():
+                    marks[n] |= marks[line.driver]
+            if marks[n]:
+                yield n
+
    def fanout_free_regions(self):
        for stem in self.reversed_topological_order():
            if len(stem.outs) == 1 and 'dff' not in stem.kind.lower(): continue
--- a/src/kyupy/logic.py
+++ b/src/kyupy/logic.py
@ -241,6 +241,8 @@ def mv_latch(d, t, q_prev, out=None):
				@@ -241,6 +241,8 @@ def mv_latch(d, t, q_prev, out=None):
 def mv_transition(init, final, out=None):
    """Computes the logic transitions from the initial values of ``init`` to the final values of ``final``.
    Pulses in the input data are ignored. If any of the inputs are ``UNKNOWN``, the result is ``UNKNOWN``.
+    If init is ``UNASSIGNED``, the result is the final value of ``final``.
+    If final is ``UNASSIGNED``, the result is the initial value of ``init``.
    If both inputs are ``UNASSIGNED``, the result is ``UNASSIGNED``.

    :param init: A multi-valued array.
@ -251,7 +253,9 @@ def mv_transition(init, final, out=None):
				@@ -251,7 +253,9 @@ def mv_transition(init, final, out=None):
    out = out or np.empty(np.broadcast(init, final).shape, dtype=np.uint8)
    out[...] = (init & 0b010) | (final & 0b001)
    out[...] |= ((out << 1) ^ (out << 2)) & 0b100
-    unknown = (init == UNKNOWN) | (init == UNASSIGNED) | (final == UNKNOWN) | (final == UNASSIGNED)
+    out[...] = np.choose(init == UNASSIGNED, [out, (final & 0b001) * ONE])
+    out[...] = np.choose(final == UNASSIGNED, [out, ((init & 0b010) >> 1) * ONE])
+    unknown = (init == UNKNOWN) | (final == UNKNOWN)
    unassigned = (init == UNASSIGNED) & (final == UNASSIGNED)
    np.putmask(out, unknown, UNKNOWN)
    np.putmask(out, unassigned, UNASSIGNED)
@ -265,6 +269,18 @@ def mv_to_bp(mva):
				@@ -265,6 +269,18 @@ def mv_to_bp(mva):
    return np.packbits(unpackbits(mva)[...,:3], axis=-2, bitorder='little').swapaxes(-1,-2)


+def mv_init(mva):
+    """Returns the initial binary values for mva.
+    """
+    return (mva>>1) & ((mva>>2)|mva) & 1
+
+
+def mv_final(mva):
+    """Returns the final binary value of mva.
+    """
+    return mva & ((mva>>2)|(mva>>1)) & 1
+
+
 def bparray(*a):
    """Converts (lists of) Boolean values or strings into a bit-parallel array.

--- a/src/kyupy/logic_sim.py
+++ b/src/kyupy/logic_sim.py
@ -10,9 +10,10 @@ import math
				@@ -10,9 +10,10 @@ import math

 import numpy as np

-from . import numba, logic, hr_bytes, sim
+from . import numba, logic, hr_bytes, sim, eng, cdiv
 from .circuit import Circuit

+
 class LogicSim(sim.SimOps):
    """A bit-parallel naïve combinational simulator for 2-, 4-, or 8-valued logic.

@ -28,7 +29,7 @@ class LogicSim(sim.SimOps):
				@@ -28,7 +29,7 @@ class LogicSim(sim.SimOps):
        self.m = m
        self.mdim = math.ceil(math.log2(m))
        self.sims = sims
-        nbytes = (sims - 1) // 8 + 1
+        nbytes = cdiv(sims, 8)

        self.c = np.zeros((self.c_len, self.mdim, nbytes), dtype=np.uint8)
        self.s = np.zeros((2, self.s_len, 3, nbytes), dtype=np.uint8)
@ -44,14 +45,14 @@ class LogicSim(sim.SimOps):
				@@ -44,14 +45,14 @@ class LogicSim(sim.SimOps):
        self.s[:,:,1,:] = 255  # unassigned

    def __repr__(self):
-        return f'{{name: "{self.circuit.name}", sims: {self.sims}, m: {self.m}, c_bytes: {self.c.nbytes}}}'
+        return f'{{name: "{self.circuit.name}", sims: {self.sims}, m: {self.m}, c_bytes: {eng(self.c.nbytes)}}}'

    def s_to_c(self):
        """Copies the values from ``s[0]`` the inputs of the combinational portion.
        """
        self.c[self.pippi_c_locs] = self.s[0, self.pippi_s_locs, :self.mdim]

-    def c_prop(self, inject_cb=None):
+    def c_prop(self, sims=None, inject_cb=None, flip_line=-1, flip_mask=None):
        """Propagate the input values through the combinational circuit towards the outputs.

        Performs all logic operations in topological order.
@ -67,10 +68,17 @@ class LogicSim(sim.SimOps):
				@@ -67,10 +68,17 @@ class LogicSim(sim.SimOps):
        t1 = self.c_locs[self.tmp2_idx]
        if self.m == 2:
            if inject_cb is None:
-                _prop_cpu(self.ops, self.c_locs, self.c)
+                if flip_mask is None:
+                    flip_mask = np.full(self.c.shape[-1], 255, dtype=np.uint8)
+                else:
+                    if len(flip_mask) < self.c.shape[-1]:
+                        flip_mask2 = np.full(self.c.shape[-1], 0, dtype=np.uint8)
+                        flip_mask2[:len(flip_mask)] = flip_mask
+                        flip_mask = flip_mask2
+                _prop_cpu(self.ops, self.c_locs, self.c, int(flip_line), flip_mask)
            else:
-                for op, o0, i0, i1, i2, i3 in self.ops[:,:6]:
-                    o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)]
+                for op, o0l, i0l, i1l, i2l, i3l in self.ops[:,:6]:
+                    o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0l, i0l, i1l, i2l, i3l)]
                    if op == sim.BUF1: self.c[o0]=self.c[i0]
                    elif op == sim.INV1: self.c[o0] = ~self.c[i0]
                    elif op == sim.AND2: self.c[o0] = self.c[i0] & self.c[i1]
@ -105,10 +113,10 @@ class LogicSim(sim.SimOps):
				@@ -105,10 +113,10 @@ class LogicSim(sim.SimOps):
                    elif op == sim.OAI211:self.c[o0] = ~((self.c[i0] | self.c[i1]) & self.c[i2] & self.c[i3])
                    elif op == sim.MUX21: self.c[o0] = (self.c[i0] & ~self.c[i2]) | (self.c[i1] & self.c[i2])
                    else: print(f'unknown op {op}')
-                    inject_cb(o0, self.s[o0])
+                    inject_cb(o0l, self.c[o0])
        elif self.m == 4:
-            for op, o0, i0, i1, i2, i3 in self.ops[:,:6]:
-                o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)]
+            for op, o0l, i0l, i1l, i2l, i3l in self.ops[:,:6]:
+                o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0l, i0l, i1l, i2l, i3l)]
                if op == sim.BUF1: self.c[o0]=self.c[i0]
                elif op == sim.INV1: logic.bp4v_not(self.c[o0], self.c[i0])
                elif op == sim.AND2: logic.bp4v_and(self.c[o0], self.c[i0], self.c[i1])
@ -181,9 +189,10 @@ class LogicSim(sim.SimOps):
				@@ -181,9 +189,10 @@ class LogicSim(sim.SimOps):
                    logic.bp4v_and(self.c[t1], self.c[i1], self.c[i2])
                    logic.bp4v_or(self.c[o0], self.c[t0], self.c[t1])
                else: print(f'unknown op {op}')
+                if inject_cb is not None: inject_cb(o0l, self.c[o0])
        else:
-            for op, o0, i0, i1, i2, i3 in self.ops[:,:6]:
-                o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)]
+            for op, o0l, i0l, i1l, i2l, i3l in self.ops[:,:6]:
+                o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0l, i0l, i1l, i2l, i3l)]
                if op == sim.BUF1: self.c[o0]=self.c[i0]
                elif op == sim.INV1: logic.bp8v_not(self.c[o0], self.c[i0])
                elif op == sim.AND2: logic.bp8v_and(self.c[o0], self.c[i0], self.c[i1])
@ -256,7 +265,7 @@ class LogicSim(sim.SimOps):
				@@ -256,7 +265,7 @@ class LogicSim(sim.SimOps):
                    logic.bp8v_and(self.c[t1], self.c[i1], self.c[i2])
                    logic.bp8v_or(self.c[o0], self.c[t0], self.c[t1])
                else: print(f'unknown op {op}')
-                if inject_cb is not None: inject_cb(o0, self.s[o0])
+                if inject_cb is not None: inject_cb(o0l, self.c[o0])

    def c_to_s(self):
        """Copies (captures) the results of the combinational portion to ``s[1]``.
@ -296,9 +305,9 @@ class LogicSim(sim.SimOps):
				@@ -296,9 +305,9 @@ class LogicSim(sim.SimOps):


@numba.njit
-def _prop_cpu(ops, c_locs, c):
-    for op, o0, i0, i1, i2, i3 in ops[:,:6]:
-        o0, i0, i1, i2, i3 = [c_locs[x] for x in (o0, i0, i1, i2, i3)]
+def _prop_cpu(ops, c_locs, c, flip_line, flip_mask):
+    for op, o0l, i0l, i1l, i2l, i3l in ops[:,:6]:
+        o0, i0, i1, i2, i3 = [c_locs[x] for x in (o0l, i0l, i1l, i2l, i3l)]
        if op == sim.BUF1: c[o0]=c[i0]
        elif op == sim.INV1: c[o0] = ~c[i0]
        elif op == sim.AND2: c[o0] = c[i0] & c[i1]
@ -333,3 +342,124 @@ def _prop_cpu(ops, c_locs, c):
				@@ -333,3 +342,124 @@ def _prop_cpu(ops, c_locs, c):
        elif op == sim.OAI211: c[o0] = ~((c[i0] | c[i1]) & c[i2] & c[i3])
        elif op == sim.MUX21: c[o0] = (c[i0] & ~c[i2]) | (c[i1] & c[i2])
        else: print(f'unknown op {op}')
+        if flip_line >= 0 and o0l == flip_line:
+            #n = len(flip_mask)
+            c[o0] = c[o0] ^ flip_mask
+
+
+class LogicSim6V(sim.SimOps):
+    """A bit-parallel naïve combinational simulator for 6-valued logic.
+
+    :param circuit: The circuit to simulate.
+    :param sims: The number of parallel logic simulations to perform.
+    :param c_reuse: If True, intermediate signal values may get overwritten when not needed anymore to save memory.
+    :param strip_forks: If True, forks are not included in the simulation model to save memory and simulation time.
+    """
+    def __init__(self, circuit: Circuit, sims: int = 8, c_reuse: bool = False, strip_forks: bool = False):
+        super().__init__(circuit, c_reuse=c_reuse, strip_forks=strip_forks)
+        self.sims = sims
+        nbytes = cdiv(sims, 8)
+
+        self.c = np.zeros((self.c_len, 3, nbytes), dtype=np.uint8)
+        self.s = np.zeros((2, self.s_len, self.sims), dtype=np.uint8)
+        """Logic values of the sequential elements (flip-flops) and ports.
+
+        It is a pair of arrays in mv storage format:
+
+        * ``s[0]`` Assigned values. Simulator will read (P)PI value from here.
+        * ``s[1]`` Result values. Simulator will write (P)PO values here.
+
+        Access this array to assign new values to the (P)PIs or read values from the (P)POs.
+        """
+
+    def __repr__(self):
+        return f'{{name: "{self.circuit.name}", sims: {self.sims}, c_bytes: {eng(self.c.nbytes)}}}'
+
+    def s_to_c(self):
+        """Assigns the values from ``s[0]`` to the inputs of the combinational portion.
+        """
+        self.c[self.pippi_c_locs] = logic.mv_to_bp(self.s[0, self.pippi_s_locs])
+
+    def c_prop(self):
+        c_prop_cpu(self.ops, self.c, self.c_locs, self.tmp_idx, self.tmp2_idx)
+
+    def c_to_s(self):
+        """Captures the results of the combinational portion into ``s[1]``.
+        """
+        self.s[1, self.poppo_s_locs] = logic.bp_to_mv(self.c[self.poppo_c_locs])[:,:self.sims]
+
+
+@numba.njit
+def c_prop_cpu(ops, c, c_locs, tmp_idx, tmp2_idx):
+    t0 = c[c_locs[tmp_idx]]
+    t1 = c[c_locs[tmp2_idx]]
+    inv_op = np.array([255, 255, 0], dtype=np.uint8)[np.newaxis, :, np.newaxis]
+    for op, o0l, i0l, i1l, i2l, i3l in ops[:,:6]:
+        o0, i0, i1, i2, i3 = [c[c_locs[x]] for x in (o0l, i0l, i1l, i2l, i3l)]
+        if op == sim.BUF1 or op == sim.INV1:
+            o0[...] = i0
+        elif op == sim.AND2 or op == sim.NAND2:
+            o0[0] = i0[0] & i1[0]
+            o0[1] = i0[1] & i1[1]
+            o0[2] = (i0[2]&(i1[0]|i1[1]|i1[2])|
+                     i1[2]&(i0[0]|i0[1]|i0[2]))
+        elif op == sim.AND3 or op == sim.NAND3:
+            o0[0] = i0[0] & i1[0] & i2[0]
+            o0[1] = i0[1] & i1[1] & i2[1]
+            o0[2] = (i0[2]&(i1[0]|i1[1]|i1[2])&(i2[0]|i2[1]|i2[2])|
+                     i1[2]&(i0[0]|i0[1]|i0[2])&(i2[0]|i2[1]|i2[2])|
+                     i2[2]&(i0[0]|i0[1]|i0[2])&(i1[0]|i1[1]|i1[2]))
+        elif op == sim.AND4 or op == sim.NAND4:
+            o0[0] = i0[0] & i1[0] & i2[0] & i3[0]
+            o0[1] = i0[1] & i1[1] & i2[1] & i3[1]
+            o0[2] = (i0[2]&(i1[0]|i1[1]|i1[2])&(i2[0]|i2[1]|i2[2])&(i3[0]|i3[1]|i3[2])|
+                     i1[2]&(i0[0]|i0[1]|i0[2])&(i2[0]|i2[1]|i2[2])&(i3[0]|i3[1]|i3[2])|
+                     i2[2]&(i0[0]|i0[1]|i0[2])&(i1[0]|i1[1]|i1[2])&(i3[0]|i3[1]|i3[2])|
+                     i3[2]&(i0[0]|i0[1]|i0[2])&(i1[0]|i1[1]|i1[2])&(i2[0]|i2[1]|i2[2]))
+        elif op == sim.OR2 or op == sim.NOR2:
+            o0[0] = i0[0] | i1[0]
+            o0[1] = i0[1] | i1[1]
+            o0[2] = (i0[2]&(~i1[0]|~i1[1]|i1[2])|
+                     i1[2]&(~i0[0]|~i0[1]|i0[2]))
+        elif op == sim.OR3 or op == sim.NOR3:
+            o0[0] = i0[0] | i1[0] | i2[0]
+            o0[1] = i0[1] | i1[1] | i2[1]
+            o0[2] = (i0[2]&(~i1[0]|~i1[1]|i1[2])&(~i2[0]|~i2[1]|i2[2])|
+                     i1[2]&(~i0[0]|~i0[1]|i0[2])&(~i2[0]|~i2[1]|i2[2])|
+                     i2[2]&(~i0[0]|~i0[1]|i0[2])&(~i1[0]|~i1[1]|i1[2]))
+        elif op == sim.OR4 or op == sim.NOR4:
+            o0[0] = i0[0] | i1[0] | i2[0] | i3[0]
+            o0[1] = i0[1] | i1[1] | i2[1] | i3[1]
+            o0[2] = (i0[2]&(~i1[0]|~i1[1]|i1[2])&(~i2[0]|~i2[1]|i2[2])&(~i3[0]|~i3[1]|i3[2])|
+                     i1[2]&(~i0[0]|~i0[1]|i0[2])&(~i2[0]|~i2[1]|i2[2])&(~i3[0]|~i3[1]|i3[2])|
+                     i2[2]&(~i0[0]|~i0[1]|i0[2])&(~i1[0]|~i1[1]|i1[2])&(~i3[0]|~i3[1]|i3[2])|
+                     i3[2]&(~i0[0]|~i0[1]|i0[2])&(~i1[0]|~i1[1]|i1[2])&(~i2[0]|~i2[1]|i2[2]))
+        elif op == sim.XOR2 or op == sim.XNOR2:
+            o0[0] = i0[0] ^ i1[0]
+            o0[1] = i0[1] ^ i1[1]
+            o0[2] = i0[2] | i1[2]
+        elif op == sim.MUX21:
+            # t1 = ~i2
+            t1[...] = i2 ^ inv_op
+            # t0 = i0 & t1
+            t0[0] = i0[0] & t1[0]
+            t0[1] = i0[1] & t1[1]
+            t0[2] = (i0[2]&(t1[0]|t1[1]|t1[2])|
+                     t1[2]&(i0[0]|i0[1]|i0[2]))
+            # t1 = i1 & i2
+            t1[0] = i1[0] & i2[0]
+            t1[1] = i1[1] & i2[1]
+            t1[2] = (i1[2]&(i2[0]|i2[1]|i2[2])|
+                     i2[2]&(i1[0]|i1[1]|i1[2]))
+            # o0 = t0 | t1
+            o0[0] = t0[0] | t1[0]
+            o0[1] = t0[1] | t1[1]
+            o0[2] = (t0[2]&(~t1[0]|~t1[1]|t1[2])|
+                     t1[2]&(~t0[0]|~t0[1]|t0[2]))
+        else: print(f'unknown op {op}')
+
+        if (op == sim.INV1 or
+            op == sim.NAND2 or op == sim.NAND3 or op == sim.NAND4 or
+            op == sim.NOR2 or op == sim.NOR3 or op == sim.NOR4 or
+            op == sim.XNOR2):
+            o0[...] = o0 ^ inv_op
--- a/src/kyupy/sdf.py
+++ b/src/kyupy/sdf.py
@ -61,20 +61,21 @@ class DelayFile:
				@@ -61,20 +61,21 @@ class DelayFile:

        delays = np.zeros((len(circuit.lines), 2, 2, 3))  # dataset last during construction.

-        for name, iopaths in self.cells.items():
-            name = name.replace('\\', '')
-            if cell := circuit.cells.get(name, None):
-                for i_pin_spec, o_pin_spec, *dels in iopaths:
-                    if i_pin_spec.startswith('(posedge '): i_pol_idxs = [0]
-                    elif i_pin_spec.startswith('(negedge '): i_pol_idxs = [1]
-                    else: i_pol_idxs = [0, 1]
-                    i_pin_spec = re.sub(r'\((neg|pos)edge ([^)]+)\)', r'\2', i_pin_spec)
-                    if line := cell.ins[tlib.pin_index(cell.kind, i_pin_spec)]:
-                        delays[line, i_pol_idxs] = [d if len(d) > 0 else [0, 0, 0] for d in dels]
-                    else:
-                        log.warn(f'No line to annotate in circuit: {i_pin_spec} for {cell}')
-            else:
-                log.warn(f'Name from SDF not found in circuit: {name}')
+        with log.limit(50):
+            for name, iopaths in self.cells.items():
+                name = name.replace('\\', '')
+                if cell := circuit.cells.get(name, None):
+                    for i_pin_spec, o_pin_spec, *dels in iopaths:
+                        if i_pin_spec.startswith('(posedge '): i_pol_idxs = [0]
+                        elif i_pin_spec.startswith('(negedge '): i_pol_idxs = [1]
+                        else: i_pol_idxs = [0, 1]
+                        i_pin_spec = re.sub(r'\((neg|pos)edge ([^)]+)\)', r'\2', i_pin_spec)
+                        if line := cell.ins[tlib.pin_index(cell.kind, i_pin_spec)]:
+                            delays[line, i_pol_idxs] = [d if len(d) > 0 else [0, 0, 0] for d in dels]
+                        else:
+                            log.warn(f'No line to annotate in circuit: {i_pin_spec} for {cell}')
+                else:
+                    log.warn(f'Name from SDF not found in circuit: {name}')

        return np.moveaxis(delays, -1, 0)

@ -102,11 +103,12 @@ class DelayFile:
				@@ -102,11 +103,12 @@ class DelayFile:

        delays = np.zeros((len(circuit.lines), 2, 2, 3))  # dataset last during construction.

+        nonfork_annotations = 0
        for n1, n2, *delvals in self._interconnects:
            delvals = [d if len(d) > 0 else [0, 0, 0] for d in delvals]
            if max(max(delvals)) == 0: continue
-            cn1, pn1 = n1.split('/') if '/' in n1 else (n1, None)
-            cn2, pn2 = n2.split('/') if '/' in n2 else (n2, None)
+            cn1, pn1 = (n1, None) if (slash := n1.rfind('/')) < 0 else (n1[:slash], n1[slash+1:])
+            cn2, pn2 = (n2, None) if (slash := n2.rfind('/')) < 0 else (n2[:slash], n2[slash+1:])
            cn1 = cn1.replace('\\','')
            cn2 = cn2.replace('\\','')
            c1, c2 = circuit.cells[cn1], circuit.cells[cn2]
@ -119,19 +121,27 @@ class DelayFile:
				@@ -119,19 +121,27 @@ class DelayFile:
                log.warn(f'No line to annotate pin {pn2} of {c2}')
                continue
            f1, f2 = c1.outs[p1].reader, c2.ins[p2].driver  # find the forks between cells.
-            assert f1.kind == '__fork__'
-            assert f2.kind == '__fork__'
-            if f1 != f2:  # at least two forks, make sure f2 is a branchfork connected to f1
-                assert len(f2.outs) == 1
-                assert f1.outs[f2.ins[0].driver_pin] == f2.ins[0]
-                line = f2.ins[0]
-            elif len(f2.outs) == 1:  # f1==f2, only OK when there is no fanout.
-                line = f2.ins[0]
+            if f1 == c2 and f2 == c1:
+                nonfork_annotations += 1
+                if nonfork_annotations < 10:
+                    log.warn(f'No fork between {c1.name}/{p1} and {c2.name}/{p2}, using {c2.name}/{p2}')
+                line = c2.ins[p2]
            else:
-                log.warn(f'No branchfork to annotate interconnect delay {c1.name}/{p1}->{c2.name}/{p2}')
-                continue
+                assert f1.kind == '__fork__'
+                assert f2.kind == '__fork__'
+                if len(f2.outs) == 1:
+                    assert f1 == f2 or f1.outs[f2.ins[0].driver_pin] == f2.ins[0]
+                    line = f2.ins[0]
+                else:
+                    nonfork_annotations += 1
+                    if nonfork_annotations < 10:
+                        log.warn(f'No branchfork between {c1.name}/{p1} and {c2.name}/{p2}, using {c2.name}/{p2}')
+                    line = c2.ins[p2]
            delays[line, :] = delvals

+        if nonfork_annotations > 0:
+            log.warn(f'{nonfork_annotations} interconnect annotations were moved to gate inputs due to missing forks.')
+
        return np.moveaxis(delays, -1, 0)


@ -156,6 +166,10 @@ class SdfTransformer(Transformer):
				@@ -156,6 +166,10 @@ class SdfTransformer(Transformer):
        entries = [e for a in args if hasattr(a, 'children') for e in a.children]
        return name, entries

+    @staticmethod
+    def cond(args):  # ignore conditions
+        return args[1]
+
    @staticmethod
    def start(args):
        name = next((a for a in args if isinstance(a, str)), None)
@ -180,9 +194,12 @@ GRAMMAR = r"""
				@@ -180,9 +194,12 @@ GRAMMAR = r"""
        | "(INSTANCE" ID? ")"
        | "(TIMINGCHECK" _ignore* ")"
        | delay )* ")"
-    delay: "(DELAY" "(ABSOLUTE" (interconnect | iopath)* ")" ")"
+    delay: "(DELAY" "(ABSOLUTE" (interconnect | iopath | cond)* ")" ")"
    interconnect: "(INTERCONNECT" ID ID triple* ")"
    iopath: "(IOPATH" ID_OR_EDGE ID_OR_EDGE triple* ")"
+    cond: "(" "COND" cond_port_expr iopath ")"
+    ?cond_port_expr: ID | "(" cond_port_expr ")" | cond_port_expr BINARY_OP cond_port_expr
+    BINARY_OP: /&&/ | /==/
    NAME: /[^"]+/
    ID_OR_EDGE: ( /[^() ]+/ | "(" /[^)]+/ ")" )
    ID: ( /[^"() ]+/ | "\"" /[^"]+/ "\"" )
--- a/src/kyupy/sim.py
+++ b/src/kyupy/sim.py
@ -4,9 +4,14 @@ from bisect import bisect, insort_left
				@@ -4,9 +4,14 @@ from bisect import bisect, insort_left

 import numpy as np

+from .circuit import Circuit
+
 BUF1 = np.uint16(0b1010_1010_1010_1010)
 INV1 = ~BUF1

+__const0__ = BUF1
+__const1__ = INV1
+
 AND2 = np.uint16(0b1000_1000_1000_1000)
 AND3 = np.uint16(0b1000_0000_1000_0000)
 AND4 = np.uint16(0b1000_0000_0000_0000)
@ -39,7 +44,10 @@ AOI211, OAI211 = ~AO211, ~OA211
				@@ -39,7 +44,10 @@ AOI211, OAI211 = ~AO211, ~OA211

 MUX21 = np.uint16(0b1100_1010_1100_1010)  # z = i1 if i2 else i0 (i2 is select)

-names = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16)])
+names = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16) and '__' not in k])
+
+prim2name = dict([(v, k) for k, v in globals().items() if isinstance(v, np.uint16) and '__' not in k])
+name2prim = dict([(k, v) for k, v in globals().items() if isinstance(v, np.uint16)])

 kind_prefixes = {
    'nand': (NAND4, NAND3, NAND2),
@ -156,7 +164,7 @@ class SimOps:
				@@ -156,7 +164,7 @@ class SimOps:
    :param c_reuse: If enabled, memory of intermediate signal waveforms will be re-used. This greatly reduces
        memory footprint, but intermediate signal waveforms become unaccessible after a propagation.
    """
-    def __init__(self, circuit, c_caps=1, c_caps_min=1, a_ctrl=None, c_reuse=False, strip_forks=False):
+    def __init__(self, circuit: Circuit, c_caps=1, c_caps_min=1, a_ctrl=None, c_reuse=False, strip_forks=False):
        self.circuit = circuit
        self.s_len = len(circuit.s_nodes)

@ -175,84 +183,74 @@ class SimOps:
				@@ -175,84 +183,74 @@ class SimOps:
        self.ppo_offset = self.ppi_offset + self.s_len
        self.c_locs_len = self.ppo_offset + self.s_len

-        # translate circuit structure into self.ops
-        ops = []
-        interface_dict = dict((n, i) for i, n in enumerate(circuit.s_nodes))
-        for n in circuit.topological_order():
-            if n in interface_dict:
-                inp_idx = self.ppi_offset + interface_dict[n]
-                if len(n.outs) > 0 and n.outs[0] is not None:  # first output of a PI/PPI
-                    ops.append((BUF1, n.outs[0].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[n.outs[0]]))
-                if 'dff' in n.kind.lower():  # second output of DFF is inverted
-                    if len(n.outs) > 1 and n.outs[1] is not None:
-                        ops.append((INV1, n.outs[1].index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[n.outs[1]]))
-                else:  # if not DFF, no output is inverted.
-                    for o_line in n.outs[1:]:
-                        if o_line is not None:
-                            ops.append((BUF1, o_line.index, inp_idx, self.zero_idx, self.zero_idx, self.zero_idx, *a_ctrl[o_line]))
-                continue
-            # regular node, not PI/PPI or PO/PPO
-            o0_idx = n.outs[0].index if len(n.outs) > 0 and n.outs[0] is not None else self.tmp_idx
-            i0_idx = n.ins[0].index if len(n.ins) > 0 and n.ins[0] is not None else self.zero_idx
-            i1_idx = n.ins[1].index if len(n.ins) > 1 and n.ins[1] is not None else self.zero_idx
-            i2_idx = n.ins[2].index if len(n.ins) > 2 and n.ins[2] is not None else self.zero_idx
-            i3_idx = n.ins[3].index if len(n.ins) > 3 and n.ins[3] is not None else self.zero_idx
-            kind = n.kind.lower()
-            if kind == '__fork__':
-                if not strip_forks:
-                    for o_line in n.outs:
-                        if o_line is not None:
-                            ops.append((BUF1, o_line.index, i0_idx, i1_idx, i2_idx, i3_idx, *a_ctrl[o_line]))
-                continue
-            sp = None
-            for prefix, prims in kind_prefixes.items():
-                if kind.startswith(prefix):
-                    sp = prims[0]
-                    if i3_idx == self.zero_idx:
-                        sp = prims[1]
-                        if i2_idx == self.zero_idx:
-                            sp = prims[2]
-                    break
-            if sp is None:
-                print('unknown cell type', kind)
-            else:
-                ops.append((sp, o0_idx, i0_idx, i1_idx, i2_idx, i3_idx, *a_ctrl[o0_idx]))
-
-        self.ops = np.asarray(ops, dtype='int32')
+        # ALAP-toposort the circuit into self.ops
+        levels = []
+
+        ppio2idx = dict((n, i) for i, n in enumerate(circuit.s_nodes))
+        ppos = set([n for n in circuit.s_nodes if len(n.ins) > 0])
+        readers = np.array([1 if l.reader in ppos else len(l.reader.outs) for l in circuit.lines], dtype=np.int32)  # for ref-counting forks
+
+        level_lines = [n.ins[0] for n in ppos]  # start from PPOs
+        # FIXME: Should probably instanciate buffers for PPOs and attach DFF clocks
+
+        while len(level_lines) > 0:  # traverse the circuit level-wise back towards (P)PIs
+            level_ops = []
+            prev_level_lines = []
+
+            for l in level_lines:
+                n = l.driver
+                in_idxs = [n.ins[x].index if len(n.ins) > x and n.ins[x] is not None else self.zero_idx for x in [0,1,2,3]]
+                if n in ppio2idx:
+                    in_idxs[0] = self.ppi_offset + ppio2idx[n]
+                    if l.driver_pin == 1 and 'dff' in n.kind.lower():  # second output of DFF is inverted
+                        level_ops.append((INV1, l.index, *in_idxs, *a_ctrl[l]))
+                    else:
+                        level_ops.append((BUF1, l.index, *in_idxs, *a_ctrl[l]))
+                elif n.kind == '__fork__':
+                    readers[n.ins[0]] -= 1
+                    if readers[n.ins[0]] == 0: prev_level_lines.append(n.ins[0])
+                    if not strip_forks: level_ops.append((BUF1, l.index, *in_idxs, *a_ctrl[l]))
+                else:
+                    prev_level_lines += n.ins
+                    sp = None
+                    kind = n.kind.lower()
+                    for prefix, prims in kind_prefixes.items():
+                        if kind.startswith(prefix):
+                            sp = prims[0]
+                            if in_idxs[3] == self.zero_idx:
+                                sp = prims[1]
+                                if in_idxs[2] == self.zero_idx:
+                                    sp = prims[2]
+                            break
+                    if sp is None:
+                        print('unknown cell type', kind)
+                    else:
+                        level_ops.append((sp, l.index, *in_idxs, *a_ctrl[l]))
+
+            if len(level_ops) > 0: levels.append(level_ops)
+            level_lines = prev_level_lines
+
+        self.levels = [np.asarray(lv, dtype=np.int32) for lv in levels[::-1]]
+        level_sums = np.cumsum([0]+[len(lv) for lv in self.levels], dtype=np.int32)
+        self.level_starts, self.level_stops = level_sums[:-1], level_sums[1:]
+        self.ops = np.vstack(self.levels)

        # create a map from fanout lines to stem lines for fork stripping
-        stems = np.zeros(self.c_locs_len, dtype='int32') - 1  # default to -1: 'no fanout line'
+        stems = np.full(self.c_locs_len, -1, dtype=np.int32)  # default to -1: 'no fanout line'
        if strip_forks:
            for f in circuit.forks.values():
                prev_line = f.ins[0]
                while prev_line.driver.kind == '__fork__':
                    prev_line = prev_line.driver.ins[0]
-                stem_idx = prev_line.index
                for ol in f.outs:
                    if ol is not None:
-                        stems[ol] = stem_idx
-
-        # calculate level (distance from PI/PPI) and reference count for each line
-        levels = np.zeros(self.c_locs_len, dtype='int32')
-        ref_count = np.zeros(self.c_locs_len, dtype='int32')
-        level_starts = [0]
-        current_level = 1
-        for i, op in enumerate(self.ops):
-            # if we fork-strip, always take the stems for determining fan-in level
-            i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2]
-            i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3]
-            i2_idx = stems[op[4]] if stems[op[4]] >= 0 else op[4]
-            i3_idx = stems[op[5]] if stems[op[5]] >= 0 else op[5]
-            if levels[i0_idx] >= current_level or levels[i1_idx] >= current_level or levels[i2_idx] >= current_level or levels[i3_idx] >= current_level:
-                current_level += 1
-                level_starts.append(i)
-            levels[op[1]] = current_level  # set level of the output line
-            ref_count[i0_idx] += 1
-            ref_count[i1_idx] += 1
-            ref_count[i2_idx] += 1
-            ref_count[i3_idx] += 1
-        self.level_starts = np.asarray(level_starts, dtype='int32')
-        self.level_stops = np.asarray(level_starts[1:] + [len(self.ops)], dtype='int32')
+                        stems[ol] = prev_line.index
+
+        ref_count = np.zeros(self.c_locs_len, dtype=np.int32)
+
+        for op in self.ops:
+            for x in [2, 3, 4, 5]:
+                ref_count[stems[op[x]] if stems[op[x]] >= 0 else op[x]] += 1

        # combinational signal allocation table. maps line and interface indices to self.c memory locations
        self.c_locs = np.full((self.c_locs_len,), -1, dtype=np.int32)
@ -278,9 +276,9 @@ class SimOps:
				@@ -278,9 +276,9 @@ class SimOps:
                ref_count[i0_idx] += 1

        # allocate memory for the rest of the circuit
-        for op_start, op_stop in zip(self.level_starts, self.level_stops):
+        for ops in self.levels:
            free_set = set()
-            for op in self.ops[op_start:op_stop]:
+            for op in ops:
                # if we fork-strip, always take the stems
                i0_idx = stems[op[2]] if stems[op[2]] >= 0 else op[2]
                i1_idx = stems[op[3]] if stems[op[3]] >= 0 else op[3]
@ -299,7 +297,8 @@ class SimOps:
				@@ -299,7 +297,8 @@ class SimOps:
                self.c_locs[o_idx], self.c_caps[o_idx] = h.alloc(cap), cap
            if c_reuse:
                for loc in free_set:
-                    h.free(loc)
+                    if loc >= 0:  # DFF clocks are not allocated. Ignore for now.
+                        h.free(loc)

        # copy memory location and capacity from stems to fanout lines
        for lidx, stem in enumerate(stems):
@ -311,6 +310,15 @@ class SimOps:
				@@ -311,6 +310,15 @@ class SimOps:
            if len(n.ins) > 0:
                self.c_locs[self.ppo_offset + i], self.c_caps[self.ppo_offset + i] = self.c_locs[n.ins[0]], self.c_caps[n.ins[0]]

+        # line use information
+        self.line_use_start = np.full(self.c_locs_len, -1, dtype=np.int32)
+        self.line_use_stop = np.full(self.c_locs_len, len(self.levels), dtype=np.int32)
+        for i, lv in enumerate(self.levels):
+            for op in lv:
+                self.line_use_start[op[1]] = i
+                for x in [2, 3, 4, 5]:
+                    self.line_use_stop[op[x]] = i
+
        self.c_len = h.max_size

        d = defaultdict(int)
--- a/src/kyupy/stil.py
+++ b/src/kyupy/stil.py
@ -41,7 +41,7 @@ class StilFile:
				@@ -41,7 +41,7 @@ class StilFile:
                unload = {}
                for so_port in self.so_ports:
                    if so_port in call.parameters:
-                        unload[so_port] = call.parameters[so_port].replace('\n', '').replace('N', '-')
+                        unload[so_port] = call.parameters[so_port]
                if len(capture) > 0:
                    self.patterns.append(ScanPattern(sload, launch, capture, unload))
                    capture = {}
@ -49,11 +49,9 @@ class StilFile:
				@@ -49,11 +49,9 @@ class StilFile:
                sload = {}
                for si_port in self.si_ports:
                    if si_port in call.parameters:
-                        sload[si_port] = call.parameters[si_port].replace('\n', '').replace('N', '-')
-            if call.name.endswith('_launch'):
-                launch = dict((k, v.replace('\n', '').replace('N', '-')) for k, v in call.parameters.items())
-            if call.name.endswith('_capture'):
-                capture = dict((k, v.replace('\n', '').replace('N', '-')) for k, v in call.parameters.items())
+                        sload[si_port] = call.parameters[si_port]
+            if call.name.endswith('_launch'): launch = call.parameters
+            if call.name.endswith('_capture'): capture = call.parameters

    def _maps(self, c):
        interface = list(c.io_nodes) + [n for n in c.nodes if 'DFF' in n.kind]
@ -100,12 +98,12 @@ class StilFile:
				@@ -100,12 +98,12 @@ class StilFile:
        tests = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED)
        for i, p in enumerate(self.patterns):
            for si_port in self.si_ports.keys():
-                pattern = logic.mvarray(p.load[si_port])
+                pattern = logic.mvarray(p.load[si_port][0])
                inversions = np.choose((pattern == logic.UNASSIGNED) | (pattern == logic.UNKNOWN),
                                       [scan_inversions[si_port], logic.ZERO]).astype(np.uint8)
                np.bitwise_xor(pattern, inversions, out=pattern)
                tests[scan_maps[si_port], i] = pattern
-            tests[pi_map, i] = logic.mvarray(p.capture['_pi'])
+            tests[pi_map, i] = logic.mvarray(p.capture['_pi'][0])
        return tests

    def tests_loc(self, circuit, init_filter=None, launch_filter=None):
@ -134,12 +132,12 @@ class StilFile:
				@@ -134,12 +132,12 @@ class StilFile:
        for i, p in enumerate(self.patterns):
            # init.set_values(i, '0' * len(interface))
            for si_port in self.si_ports.keys():
-                pattern = logic.mvarray(p.load[si_port])
+                pattern = logic.mvarray(p.load[si_port][0])
                inversions = np.choose((pattern == logic.UNASSIGNED) | (pattern == logic.UNKNOWN),
                                       [scan_inversions[si_port], logic.ZERO]).astype(np.uint8)
                np.bitwise_xor(pattern, inversions, out=pattern)
                init[scan_maps[si_port], i] = pattern
-            init[pi_map, i] = logic.mvarray(p.launch['_pi'] if '_pi' in p.launch else p.capture['_pi'])
+            init[pi_map, i] = logic.mvarray(p.launch['_pi'][0] if '_pi' in p.launch else p.capture['_pi'][0])
        if init_filter: init = init_filter(init)
        sim8v = LogicSim(circuit, init.shape[-1], m=8)
        sim8v.s[0] = logic.mv_to_bp(init)
@ -149,12 +147,12 @@ class StilFile:
				@@ -149,12 +147,12 @@ class StilFile:
        launch = logic.bp_to_mv(sim8v.s[1])[..., :init.shape[-1]]
        for i, p in enumerate(self.patterns):
            # if there was no launch cycle or launch clock, then init = launch
-            if '_pi' not in p.launch or 'P' not in p.launch['_pi'] or 'P' not in p.capture['_pi']:
+            if '_pi' not in p.launch or 'P' not in p.launch['_pi'][0] or 'P' not in p.capture['_pi'][0]:
                for si_port in self.si_ports.keys():
-                    pattern = logic.mv_xor(logic.mvarray(p.load[si_port]), scan_inversions[si_port])
+                    pattern = logic.mv_xor(logic.mvarray(p.load[si_port][0]), scan_inversions[si_port])
                    launch[scan_maps[si_port], i] = pattern
-            if '_pi' in p.capture and 'P' in p.capture['_pi']:
-                launch[pi_map, i] = logic.mvarray(p.capture['_pi'])
+            if '_pi' in p.capture and 'P' in p.capture['_pi'][0]:
+                launch[pi_map, i] = logic.mvarray(p.capture['_pi'][0])
            launch[po_map, i] = logic.UNASSIGNED
        if launch_filter: launch = launch_filter(launch)

@ -171,9 +169,9 @@ class StilFile:
				@@ -171,9 +169,9 @@ class StilFile:
        interface, _, po_map, scan_maps, scan_inversions = self._maps(circuit)
        resp = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED)
        for i, p in enumerate(self.patterns):
-            resp[po_map, i] = logic.mvarray(p.capture['_po'] if len(p.capture) > 0 else p.launch['_po'])
+            resp[po_map, i] = logic.mvarray(p.capture['_po'][0] if len(p.capture) > 0 else p.launch['_po'][0])
            for so_port in self.so_ports.keys():
-                pattern = logic.mv_xor(logic.mvarray(p.unload[so_port]), scan_inversions[so_port])
+                pattern = logic.mv_xor(logic.mvarray(p.unload[so_port][0]), scan_inversions[so_port])
                resp[scan_maps[so_port], i] = pattern
        return resp

@ -192,7 +190,7 @@ class StilTransformer(Transformer):
				@@ -192,7 +190,7 @@ class StilTransformer(Transformer):
    def call(args): return Call(args[0], dict(args[1:]))

    @staticmethod
-    def call_parameter(args): return args[0], args[1].value
+    def call_parameter(args): return args[0], (args[1].value.replace('\n', '').replace('N', '-'), args[1].start_pos)

    @staticmethod
    def signal_group(args): return args[0], args[1:]
--- a/src/kyupy/techlib.py
+++ b/src/kyupy/techlib.py
@ -11,50 +11,6 @@ from itertools import product
				@@ -11,50 +11,6 @@ from itertools import product
 from . import bench


-class TechLibOld:
-    @staticmethod
-    def pin_index(kind, pin):
-        if isinstance(pin, int):
-            return max(0, pin-1)
-        if kind[:3] in ('OAI', 'AOI'):
-            if pin[0] == 'A': return int(pin[1]) - 1
-            if pin == 'B': return int(kind[3])
-            if pin[0] == 'B': return int(pin[1]) - 1 + int(kind[3])
-        for prefix, pins, index in [('HADD', ('B0', 'SO'), 1),
-                                    ('HADD', ('A0', 'C1'), 0),
-                                    ('MUX21', ('S', 'S0'), 2),
-                                    ('MX2', ('S0',), 2),
-                                    ('TBUF', ('OE',), 1),
-                                    ('TINV', ('OE',), 1),
-                                    ('LATCH', ('D',), 0),
-                                    ('LATCH', ('QN',), 1),
-                                    ('DFF', ('D',), 0),
-                                    ('DFF', ('QN',), 1),
-                                    ('SDFF', ('D',), 0),
-                                    ('SDFF', ('QN',), 1),
-                                    ('SDFF', ('CLK',), 3),
-                                    ('SDFF', ('RSTB', 'RN'), 4),
-                                    ('SDFF', ('SETB',), 5),
-                                    ('ISOL', ('ISO',), 0),
-                                    ('ISOL', ('D',), 1)]:
-            if kind.startswith(prefix) and pin in pins: return index
-        for index, pins in enumerate([('A1', 'IN1', 'A', 'S', 'INP', 'I', 'Q', 'QN', 'Y', 'Z', 'ZN'),
-                                      ('A2', 'IN2', 'B', 'CK', 'CLK', 'CO', 'SE'),
-                                      ('A3', 'IN3', 'C', 'RN', 'RSTB', 'CI', 'SI'),
-                                      ('A4', 'IN4', 'D', 'SN', 'SETB'),
-                                      ('A5', 'IN5', 'E'),
-                                      ('A6', 'IN6', 'F')]):
-            if pin in pins: return index
-        raise ValueError(f'Unknown pin index for {kind}.{pin}')
-
-    @staticmethod
-    def pin_is_output(kind, pin):
-        if isinstance(pin, int):
-            return pin == 0
-        if 'MUX' in kind and pin == 'S': return False
-        return pin in ('Q', 'QN', 'Z', 'ZN', 'Y', 'CO', 'S', 'SO', 'C1')
-
-
 class TechLib:
    """Class for standard cell library definitions.

@ -93,6 +49,14 @@ class TechLib:
				@@ -93,6 +49,14 @@ class TechLib:
        assert pin in self.cells[kind][1], f'Unknown pin: {pin} for cell {kind}'
        return self.cells[kind][1][pin][0]

+    def pin_name(self, kind, pos, output=False):
+        """Returns the pin name for a given node kind, list position, and direction."""
+        assert kind in self.cells, f'Unknown cell: {kind}'
+        for name, (ppos, isout) in self.cells[kind][1].items():
+            if isout == output and ppos == pos:
+                return name
+        return None
+
    def pin_is_output(self, kind, pin):
        """Returns True, if given pin name of a node kind is an output."""
        assert kind in self.cells, f'Unknown cell: {kind}'
@ -138,21 +102,92 @@ TLATX1   input(C,D)       output(Q,QN) Q=LATCH(D,C) QN=INV1(Q) ;
				@@ -138,21 +102,92 @@ TLATX1   input(C,D)       output(Q,QN) Q=LATCH(D,C) QN=INV1(Q) ;
 """


-_nangate_common = r"""
+NANGATE = TechLib(r"""
+FILLTIE ;
+FILL_X{1,2,4,8,16} ;
+ANTENNA input(I)   ;
+
+TIEH output(Z) Z=__const1__() ;
+TIEL output(ZN) ZN=__const0__() ;
+
+BUF_X{1,2,4,8,12,16}  input(I) output(Z)  Z=BUF1(I)  ;
+INV_X{1,2,4,8,12,16}  input(I) output(ZN) ZN=INV1(I) ;
+
+CLKBUF_X{1,2,4,8,12,16} input(I) output(Z)  Z=BUF1(I)  ;
+CLKGATETST_X1 input(CLK,E,TE) output(Q) Q=OA21(CLK,E,TE) ;
+
+AND2_X{1,2}  input(A1,A2)       output(Z)  Z=AND2(A1,A2)         ;
+AND3_X{1,2}  input(A1,A2,A3)    output(Z)  Z=AND3(A1,A2,A3)      ;
+AND4_X{1,2}  input(A1,A2,A3,A4) output(Z)  Z=AND4(A1,A2,A3,A4)   ;
+NAND2_X{1,2} input(A1,A2)       output(ZN) ZN=NAND2(A1,A2)       ;
+NAND3_X{1,2} input(A1,A2,A3)    output(ZN) ZN=NAND3(A1,A2,A3)    ;
+NAND4_X{1,2} input(A1,A2,A3,A4) output(ZN) ZN=NAND4(A1,A2,A3,A4) ;
+OR2_X{1,2}   input(A1,A2)       output(Z)  Z=OR2(A1,A2)          ;
+OR3_X{1,2}   input(A1,A2,A3)    output(Z)  Z=OR3(A1,A2,A3)       ;
+OR4_X{1,2}   input(A1,A2,A3,A4) output(Z)  Z=OR4(A1,A2,A3,A4)    ;
+NOR2_X{1,2}  input(A1,A2)       output(ZN) ZN=NOR2(A1,A2)        ;
+NOR3_X{1,2}  input(A1,A2,A3)    output(ZN) ZN=NOR3(A1,A2,A3)     ;
+NOR4_X{1,2}  input(A1,A2,A3,A4) output(ZN) ZN=NOR4(A1,A2,A3,A4)  ;
+XOR2_X1      input(A1,A2)       output(Z)  Z=XOR2(A1,A2)         ;
+XNOR2_X1     input(A1,A2)       output(ZN) ZN=XNOR2(A1,A2)       ;
+
+MUX2_X1 input(I0,I1,S) output(Z) Z=MUX21(I0,I1,S) ;
+
+HA_X1 input(A,B) output(CO,S) CO=XOR2(A,B) S=AND2(A,B) ;
+FA_X1 input(A,B,CI) output(CO,S) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(CI,A,B) ;
+
+AOI21_X{1,2} input(A1,A2,B)     output(ZN) ZN=AOI21(A1,A2,B)     ;
+OAI21_X{1,2} input(A1,A2,B)     output(ZN) ZN=OAI21(A1,A2,B)     ;
+AOI22_X{1,2} input(A1,A2,B1,B2) output(ZN) ZN=AOI22(A1,A2,B1,B2) ;
+OAI22_X{1,2} input(A1,A2,B1,B2) output(ZN) ZN=OAI22(A1,A2,B1,B2) ;
+
+DFFRNQ_X1  input(D,RN,CLK)    output(Q)  DR=AND2(D,RN) Q=DFF(DR,CLK) ;
+DFFSNQ_X1  input(D,SN,CLK)    output(Q)  S=INV1(SN) DS=OR2(D,S) Q=DFF(DS,CLK) ;
+
+SDFFRNQ_X1 input(D,RN,SE,SI,CLK) output(Q) DR=AND2(D,RN) DI=MUX21(DR,SI,SE) Q=DFF(DI,CLK) ;
+SDFFSNQ_X1 input(D,SE,SI,SN,CLK) output(Q) S=INV1(SN) DS=OR2(D,S) DI=MUX21(DS,SI,SE) Q=DFF(DI,CLK) ;
+
+TBUF_X{1,2,4,8,12,16} input(EN,I)   output(Z)  Z=BUF1(I) ;
+
+LHQ_X1 input(D,E) output(Q)  Q=LATCH(D,E) ;
+""")
+"""Nangate 15nm Open Cell Library (NanGate_15nm_OCL_v0.1_2014_06.A).
+"""
+
+
+NANGATE45 = TechLib(r"""
 FILLCELL_X{1,2,4,8,16,32} ;
+ANTENNA_X1 input(A) ;

 LOGIC0_X1 output(Z) Z=__const0__() ;
 LOGIC1_X1 output(Z) Z=__const1__() ;

 BUF_X{1,2,4,8,16,32}  input(A) output(Z)  Z=BUF1(A)  ;
-CLKBUF_X{1,2,3}       input(A) output(Z)  Z=BUF1(A)  ;
+INV_X{1,2,4,8,16,32}  input(A) output(ZN) ZN=INV1(A) ;
+
+CLKBUF_X{1,2,3}       input(A)       output(Z)   Z=BUF1(A)         ;
+CLKGATETST_X{1,2,4,8} input(CK,E,SE) output(GCK) GCK=OA21(CK,E,SE) ;
+CLKGATE_X{1,2,4,8}    input(CK,E)    output(GCK) GCK=AND2(CK,E)    ;

+AND2_X{1,2,4}  input(A1,A2)       output(ZN) ZN=AND2(A1,A2)        ;
+AND3_X{1,2,4}  input(A1,A2,A3)    output(ZN) ZN=AND3(A1,A2,A3)     ;
+AND4_X{1,2,4}  input(A1,A2,A3,A4) output(ZN) ZN=AND4(A1,A2,A3,A4)  ;
 NAND2_X{1,2,4} input(A1,A2)       output(ZN) ZN=NAND2(A1,A2)       ;
 NAND3_X{1,2,4} input(A1,A2,A3)    output(ZN) ZN=NAND3(A1,A2,A3)    ;
 NAND4_X{1,2,4} input(A1,A2,A3,A4) output(ZN) ZN=NAND4(A1,A2,A3,A4) ;
+OR2_X{1,2,4}   input(A1,A2)       output(ZN) ZN=OR2(A1,A2)         ;
+OR3_X{1,2,4}   input(A1,A2,A3)    output(ZN) ZN=OR3(A1,A2,A3)      ;
+OR4_X{1,2,4}   input(A1,A2,A3,A4) output(ZN) ZN=OR4(A1,A2,A3,A4)   ;
 NOR2_X{1,2,4}  input(A1,A2)       output(ZN) ZN=NOR2(A1,A2)        ;
 NOR3_X{1,2,4}  input(A1,A2,A3)    output(ZN) ZN=NOR3(A1,A2,A3)     ;
 NOR4_X{1,2,4}  input(A1,A2,A3,A4) output(ZN) ZN=NOR4(A1,A2,A3,A4)  ;
+XOR2_X{1,2}    input(A,B)         output(Z)  Z=XOR2(A,B)           ;
+XNOR2_X{1,2}   input(A,B)         output(ZN) ZN=XNOR2(A,B)         ;
+
+MUX2_X{1,2} input(A,B,S) output(Z) Z=MUX21(A,B,S) ;
+
+HA_X1 input(A,B) output(CO,S) CO=XOR2(A,B) S=AND2(A,B) ;
+FA_X1 input(A,B,CI) output(CO,S) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(CI,A,B) ;

 AOI21_X{1,2,4} input(A,B1,B2)     output(ZN) ZN=AOI21(B1,B2,A)     ;
 OAI21_X{1,2,4} input(A,B1,B2)     output(ZN) ZN=OAI21(B1,B2,A)     ;
@ -162,8 +197,6 @@ OAI22_X{1,2,4} input(A1,A2,B1,B2) output(ZN) ZN=OAI22(A1,A2,B1,B2) ;
				@@ -162,8 +197,6 @@ OAI22_X{1,2,4} input(A1,A2,B1,B2) output(ZN) ZN=OAI22(A1,A2,B1,B2) ;
 OAI211_X{1,2,4} input(A,B,C1,C2) output(ZN) ZN=OAI211(C1,C2,A,B)   ;
 AOI211_X{1,2,4} input(A,B,C1,C2) output(ZN) ZN=AOI211(C1,C2,A,B)   ;

-MUX2_X{1,2} input(A,B,S) output(Z) Z=MUX21(A,B,S) ;
-
 AOI221_X{1,2,4} input(A,B1,B2,C1,C2) output(ZN) BC=AO22(B1,B2,C1,C2) ZN=NOR2(BC,A)  ;
 OAI221_X{1,2,4} input(A,B1,B2,C1,C2) output(ZN) BC=OA22(B1,B2,C1,C2) ZN=NAND2(BC,A) ;

@ -172,14 +205,6 @@ OAI222_X{1,2,4} input(A1,A2,B1,B2,C1,C2) output(ZN) BC=OA22(B1,B2,C1,C2) ZN=OAI2
				@@ -172,14 +205,6 @@ OAI222_X{1,2,4} input(A1,A2,B1,B2,C1,C2) output(ZN) BC=OA22(B1,B2,C1,C2) ZN=OAI2

 OAI33_X1 input(A1,A2,A3,B1,B2,B3) output(ZN) AA=OR2(A1,A2) BB=OR2(B1,B2) ZN=OAI22(AA,A3,BB,B3) ;

-HA_X1 input(A,B) output(CO,S) CO=XOR2(A,B) S=AND2(A,B) ;
-
-FA_X1 input(A,B,CI) output(CO,S) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(CI,A,B) ;
-
-CLKGATE_X{1,2,4,8} input(CK,E) output(GCK) GCK=AND2(CK,E) ;
-
-CLKGATETST_X{1,2,4,8} input(CK,E,SE) output(GCK) GCK=OA21(CK,E,SE) ;
-
 DFF_X{1,2}   input(D,CK)       output(Q,QN)  Q=DFF(D,CK) QN=INV1(Q) ;
 DFFR_X{1,2}  input(D,RN,CK)    output(Q,QN)  DR=AND2(D,RN) Q=DFF(DR,CK) QN=INV1(Q) ;
 DFFS_X{1,2}  input(D,SN,CK)    output(Q,QN)  S=INV1(SN) DS=OR2(D,S) Q=DFF(DS,CK) QN=INV1(Q) ;
@ -191,43 +216,16 @@ SDFFS_X{1,2}  input(D,SE,SI,SN,CK)    output(Q,QN)  S=INV1(SN) DS=OR2(D,S) DI=MU
				@@ -191,43 +216,16 @@ SDFFS_X{1,2}  input(D,SE,SI,SN,CK)    output(Q,QN)  S=INV1(SN) DS=OR2(D,S) DI=MU
 SDFFRS_X{1,2} input(D,RN,SE,SI,SN,CK) output(Q,QN)  S=INV1(SN) DS=OR2(D,S) DRS=AND2(DS,RN) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CK) QN=INV1(Q) ;

 TBUF_X{1,2,4,8,16} input(A,EN)   output(Z)  Z=BUF1(A)    ;
+
 TINV_X1            input(I,EN)   output(ZN) ZN=INV1(I)   ;
 TLAT_X1            input(D,G,OE) output(Q)  Q=LATCH(D,G) ;

 DLH_X{1,2} input(D,G) output(Q)  Q=LATCH(D,G)            ;
 DLL_X{1,2} input(D,GN) output(Q) G=INV1(GN) Q=LATCH(D,G) ;
-"""

-
-NANGATE = TechLib(_nangate_common + r"""
-INV_X{1,2,4,8,16,32}  input(I) output(ZN) ZN=INV1(I) ;
-
-AND2_X{1,2,4}  input(A1,A2)       output(Z)  Z=AND2(A1,A2)        ;
-AND3_X{1,2,4}  input(A1,A2,A3)    output(Z)  Z=AND3(A1,A2,A3)     ;
-AND4_X{1,2,4}  input(A1,A2,A3,A4) output(Z)  Z=AND4(A1,A2,A3,A4)  ;
-OR2_X{1,2,4}   input(A1,A2)       output(Z)  Z=OR2(A1,A2)         ;
-OR3_X{1,2,4}   input(A1,A2,A3)    output(Z)  Z=OR3(A1,A2,A3)      ;
-OR4_X{1,2,4}   input(A1,A2,A3,A4) output(Z)  Z=OR4(A1,A2,A3,A4)   ;
-XOR2_X{1,2}    input(A1,A2)       output(Z)  Z=XOR2(A1,A2)        ;
-XNOR2_X{1,2}   input(A1,A2)       output(ZN) ZN=XNOR2(A1,A2)      ;
-""")
-"""An newer NANGATE-variant that uses 'Z' as output pin names for AND and OR gates.
-"""
-
-
-NANGATE_ZN = TechLib(_nangate_common + r"""
-INV_X{1,2,4,8,16,32}  input(A) output(ZN) ZN=INV1(A) ;
-
-AND2_X{1,2,4}  input(A1,A2)       output(ZN) ZN=AND2(A1,A2)        ;
-AND3_X{1,2,4}  input(A1,A2,A3)    output(ZN) ZN=AND3(A1,A2,A3)     ;
-AND4_X{1,2,4}  input(A1,A2,A3,A4) output(ZN) ZN=AND4(A1,A2,A3,A4)  ;
-OR2_X{1,2,4}   input(A1,A2)       output(ZN) ZN=OR2(A1,A2)         ;
-OR3_X{1,2,4}   input(A1,A2,A3)    output(ZN) ZN=OR3(A1,A2,A3)      ;
-OR4_X{1,2,4}   input(A1,A2,A3,A4) output(ZN) ZN=OR4(A1,A2,A3,A4)   ;
-XOR2_X{1,2}    input(A,B)         output(Z)  Z=XOR2(A,B)           ;
-XNOR2_X{1,2}   input(A,B)         output(ZN) ZN=XNOR2(A,B)         ;
 """)
-"""An older NANGATE-variant that uses 'ZN' as output pin names for AND and OR gates.
+"""Nangate 45nm Open Cell Library (NangateOpenCellLibrary_PDKv1_3_v2010_12).
+This NANGATE-variant that uses 'ZN' as output pin names for AND and OR gates.
 """


--- a/src/kyupy/verilog.py
+++ b/src/kyupy/verilog.py
@ -123,6 +123,9 @@ class VerilogTransformer(Transformer):
				@@ -123,6 +123,9 @@ class VerilogTransformer(Transformer):
        assignments = []
        for stmt in args[2:]:  # pass 1: instantiate cells and driven signals
            if isinstance(stmt, Instantiation):
+                if stmt.type not in self.tlib.cells:
+                    log.warn(f'Ignoring cell of unknown kind "{stmt.type}"')
+                    continue
                n = Node(c, stmt.name, kind=stmt.type)
                for p, s in stmt.pins.items():
                    if self.tlib.pin_is_output(n.kind, p):
@ -141,35 +144,41 @@ class VerilogTransformer(Transformer):
				@@ -141,35 +144,41 @@ class VerilogTransformer(Transformer):
                        c.io_nodes[positions[name]] = n
                    if sd.kind == 'input':
                        Line(c, n, Node(c, name))
-        for target, source in assignments:  # pass 1.5: process signal assignments
-            target_sigs = []
-            if not isinstance(target, list): target = [target]
-            for s in target:
-                if s in sig_decls:
-                    target_sigs += sig_decls[s].names
-                else:
-                    target_sigs.append(s)
-            source_sigs = []
-            if not isinstance(source, list): source = [source]
-            for s in source:
-                if s in sig_decls:
-                    source_sigs += sig_decls[s].names
-                else:
-                    source_sigs.append(s)
-            for t, s in zip(target_sigs, source_sigs):
-                if t in c.forks:
-                    assert s not in c.forks, 'assignment between two driven signals'
-                    Line(c, c.forks[t], Node(c, s))
-                elif s in c.forks:
-                    assert t not in c.forks, 'assignment between two driven signals'
-                    Line(c, c.forks[s], Node(c, t))
-                elif s.startswith("1'b"):
-                    cnode = Node(c, f'__const{s[3]}_{const_count}__', f'__const{s[3]}__')
-                    const_count += 1
-                    Line(c, cnode, Node(c, t))
+        while len(assignments) > 0:
+            more_assignments = []
+            for target, source in assignments:  # pass 1.5: process signal assignments
+                target_sigs = []
+                if not isinstance(target, list): target = [target]
+                for s in target:
+                    if s in sig_decls:
+                        target_sigs += sig_decls[s].names
+                    else:
+                        target_sigs.append(s)
+                source_sigs = []
+                if not isinstance(source, list): source = [source]
+                for s in source:
+                    if s in sig_decls:
+                        source_sigs += sig_decls[s].names
+                    else:
+                        source_sigs.append(s)
+                for t, s in zip(target_sigs, source_sigs):
+                    if t in c.forks:
+                        assert s not in c.forks, 'assignment between two driven signals'
+                        Line(c, c.forks[t], Node(c, s))
+                    elif s in c.forks:
+                        assert t not in c.forks, 'assignment between two driven signals'
+                        Line(c, c.forks[s], Node(c, t))
+                    elif s.startswith("1'b"):
+                        cnode = Node(c, f'__const{s[3]}_{const_count}__', f'__const{s[3]}__')
+                        const_count += 1
+                        Line(c, cnode, Node(c, t))
+                    else:
+                        more_assignments.append((target, source))
+            assignments = more_assignments
        for stmt in args[2:]:  # pass 2: connect signals to readers
            if isinstance(stmt, Instantiation):
                for p, s in stmt.pins.items():
+                    if stmt.name not in c.cells: continue
                    n = c.cells[stmt.name]
                    if self.tlib.pin_is_output(n.kind, p): continue
                    if s.startswith("1'b"):
--- a/src/kyupy/wave_sim.py
+++ b/src/kyupy/wave_sim.py
@ -13,10 +13,11 @@ Two simulators are available: :py:class:`WaveSim` runs on the CPU, and the deriv
				@@ -13,10 +13,11 @@ Two simulators are available: :py:class:`WaveSim` runs on the CPU, and the deriv
 """

 import math
+from collections import defaultdict

 import numpy as np

-from . import numba, cuda, sim, cdiv
+from . import log, numba, cuda, sim, cdiv, eng


 TMAX = np.float32(2 ** 127)
@ -59,8 +60,8 @@ class WaveSim(sim.SimOps):
				@@ -59,8 +60,8 @@ class WaveSim(sim.SimOps):
        self.delays = np.zeros((len(delays), self.c_locs_len, 2, 2), dtype=delays.dtype)
        self.delays[:, :delays.shape[1]] = delays

-        self.c = np.zeros((self.c_len, sims), dtype=np.float32) + TMAX
-        self.s = np.zeros((11, self.s_len, sims), dtype=np.float32)
+        self.c = np.full((self.c_len, self.sims), TMAX, dtype=np.float32)
+        self.s = np.zeros((11, self.s_len, self.sims), dtype=np.float32)
        """Information about the logic values and transitions around the sequential elements (flip-flops) and ports.

        The first 3 values are read by :py:func:`s_to_c`.
@ -98,12 +99,18 @@ class WaveSim(sim.SimOps):
				@@ -98,12 +99,18 @@ class WaveSim(sim.SimOps):
        self.simctl_int[0] = range(sims)  # unique seed for each sim by default, zero this to pick same delays for all sims.
        self.simctl_int[1] = 2  # random picking by default.

-        self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.c_locs, self.c_caps, self.ops, self.simctl_int)])
+        self.e = np.zeros((self.c_locs_len, sims, 2), dtype=np.uint8)  # aux data for each line and sim
+
+        self.error_counts = np.zeros(self.s_len, dtype=np.uint32)  # number of capture errors by PPO
+        self.lsts = np.zeros(self.s_len, dtype=np.float32)  # LST by PPO
+        self.overflows = np.zeros(self.s_len, dtype=np.uint32)  # Overflows by PPO
+
+        self.nbytes = sum([a.nbytes for a in (self.c, self.s, self.e, self.c_locs, self.c_caps, self.ops, self.simctl_int)])

    def __repr__(self):
        dev = 'GPU' if hasattr(self.c, 'copy_to_host') else 'CPU'
        return f'{{name: "{self.circuit.name}", device: "{dev}", sims: {self.sims}, ops: {len(self.ops)}, ' + \
-               f'levels: {len(self.level_starts)}, nbytes: {self.nbytes}}}'
+               f'levels: {len(self.level_starts)}, nbytes: {eng(self.nbytes)}}}'

    def s_to_c(self):
        """Transfers values of sequential elements and primary inputs to the combinational portion.
@ -116,7 +123,7 @@ class WaveSim(sim.SimOps):
				@@ -116,7 +123,7 @@ class WaveSim(sim.SimOps):
        self.c[self.pippi_c_locs+1] = np.choose(cond, [TMAX, TMAX, sins[1], TMAX])
        self.c[self.pippi_c_locs+2] = TMAX

-    def c_prop(self, sims=None, seed=1):
+    def c_prop(self, sims=None, seed=1, delta=0):
        """Propagates all waveforms from the (pseudo) primary inputs to the (pseudo) primary outputs.

        :param sims: Number of parallel simulations to execute. If None, all available simulations are performed.
@ -124,7 +131,7 @@ class WaveSim(sim.SimOps):
				@@ -124,7 +131,7 @@ class WaveSim(sim.SimOps):
        """
        sims = min(sims or self.sims, self.sims)
        for op_start, op_stop in zip(self.level_starts, self.level_stops):
-            level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.abuf, 0, sims, self.delays, self.simctl_int, seed)
+            level_eval_cpu(self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.e, self.abuf, 0, sims, self.delays, self.simctl_int, seed, delta)

    def c_to_s(self, time=TMAX, sd=0.0, seed=1):
        """Simulates a capture operation at all sequential elements and primary outputs.
@ -152,7 +159,7 @@ class WaveSim(sim.SimOps):
				@@ -152,7 +159,7 @@ class WaveSim(sim.SimOps):
        self.s[2, self.ppio_s_locs] = self.s[8, self.ppio_s_locs]


-def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0):
+def _wave_eval(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int, seed, delta):
    overflows = int(0)

    lut = op[0]
@ -162,6 +169,18 @@ def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0):
				@@ -162,6 +169,18 @@ def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0):
    c_idx = op[4]
    d_idx = op[5]

+    input_epoch = (ebuf[a_idx, sim, 1]|
+                   ebuf[b_idx, sim, 1]|
+                   ebuf[c_idx, sim, 1]|
+                   ebuf[d_idx, sim, 1])
+
+    output_epoch = ebuf[z_idx, sim, 1]
+
+    if (delta):
+        if input_epoch == 0 and output_epoch == 0: return 0, 0
+
+    out_changed = output_epoch
+
    if len(delays) > 1:
        if simctl_int[1] == 0:
            delays = delays[seed]
@ -206,25 +225,25 @@ def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0):
				@@ -206,25 +225,25 @@ def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0):
        if a == current_t:
            a_cur += 1
            inputs ^= 1
-            thresh = delays[a_idx, a_cur & 1, z_val]
+            thresh = delays[a_idx, a_cur & 1 ^ 1, z_val]
            a = cbuf[a_mem + a_cur, sim] + delays[a_idx, a_cur & 1, z_val]
            next_t = cbuf[a_mem + a_cur, sim] + delays[a_idx, (a_cur & 1) ^ 1, z_val ^ 1]
        elif b == current_t:
            b_cur += 1
            inputs ^= 2
-            thresh = delays[b_idx, b_cur & 1, z_val]
+            thresh = delays[b_idx, b_cur & 1 ^ 1, z_val]
            b = cbuf[b_mem + b_cur, sim] + delays[b_idx, b_cur & 1, z_val]
            next_t = cbuf[b_mem + b_cur, sim] + delays[b_idx, (b_cur & 1) ^ 1, z_val ^ 1]
        elif c == current_t:
            c_cur += 1
            inputs ^= 4
-            thresh = delays[c_idx, c_cur & 1, z_val]
+            thresh = delays[c_idx, c_cur & 1 ^ 1, z_val]
            c = cbuf[c_mem + c_cur, sim] + delays[c_idx, c_cur & 1, z_val]
            next_t = cbuf[c_mem + c_cur, sim] + delays[c_idx, (c_cur & 1) ^ 1, z_val ^ 1]
        else:
            d_cur += 1
            inputs ^= 8
-            thresh = delays[d_idx, d_cur & 1, z_val]
+            thresh = delays[d_idx, d_cur & 1 ^ 1, z_val]
            d = cbuf[d_mem + d_cur, sim] + delays[d_idx, d_cur & 1, z_val]
            next_t = cbuf[d_mem + d_cur, sim] + delays[d_idx, (d_cur & 1) ^ 1, z_val ^ 1]

@ -235,13 +254,15 @@ def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0):
				@@ -235,13 +254,15 @@ def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0):
                or (current_t - previous_t) > thresh  # -OR- the generated hazard is wider than pulse threshold.
                ):
                if z_cur < (z_cap - 1):  # enough space in z_mem?
+                    if delta and (cbuf[z_mem + z_cur, sim] != current_t):
+                        out_changed = 1
                    cbuf[z_mem + z_cur, sim] = current_t
                    previous_t = current_t
                    z_cur += 1
                else:
                    overflows += 1
-                    previous_t = cbuf[z_mem + z_cur - 1, sim]
                    z_cur -= 1
+                    previous_t = cbuf[z_mem + z_cur, sim]
            else:
                z_cur -= 1
                previous_t = cbuf[z_mem + z_cur - 1, sim] if z_cur > 0 else TMIN
@ -255,12 +276,23 @@ def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0):
				@@ -255,12 +276,23 @@ def _wave_eval(op, cbuf, c_locs, c_caps, sim, delays, simctl_int, seed=0):

        current_t = min(a, b, c, d)

+    if delta and (cbuf[z_mem + z_cur, sim] != TMAX):
+        out_changed = 1
+
    # generate or propagate overflow flag
    cbuf[z_mem + z_cur, sim] = TMAX_OVL if overflows > 0 else max(a, b, c, d)

    nrise = max(0, (z_cur+1) // 2 - (cbuf[z_mem, sim] == TMIN))
    nfall = z_cur // 2

+    e = int(((cbuf[z_mem, sim] == TMIN) << 1) & 2)  # initial value
+    e |= z_val  # final value
+    e |= (nrise + nfall)<<2  # number of transitions
+
+    ebuf[z_idx, sim, 0] = e
+
+    ebuf[z_idx, sim, 1] = input_epoch & out_changed
+
    return nrise, nfall


@ -268,11 +300,11 @@ wave_eval_cpu = numba.njit(_wave_eval)
				@@ -268,11 +300,11 @@ wave_eval_cpu = numba.njit(_wave_eval)


@numba.njit
-def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, abuf, sim_start, sim_stop, delays, simctl_int, seed):
+def level_eval_cpu(ops, op_start, op_stop, c, c_locs, c_caps, ebuf, abuf, sim_start, sim_stop, delays, simctl_int, seed, delta):
    for op_idx in range(op_start, op_stop):
        op = ops[op_idx]
        for sim in range(sim_start, sim_stop):
-            nrise, nfall = wave_eval_cpu(op, c, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed)
+            nrise, nfall = wave_eval_cpu(op, c, c_locs, c_caps, ebuf, sim, delays, simctl_int[:, sim], seed, delta)
            a_loc = op[6]
            a_wr = op[7]
            a_wf = op[8]
@ -345,12 +377,18 @@ class WaveSimCuda(WaveSim):
				@@ -345,12 +377,18 @@ class WaveSimCuda(WaveSim):
        self.delays = cuda.to_device(self.delays)
        self.simctl_int = cuda.to_device(self.simctl_int)
        self.abuf = cuda.to_device(self.abuf)
+        self.e = cuda.to_device(self.e)
+        self.error_counts = cuda.to_device(self.error_counts)
+        self.lsts = cuda.to_device(self.lsts)
+        self.overflows = cuda.to_device(self.overflows)
+
+        self.aux = cuda.to_device(np.zeros(8*1024, dtype=np.int32))

        self._block_dim = (32, 16)

    def __getstate__(self):
        state = self.__dict__.copy()
-        state['c'] = np.array(self.c)
+        del state['c']
        state['s'] = np.array(self.s)
        state['ops'] = np.array(self.ops)
        state['c_locs'] = np.array(self.c_locs)
@ -358,11 +396,16 @@ class WaveSimCuda(WaveSim):
				@@ -358,11 +396,16 @@ class WaveSimCuda(WaveSim):
        state['delays'] = np.array(self.delays)
        state['simctl_int'] = np.array(self.simctl_int)
        state['abuf'] = np.array(self.abuf)
+        state['e'] = np.array(self.e)
+        state['error_counts'] = np.array(self.error_counts)
+        state['lsts'] = np.array(self.lsts)
+        state['overflows'] = np.array(self.overflows)
+        state['aux'] = np.array(self.aux)
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)
-        self.c = cuda.to_device(self.c)
+        self.c = cuda.to_device(np.full((self.c_len, self.sims), TMAX, dtype=np.float32))
        self.s = cuda.to_device(self.s)
        self.ops = cuda.to_device(self.ops)
        self.c_locs = cuda.to_device(self.c_locs)
@ -370,6 +413,11 @@ class WaveSimCuda(WaveSim):
				@@ -370,6 +413,11 @@ class WaveSimCuda(WaveSim):
        self.delays = cuda.to_device(self.delays)
        self.simctl_int = cuda.to_device(self.simctl_int)
        self.abuf = cuda.to_device(self.abuf)
+        self.e = cuda.to_device(self.e)
+        self.error_counts = cuda.to_device(self.error_counts)
+        self.lsts = cuda.to_device(self.lsts)
+        self.overflows = cuda.to_device(self.overflows)
+        self.aux = cuda.to_device(self.aux)

    def s_to_c(self):
        grid_dim = self._grid_dim(self.sims, self.s_len)
@ -377,14 +425,24 @@ class WaveSimCuda(WaveSim):
				@@ -377,14 +425,24 @@ class WaveSimCuda(WaveSim):

    def _grid_dim(self, x, y): return cdiv(x, self._block_dim[0]), cdiv(y, self._block_dim[1])

-    def c_prop(self, sims=None, seed=1):
+    def c_prop(self, sims=None, seed=1, op_from=0, op_to=None, delta=0):
        sims = min(sims or self.sims, self.sims)
        for op_start, op_stop in zip(self.level_starts, self.level_stops):
+            if op_from > op_start: continue
+            if op_to is not None and op_to <= op_start: break
            grid_dim = self._grid_dim(sims, op_stop - op_start)
-            wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.abuf, int(0),
-                sims, self.delays, self.simctl_int, seed)
+            wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.e, self.abuf, int(0),
+                sims, self.delays, self.simctl_int, seed, delta)
        cuda.synchronize()

+    def c_prop_level(self, level, sims=None, seed=1, delta=0):
+        sims = min(sims or self.sims, self.sims)
+        op_start = self.level_starts[level]
+        op_stop = self.level_stops[level]
+        grid_dim = self._grid_dim(sims, op_stop - op_start)
+        wave_eval_gpu[grid_dim, self._block_dim](self.ops, op_start, op_stop, self.c, self.c_locs, self.c_caps, self.e, self.abuf, int(0),
+            sims, self.delays, self.simctl_int, seed, delta)
+
    def c_to_s(self, time=TMAX, sd=0.0, seed=1):
        grid_dim = self._grid_dim(self.sims, self.s_len)
        wave_capture_gpu[grid_dim, self._block_dim](self.c, self.s, self.c_locs, self.c_caps, self.ppo_offset,
@ -394,6 +452,77 @@ class WaveSimCuda(WaveSim):
				@@ -394,6 +452,77 @@ class WaveSimCuda(WaveSim):
        grid_dim = self._grid_dim(self.sims, self.s_len)
        ppo_to_ppi_gpu[grid_dim, self._block_dim](self.s, self.c_locs, time, self.ppi_offset, self.ppo_offset)

+    def acc_error_counts(self, sims=None):
+        sims = min(sims or self.sims, self.sims)
+        grid_dim = cdiv(self.s_len, 256)
+        acc_error_counts_gpu[grid_dim, 256](self.s, sims, self.error_counts)
+
+    def reset_error_counts(self):
+        self.error_counts[:] = 0
+
+    def get_error_counts(self):
+        return np.array(self.error_counts)
+
+    def acc_overflows(self, sims=None):
+        sims = min(sims or self.sims, self.sims)
+        grid_dim = cdiv(self.s_len, 256)
+        acc_overflows_gpu[grid_dim, 256](self.s, sims, self.overflows)
+
+    def reset_overflows(self):
+        self.overflows[:] = 0
+
+    def get_overflows(self):
+        return np.array(self.overflows)
+
+    def acc_lsts(self, sims=None):
+        sims = min(sims or self.sims, self.sims)
+        grid_dim = cdiv(self.s_len, 256)
+        acc_lsts_gpu[grid_dim, 256](self.s, sims, self.lsts)
+
+    def reset_lsts(self):
+        self.lsts[:] = 0.0
+
+    def get_lsts(self):
+        return np.array(self.lsts)
+
+
+@cuda.jit()
+def memcpy_gpu (src, dst, nitems):
+    tid = cuda.grid(1)
+    stride = cuda.gridDim.x * cuda.blockDim.x
+    for i in range(tid, nitems, stride):
+        dst.flat[i] = src.flat[i]
+
+
+@cuda.jit()
+def acc_error_counts_gpu(s, sims, error_counts):
+    x = cuda.grid(1)
+    if x >= s.shape[1]: return
+    cnt = 0
+    for i in range(sims):
+        cnt += (s[6,x,i] != s[8,x,i])
+    error_counts[x] += cnt
+
+
+@cuda.jit()
+def acc_overflows_gpu(s, sims, overflows):
+    x = cuda.grid(1)
+    if x >= s.shape[1]: return
+    cnt = 0
+    for i in range(sims):
+        cnt += s[10,x,i]
+    overflows[x] += cnt
+
+
+@cuda.jit()
+def acc_lsts_gpu(s, sims, lsts):
+    x = cuda.grid(1)
+    if x >= s.shape[1]: return
+    lst = 0
+    for i in range(sims):
+        lst = max(lst, s[5,x,i])
+    lsts[x] = max(lsts[x], lst)
+

@cuda.jit()
 def wave_assign_gpu(c, s, c_locs, ppi_offset):
@ -423,7 +552,7 @@ _wave_eval_gpu = cuda.jit(_wave_eval, device=True)
				@@ -423,7 +552,7 @@ _wave_eval_gpu = cuda.jit(_wave_eval, device=True)


@cuda.jit()
-def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, abuf, sim_start, sim_stop, delays, simctl_int, seed):
+def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, ebuf, abuf, sim_start, sim_stop, delays, simctl_int, seed, delta):
    x, y = cuda.grid(2)
    sim = sim_start + x
    op_idx = op_start + y
@ -435,7 +564,7 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, abuf, sim_start,
				@@ -435,7 +564,7 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, abuf, sim_start,
    a_wr = op[7]
    a_wf = op[8]

-    nrise, nfall = _wave_eval_gpu(op, cbuf, c_locs, c_caps, sim, delays, simctl_int[:, sim], seed)
+    nrise, nfall = _wave_eval_gpu(op, cbuf, c_locs, c_caps, ebuf, sim, delays, simctl_int[:, sim], seed, delta)

    # accumulate WSA into abuf
    if a_loc >= 0:
--- a/tests/b15_4ig.sa_rf.stil.gz
+++ b/tests/b15_4ig.sa_rf.stil.gz
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -13,8 +13,44 @@ def b15_2ig_circuit(mydir):
				@@ -13,8 +13,44 @@ def b15_2ig_circuit(mydir):
    from kyupy.techlib import SAED32
    return verilog.load(mydir / 'b15_2ig.v.gz', branchforks=True, tlib=SAED32)

+@pytest.fixture(scope='session')
+def b15_2ig_circuit_resolved(b15_2ig_circuit):
+    from kyupy.techlib import SAED32
+    cr = b15_2ig_circuit.copy()
+    cr.resolve_tlib_cells(SAED32)
+    return cr
+
+@pytest.fixture(scope='session')
+def b15_4ig_circuit(mydir):
+    from kyupy import verilog
+    from kyupy.techlib import SAED32
+    return verilog.load(mydir / 'b15_4ig.v.gz', branchforks=True, tlib=SAED32)
+
+@pytest.fixture(scope='session')
+def b15_4ig_circuit_resolved(b15_4ig_circuit):
+    from kyupy.techlib import SAED32
+    cr = b15_4ig_circuit.copy()
+    cr.resolve_tlib_cells(SAED32)
+    return cr
+
@pytest.fixture(scope='session')
 def b15_2ig_delays(mydir, b15_2ig_circuit):
    from kyupy import sdf
    from kyupy.techlib import SAED32
    return sdf.load(mydir / 'b15_2ig.sdf.gz').iopaths(b15_2ig_circuit, tlib=SAED32)[1:2]
+
+@pytest.fixture(scope='session')
+def b15_2ig_sa_nf_test_resp(mydir, b15_2ig_circuit_resolved):
+    from kyupy import stil
+    s = stil.load(mydir / 'b15_2ig.sa_nf.stil.gz')
+    tests = s.tests(b15_2ig_circuit_resolved)[:,1:]
+    resp = s.responses(b15_2ig_circuit_resolved)[:,1:]
+    return (tests, resp)
+
+@pytest.fixture(scope='session')
+def b15_4ig_sa_rf_test_resp(mydir, b15_4ig_circuit_resolved):
+    from kyupy import stil
+    s = stil.load(mydir / 'b15_4ig.sa_rf.stil.gz')
+    tests = s.tests(b15_4ig_circuit_resolved)[:,1:]
+    resp = s.responses(b15_4ig_circuit_resolved)[:,1:]
+    return (tests, resp)
--- a/tests/gates.sdf
+++ b/tests/gates.sdf
@ -7,22 +7,49 @@
				@@ -7,22 +7,49 @@
 (TEMPERATURE 25.00:25.00:25.00)
 (TIMESCALE 1ns)
 (CELL
-  (CELLTYPE "NAND2X1")
+  (CELLTYPE "NAND2_X1")
  (INSTANCE nandgate)
  (DELAY
    (ABSOLUTE
-    (IOPATH IN1 QN (0.099:0.103:0.103) (0.122:0.127:0.127))
-    (IOPATH IN2 QN (0.083:0.086:0.086) (0.100:0.104:0.104))
+    (IOPATH A1 ZN (0.099:0.103:0.103) (0.122:0.127:0.127))
+    (IOPATH A2 ZN (0.083:0.086:0.086) (0.100:0.104:0.104))
    )
  )
 )
 (CELL
-  (CELLTYPE "AND2X1")
+  (CELLTYPE "AND2_X1")
  (INSTANCE andgate)
  (DELAY
    (ABSOLUTE
-    (IOPATH IN1 Q (0.367:0.378:0.378) (0.351:0.377:0.377))
-    (IOPATH IN2 Q (0.366:0.375:0.375) (0.359:0.370:0.370))
+    (IOPATH A1 ZN (0.367:0.378:0.378) (0.351:0.377:0.377))
+    (IOPATH A2 ZN (0.366:0.375:0.375) (0.359:0.370:0.370))
+    )
+  )
+)
+(CELL
+  (CELLTYPE "OAI21_X1")
+  (INSTANCE oai21gate)
+  (DELAY 
+    (ABSOLUTE
+    (IOPATH B1 ZN (0.000:0.025:0.025) (0.000:0.013:0.013))
+    (IOPATH B2 ZN (0.000:0.030:0.030) (0.000:0.016:0.016))
+    ( COND (B1 == 1'b0) && (B2 == 1'b1) (IOPATH A ZN (0.000:0.018:0.018)))
+    (COND (B1 == 1'b1) && (B2 == 1'b0) (IOPATH A ZN (0.000:0.018:0.018) (0.000:0.016:0.016)))
+    (COND (B1 == 1'b1) && (B2 == 1'b1) (IOPATH A ZN (0.000:0.019:0.019) (0.000:0.014:0.014)))
+    )
+  )
+)
+(CELL
+  (CELLTYPE "MUX2_X1")
+  (INSTANCE mux2gate)
+  (DELAY 
+    (ABSOLUTE
+    (COND (B == 1'b0) && (S == 1'b0) (IOPATH A Z (0.000:0.037:0.037) (0.000:0.058:0.058)))
+    (COND (B == 1'b1) && (S == 1'b0) (IOPATH A Z (0.000:0.037:0.037) (0.000:0.058:0.058)))
+    (COND (A == 1'b0) && (S == 1'b1) (IOPATH B Z (0.000:0.035:0.035) (0.000:0.056:0.056)))
+    (COND (A == 1'b1) && (S == 1'b1) (IOPATH B Z (0.000:0.035:0.035) (0.000:0.056:0.056)))
+    (COND (A == 1'b0) && (B == 1'b1) (IOPATH S Z (0.000:0.047:0.047) (0.000:0.073:0.073)))
+    (COND (A == 1'b1) && (B == 1'b0) (IOPATH S Z (0.000:0.072:0.072) (0.000:0.064:0.064)))
    )
  )
 )
--- a/tests/gates.v
+++ b/tests/gates.v
@ -1,11 +1,15 @@
				@@ -1,11 +1,15 @@
-module gates (a, b, o0, o1 );
+module gates (a, b, c, o0, o1, o2, o3 );
 input a;
 input b;
+input c;
 output o0;
 output o1;
+output o2;
+output o3;

-AND2X1 andgate (.IN1 ( a ) , .IN2 ( b ) , .Q ( o0 ) ) ;
-NAND2X1 nandgate (.IN1 ( a ) , .IN2 ( b ) , .QN ( o1 ) ) ;
-
+AND2_X1 andgate (.A1 ( a ) , .A2 ( b ) , .ZN ( o0 ) ) ;
+NAND2_X1 nandgate (.A1 ( a ) , .A2 ( b ) , .ZN ( o1 ) ) ;
+OAI21_X1 oai21gate (.B1(a), .B2(b), .A(c), .ZN(o2) ) ;
+MUX2_X1 mux2gate (.A(a), .B(b), .S(c), .Z(o3)) ;

 endmodule
--- a/tests/test_circuit.py
+++ b/tests/test_circuit.py
@ -1,9 +1,30 @@
				@@ -1,9 +1,30 @@
 import pickle

-from kyupy.circuit import Circuit, Node, Line
+from kyupy.circuit import GrowingList, Circuit, Node, Line
 from kyupy import verilog, bench
 from kyupy.techlib import SAED32

+def test_growing_list():
+    gl = GrowingList()
+    assert gl.free_idx == 0
+    gl[0] = 1
+    assert gl.free_idx == 1
+    gl[2] = 1
+    assert gl.free_idx == 1
+    gl[0] = None
+    assert gl.free_idx == 0
+    gl[0] = 1
+    assert gl.free_idx == 1
+    gl[1] = 1
+    assert gl.free_idx == 3
+    gl.append(1)
+    assert gl.free_idx == 4
+    gl[2] = None
+    assert gl.free_idx == 2
+    gl[2] = 1
+    gl[1] = None
+    assert gl.free_idx == 1
+
 def test_lines():
    c = Circuit()
    n1 = Node(c, 'n1')
--- a/tests/test_logic_sim.py
+++ b/tests/test_logic_sim.py
@ -1,6 +1,6 @@
				@@ -1,6 +1,6 @@
 import numpy as np

-from kyupy.logic_sim import LogicSim
+from kyupy.logic_sim import LogicSim, LogicSim6V
 from kyupy import bench, logic, sim
 from kyupy.logic import mvarray, bparray, bp_to_mv, mv_to_bp

@ -94,6 +94,30 @@ def test_4v():
				@@ -94,6 +94,30 @@ def test_4v():
        '--0XX', '--X1X', '--XXX', '--XXX'))


+def test_6v():
+    c = bench.parse('input(x, y) output(a, o, n, xo, no) a=AND2(x,y) o=OR2(x,y) n=INV1(x) xo=XOR2(x,y) no=NOR2(x,y)')
+    s = LogicSim6V(c, 36)
+    assert s.s_len == 7
+    mva = mvarray(
+        '0000101', '0101110', '0R0R1RF', '0F0F1FR', '0P0P1PN', '0N0N1NP',
+        '1001010', '1111000', '1RR10F0', '1FF10R0', '1PP10N0', '1NN10P0',
+        'R00RFRF', 'R1R1FF0', 'RRRRFPF', 'RFPNFNP', 'RPPRFRF', 'RNRNFFP',
+        'F00FRFR', 'F1F1RR0', 'FRPNRNP', 'FFFFRPR', 'FPPFRFR', 'FNFNRRP',
+        'P00PNPN', 'P1P1NN0', 'PRPRNRF', 'PFPFNFR', 'PPPPNPN', 'PNPNNNP',
+        'N00NPNP', 'N1N1PP0', 'NRRNPFP', 'NFFNPRP', 'NPPNPNP', 'NNNNPPP')
+    tests = np.copy(mva)
+    tests[2:] = logic.ZERO
+    s.s[0] = tests
+    s.s_to_c()
+    s.c_prop()
+    s.c_to_s()
+    resp = s.s[1].copy()
+
+    exp_resp = np.copy(mva)
+    exp_resp[:2] = logic.ZERO
+    np.testing.assert_allclose(resp, exp_resp)
+
+
 def test_8v():
    c = bench.parse('input(x, y) output(a, o, n, xo) a=and(x,y) o=or(x,y) n=not(x) xo=xor(x,y)')
    s = LogicSim(c, 64, m=8)
@ -173,3 +197,64 @@ def test_b01(mydir):
				@@ -173,3 +197,64 @@ def test_b01(mydir):
    s.c_prop()
    s.c_to_s()
    bp_to_mv(s.s[1])
+
+
+def sim_and_compare(c, test_resp, m=8):
+    tests, resp = test_resp
+    lsim = LogicSim(c, m=m, sims=tests.shape[1])
+    lsim.s[0] = logic.mv_to_bp(tests)
+    lsim.s_to_c()
+    lsim.c_prop()
+    lsim.c_to_s()
+    resp_sim = logic.bp_to_mv(lsim.s[1])[:,:tests.shape[1]]
+    idxs, pats = np.nonzero(((resp == logic.ONE) & (resp_sim != logic.ONE)) | ((resp == logic.ZERO) & (resp_sim != logic.ZERO)))
+    for i, (idx, pat) in enumerate(zip(idxs, pats)):
+        if i >= 10:
+            print(f'...')
+            break
+        print(f'mismatch pattern:{pat} ppio:{idx} exp:{logic.mv_str(resp[idx,pat])} act:{logic.mv_str(resp_sim[idx,pat])}')
+    assert len(idxs) == 0
+
+def sim_and_compare_6v(c, test_resp):
+    tests, resp = test_resp
+    lsim = LogicSim6V(c, sims=tests.shape[1])
+    lsim.s[0] = tests
+    lsim.s_to_c()
+    lsim.c_prop()
+    lsim.c_to_s()
+    resp_sim = lsim.s[1]
+    idxs, pats = np.nonzero(((resp == logic.ONE) & (resp_sim != logic.ONE)) | ((resp == logic.ZERO) & (resp_sim != logic.ZERO)))
+    for i, (idx, pat) in enumerate(zip(idxs, pats)):
+        if i >= 10:
+            print(f'...')
+            break
+        print(f'mismatch pattern:{pat} ppio:{idx} exp:{logic.mv_str(resp[idx,pat])} act:{logic.mv_str(resp_sim[idx,pat])}')
+    assert len(idxs) == 0
+
+
+def test_b15_2ig_sa_2v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp):
+    sim_and_compare(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp, m=2)
+
+
+def test_b15_2ig_sa_4v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp):
+    sim_and_compare(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp, m=4)
+
+
+def test_b15_2ig_sa_6v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp):
+    sim_and_compare_6v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp)
+
+
+def test_b15_2ig_sa_8v(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp):
+    sim_and_compare(b15_2ig_circuit_resolved, b15_2ig_sa_nf_test_resp, m=8)
+
+
+def test_b15_4ig_sa_2v(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp):
+    sim_and_compare(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp, m=2)
+
+
+def test_b15_4ig_sa_4v(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp):
+    sim_and_compare(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp, m=4)
+
+
+def test_b15_4ig_sa_8v(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp):
+    sim_and_compare(b15_4ig_circuit_resolved, b15_4ig_sa_rf_test_resp, m=8)
--- a/tests/test_sdf.py
+++ b/tests/test_sdf.py
@ -2,7 +2,7 @@ import numpy as np
				@@ -2,7 +2,7 @@ import numpy as np

 from kyupy import sdf, verilog, bench
 from kyupy.wave_sim import WaveSim, TMAX, TMIN
-from kyupy.techlib import SAED32, SAED90
+from kyupy.techlib import SAED32, NANGATE45

 def test_parse():
    test = '''
@ -80,9 +80,9 @@ def test_b15(mydir):
				@@ -80,9 +80,9 @@ def test_b15(mydir):


 def test_gates(mydir):
-    c = verilog.load(mydir / 'gates.v', tlib=SAED90)
+    c = verilog.load(mydir / 'gates.v', tlib=NANGATE45)
    df = sdf.load(mydir / 'gates.sdf')
-    lt = df.iopaths(c, tlib=SAED90)[1]
+    lt = df.iopaths(c, tlib=NANGATE45)[1]
    nand_a = c.cells['nandgate'].ins[0]
    nand_b = c.cells['nandgate'].ins[1]
    and_a = c.cells['andgate'].ins[0]
--- a/tests/test_verilog.py
+++ b/tests/test_verilog.py
@ -1,5 +1,5 @@
				@@ -1,5 +1,5 @@
 from kyupy import verilog
-from kyupy.techlib import SAED90, SAED32
+from kyupy.techlib import SAED90, SAED32, NANGATE45

 def test_b01(mydir):
    with open(mydir / 'b01.v', 'r') as f:
@ -26,12 +26,12 @@ def test_b15(mydir):
				@@ -26,12 +26,12 @@ def test_b15(mydir):


 def test_gates(mydir):
-    c = verilog.load(mydir / 'gates.v', tlib=SAED90)
-    assert len(c.nodes) == 10
-    assert len(c.lines) == 10
+    c = verilog.load(mydir / 'gates.v', tlib=NANGATE45)
+    assert len(c.nodes) == 18
+    assert len(c.lines) == 21
    stats = c.stats
-    assert stats['input'] == 2
-    assert stats['output'] == 2
+    assert stats['input'] == 3
+    assert stats['output'] == 4
    assert stats['__seq__'] == 0


--- a/tests/test_wave_sim.py
+++ b/tests/test_wave_sim.py
@ -5,22 +5,56 @@ from kyupy.logic_sim import LogicSim
				@@ -5,22 +5,56 @@ from kyupy.logic_sim import LogicSim
 from kyupy import logic, bench, sim
 from kyupy.logic import mvarray

+def test_xnor2_delays():
+    op = (sim.XNOR2, 2, 0, 1, 3, 3, -1, 0, 0)
+    #op = (0b0111, 4, 0, 1)
+    c = np.full((4*16, 1), TMAX, dtype=np.float32)  # 4 waveforms of capacity 16
+    c_locs = np.zeros((4,), dtype='int')
+    c_caps = np.zeros((4,), dtype='int')
+    ebuf = np.zeros((4, 1, 2), dtype=np.int32)
+
+    for i in range(4): c_locs[i], c_caps[i] = i*16, 16  # 1:1 mapping
+    delays = np.zeros((1, 4, 2, 2))
+    delays[0, 0, 0, 0] = 0.031  # A rise -> Z rise
+    delays[0, 0, 0, 1] = 0.027  # A rise -> Z fall
+    delays[0, 0, 1, 0] = 0.033  # A fall -> Z rise
+    delays[0, 0, 1, 1] = 0.037  # A fall -> Z fall
+    delays[0, 1, 0, 0] = 0.032  # B rise -> Z rise
+    delays[0, 1, 0, 1] = 0.030  # B rise -> Z fall
+    delays[0, 1, 1, 0] = 0.038  # B fall -> Z rise
+    delays[0, 1, 1, 1] = 0.036  # B fall -> Z fall
+
+    simctl_int = np.asarray([0], dtype=np.int32)
+
+    def wave_assert(inputs, output):
+        for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i
+        wave_eval_cpu(op, c, c_locs, c_caps, ebuf, 0, delays, simctl_int, 0, 0)
+        for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[2,i], v)
+
+    wave_assert([[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX])      # XNOR(1,1) => 1
+    wave_assert([[TMAX,TMAX],[TMIN,TMAX]], [TMAX])           # XNOR(0,1) => 0
+    # using Afall/Zfall for pulse length, bug: was using Arise/Zfall
+    #wave_assert([[0.07, 0.10, TMAX], [0.0, TMAX]], [TMIN, 0.03, 0.101, 0.137, TMAX])
+    wave_assert([[0.07, 0.10, TMAX], [0.0, TMAX]], [TMIN, 0.03, TMAX])
+    wave_assert([[0.06, 0.10, TMAX], [0.0, TMAX]], [TMIN, 0.03, 0.091, 0.137, TMAX])
+
 def test_nand_delays():
    op = (sim.NAND4, 4, 0, 1, 2, 3, -1, 0, 0)
    #op = (0b0111, 4, 0, 1)
-    c = np.full((5*16, 1), TMAX)  # 5 waveforms of capacity 16
+    c = np.full((5*16, 1), TMAX, dtype=np.float32)  # 5 waveforms of capacity 16
    c_locs = np.zeros((5,), dtype='int')
    c_caps = np.zeros((5,), dtype='int')
+    ebuf = np.zeros((4, 1, 2), dtype=np.int32)

    for i in range(5): c_locs[i], c_caps[i] = i*16, 16  # 1:1 mapping

    # SDF specifies IOPATH delays with respect to output polarity
    # SDF pulse rejection value is determined by IOPATH causing last transition and polarity of last transition
    delays = np.zeros((1, 5, 2, 2))
-    delays[0, 0, 0, 0] = 0.1  # A -> Z rise delay
-    delays[0, 0, 0, 1] = 0.2  # A -> Z fall delay
-    delays[0, 0, 1, 0] = 0.1  # A -> Z negative pulse limit (terminate in rising Z)
-    delays[0, 0, 1, 1] = 0.2  # A -> Z positive pulse limit
+    delays[0, 0, 0, 0] = 0.1  # A rise -> Z rise
+    delays[0, 0, 0, 1] = 0.2  # A rise -> Z fall
+    delays[0, 0, 1, 0] = 0.1  # A fall -> Z rise
+    delays[0, 0, 1, 1] = 0.2  # A fall -> Z fall
    delays[0, 1, :, 0] = 0.3  # as above for B -> Z
    delays[0, 1, :, 1] = 0.4
    delays[0, 2, :, 0] = 0.5  # as above for C -> Z
@ -32,7 +66,7 @@ def test_nand_delays():
				@@ -32,7 +66,7 @@ def test_nand_delays():

    def wave_assert(inputs, output):
        for i, a in zip(inputs, c.reshape(-1,16)): a[:len(i)] = i
-        wave_eval_cpu(op, c, c_locs, c_caps, 0, delays, simctl_int)
+        wave_eval_cpu(op, c, c_locs, c_caps, ebuf, 0, delays, simctl_int, 0, 0)
        for i, v in enumerate(output): np.testing.assert_allclose(c.reshape(-1,16)[4,i], v)

    wave_assert([[TMAX,TMAX],[TMAX,TMAX],[TMIN,TMAX],[TMIN,TMAX]], [TMIN,TMAX]) # NAND(0,0,1,1) => 1
@ -145,7 +179,7 @@ def compare_to_logic_sim(wsim: WaveSim):
				@@ -145,7 +179,7 @@ def compare_to_logic_sim(wsim: WaveSim):
    lsim.s_to_c()
    lsim.c_prop()
    lsim.c_to_s()
-    exp = logic.bp_to_mv(lsim.s[1])
+    exp = logic.bp_to_mv(lsim.s[1])[:,:tests.shape[-1]]

    resp[resp == logic.PPULSE] = logic.ZERO
    resp[resp == logic.NPULSE] = logic.ONE
@ -156,13 +190,13 @@ def compare_to_logic_sim(wsim: WaveSim):
				@@ -156,13 +190,13 @@ def compare_to_logic_sim(wsim: WaveSim):
    np.testing.assert_allclose(resp, exp)


-def test_b15(b15_2ig_circuit, b15_2ig_delays):
-    compare_to_logic_sim(WaveSim(b15_2ig_circuit, b15_2ig_delays, 8))
+def test_b15(b15_2ig_circuit_resolved, b15_2ig_delays):
+    compare_to_logic_sim(WaveSim(b15_2ig_circuit_resolved, b15_2ig_delays, 8))


-def test_b15_strip_forks(b15_2ig_circuit, b15_2ig_delays):
-    compare_to_logic_sim(WaveSim(b15_2ig_circuit, b15_2ig_delays, 8, strip_forks=True))
+def test_b15_strip_forks(b15_2ig_circuit_resolved, b15_2ig_delays):
+    compare_to_logic_sim(WaveSim(b15_2ig_circuit_resolved, b15_2ig_delays, 8, strip_forks=True))


-def test_b15_cuda(b15_2ig_circuit, b15_2ig_delays):
-    compare_to_logic_sim(WaveSimCuda(b15_2ig_circuit, b15_2ig_delays, 8, strip_forks=True))
+def test_b15_cuda(b15_2ig_circuit_resolved, b15_2ig_delays):
+    compare_to_logic_sim(WaveSimCuda(b15_2ig_circuit_resolved, b15_2ig_delays, 8, strip_forks=True))
Author	SHA1	Message	Date
Stefan Holst	d2357859f6	more robust matching and assign processing	5 months ago
Stefan Holst	53629c5c28	support injection into specific sims	5 months ago
Stefan Holst	e64845e8c0	fanout generator	5 months ago
Stefan Holst	da98ca2db7	signal flips in compiled code	5 months ago
Stefan Holst	deb4599206	fix tests	5 months ago
Stefan Holst	c1c9ec9aae	pin_name, cleanup legacy code	5 months ago
Stefan Holst	4c55dcec60	delta sim for improving fault sim performance	6 months ago
Stefan Holst	a4b7364478	mux21 in 6v logic sim, more test fixtures	6 months ago
Stefan Holst	f59e97afa9	remove hashes, add lst, overflow, ebuf	6 months ago
Stefan Holst	f6baf9cb5e	a fast 6v sim	6 months ago
Stefan Holst	fc030c6708	allow interconnect annotations without forks	6 months ago
Stefan Holst	795cac0716	initial and final values from mvarrays	6 months ago
Stefan Holst	3a8777e0a3	none-filtering iterator for GrowingList	6 months ago
Stefan Holst	68e8cb844a	pass line id to inject_cb	7 months ago
Stefan Holst	1a3b91c1c0	fix comment	10 months ago
Stefan Holst	aa7536b8b0	line use and diff	10 months ago
Stefan Holst	fccf5e0d84	fix log limit	10 months ago
Stefan Holst	a6d1e4099c	alap toposort, improve tests	11 months ago
Stefan Holst	1654915ed6	support for partial re-sim	11 months ago
Stefan Holst	d2a2484efa	fix fault injection	11 months ago
Stefan Holst	de79393dfc	fix log limiter, use eng notation	1 year ago
Stefan Holst	4bb3f3424a	cond in sdf parser. ignored for now.	1 year ago
Stefan Holst	a6243b43f6	keep s_nodes	1 year ago
Stefan Holst	baeb759824	types, perf op growing list, keep s_nodes	1 year ago
Stefan Holst	967a232b1c	fix pulse threshold selection	1 year ago
Stefan Holst	8096416b0e	save test position for each pattern	1 year ago
Stefan Holst	a4cce9f8c0	Produce stable value when trans. to/from -	1 year ago
Stefan Holst	4f6b733eb4	fix NanGate variants, version bump	1 year ago
Stefan Holst	371bc906b3	Merge branch 'main' into devel fix readthedocs	1 year ago
Stefan Holst	0ade89defa	remove old test data, intro check	1 year ago
Stefan Holst	7f4026f504	def-file docs	1 year ago
Stefan Holst	e6a0d59d44	def-file docs	1 year ago
Stefan Holst	63e5f32e21	better ignore	1 year ago
Stefan Holst	35e727e714	better docs, new techlib as default, fix tests	1 year ago
Stefan Holst	83445e2bbd	support for newer NANGATE lib	1 year ago
Stefan Holst	c67148c0ee	doc fix	1 year ago
Stefan Holst	280c425486	fix test	1 year ago
Stefan Holst	5be82da49a	avoid holes in forks, update intro	1 year ago
Stefan Holst	b3dbe9765a	fix xor in libs, remove old code	1 year ago
Stefan Holst	5e573b0408	fix substitute for inputs with fo, dot graph	1 year ago
Stefan Holst	08d9f5a9bf	one-bit busses	1 year ago
Stefan Holst	b098fb219d	fix for unconnected named pins, double-declaration	1 year ago
Stefan Holst	97387e962b	add GSC180nm	1 year ago
Stefan Holst	f4d875f7e5	docs	1 year ago
Stefan Holst	cf9a98b5ce	del deprecated sdf code, explicit tlib use	1 year ago
Stefan Holst	d8f605a47a	fix double-free when fo goes to same cell	1 year ago
Stefan Holst	ec5626b8ca	remove old connections in substitute node reuse	1 year ago
Stefan Holst	5a693f7b9b	preserve node order during resolve	1 year ago
Stefan Holst	19bbe2c260	update intro	1 year ago
Stefan Holst	d3897246c5	move resolving cells to circuit, more doc	1 year ago
Stefan Holst	9bda7a4c57	capitalize tech libs	1 year ago
Stefan Holst	2270a9eee7	fix fork stripping + fork None values	1 year ago
Stefan Holst	ea45a326ec	add latch, fix xor delays, improve test	1 year ago
Stefan Holst	1e9fe7707b	saed32nm	1 year ago
Stefan Holst	50a5d8a290	one cell inherits name in substitute, sim fix	1 year ago
Stefan Holst	d97555e9e9	fix simprim cells, add saed90	1 year ago
Stefan Holst	47ee8d5878	improve substitute, update notebook output	1 year ago
Stefan Holst	c32584fc76	1to1 fork optimization, fix substitute	1 year ago
Stefan Holst	39b8c1695b	full constants support, fix signal declarations	1 year ago
Stefan Holst	80d26b6f0b	Add AO211 and OA211, fix MUX21	1 year ago
Stefan Holst	f7ef78e58d	support for limiting log messages	1 year ago
Stefan Holst	7afb13b33b	mv_str for single values, remove undue assert	1 year ago
Stefan Holst	153442a10a	def file parser	1 year ago
Stefan Holst	afb0a64953	wsa accumulation in wavesim	1 year ago
Stefan Holst	c49667edc1	remove old code, verilog positional pins	1 year ago
Stefan Holst	d921eb5048	sim support for remaining primitives	1 year ago
Stefan Holst	670fb0b3fc	circuit node substitution	1 year ago
Stefan Holst	f8bf579be2	support concat, bus select, ISOL cells	1 year ago
Stefan Holst	f61e2b42e8	support more cells in logic sim	1 year ago
Stefan Holst	4aec335abb	verilog: concat assignments, more comments	1 year ago
Stefan Holst	1a9cb396bf	tweak repr, doc	1 year ago
Stefan Holst	3875dc38f9	docs	1 year ago
Stefan Holst	ecb7171c37	docs	1 year ago
Stefan Holst	8957db48ab	docs	1 year ago
Stefan Holst	0b15f9fa18	doc improvements	1 year ago
Stefan Holst	dc76a9f517	new into demo	1 year ago
Stefan Holst	0968cb451e	docs, fix stil unassigned, fix io_locs for busses	1 year ago
Stefan Holst	947df89434	add AOI21 to logic sim	1 year ago
Stefan Holst	f17e461fdd	fix reading directly from file handle	1 year ago
Stefan Holst	d6d981a351	support for det vars	1 year ago
Stefan Holst	7a060b1831	support for static variations	2 years ago
Stefan Holst	03802ac9f8	make sims pickleable	2 years ago
Stefan Holst	70caea065e	more cleanup	2 years ago
Stefan Holst	f04f1b0012	cleanup	2 years ago
Stefan Holst	44b0c887d7	random sampling of delays	2 years ago
Stefan Holst	4e2022291e	fix cuda ppo_to_ppi	2 years ago
Stefan Holst	5566b80e52	simprim, vat refactor, batchrange	2 years ago
stefan	63c0b48537	bump	2 years ago
stefan	6520ee23ef	cleanup and new intro notebook	2 years ago
stefan	1810d40959	pytest work without cuda	2 years ago
Stefan Holst	7430ebb068	jitted logic sim	2 years ago
stefan	89f317b463	better circuit statsu, 2v logic sim	2 years ago
Stefan Holst	753ce566e4	Timer improvements, log in yaml	2 years ago
stefan	1eb8d87884	faster logic sim, removing MVArray, BPArray	2 years ago
Stefan Holst	02f3a0e1b2	correct timing padding	2 years ago
Stefan Holst	fc8e65e788	bit-packing utility	2 years ago
Stefan Holst	d80a3ae2b1	timer utility	2 years ago
Stefan Holst	7bfc02e683	more on-gpu code, bump python requirement	2 years ago
Stefan Holst	8da4a62bce	switch to new wave_sim, silence occupancy warnings	2 years ago
Stefan Holst	3497bfdc75	first gpu-code, cached test fixtures	2 years ago
Stefan Holst	f1ebe1487c	new wave sim	2 years ago
Stefan Holst	f0dac36ac7	interface -> io_nodes, io_loc fix	2 years ago
Stefan Holst	b2953aef25	only dff	2 years ago
Stefan Holst	3774b14286	support ppi/ppo	2 years ago
Stefan Holst	4847ad9c40	locating io ports and busses by name	2 years ago
Stefan Holst	6801606dca	new common scheduler for simulators	2 years ago
Stefan Holst	faf41f0863	ff transitions switch	2 years ago
Stefan Holst	6430f10f73	HADD pin index fix	2 years ago
Stefan Holst	fa19af8c31	4-input gate simulator	2 years ago
Stefan Holst	93a0858d2f	oai and aoi pin handling fix	2 years ago
Stefan Holst	1f2808ee31	Merge branch 'main' into devel	2 years ago
Stefan Holst	163b348a0c	year bump	2 years ago
Stefan Holst	ecfc692edc	support reset RN for scan cells	2 years ago
Stefan Holst	afb7e745a1	adding aoi to logic sim	2 years ago
Stefan Holst	6a8841c3c6	revert wave_eval4	2 years ago
Stefan Holst	c530983afa	accept I as a first input	3 years ago
Stefan Holst	775b13c694	fix off-by-1 pin index when loading AOI and OAI cells	3 years ago
Stefan Holst	584445f3b1	wave eval for 4-input gates	3 years ago
Stefan Holst	85dd02d4d7	interpret N as unassigned in STIL	3 years ago
Stefan Holst	7c03271048	improve robustness of sdf annotation and wave sim	3 years ago
Stefan Holst	8bbaaf8fae	comment change	3 years ago
Stefan Holst	d59d6401c8	fix stil loading and logic sim capture	3 years ago
Stefan Holst	387c436207	fix tests, version bump	3 years ago
Stefan Holst	b981b1153c	add sdata to control individual sims	3 years ago
Stefan Holst	87d93afb44	fix time in unpickled log objects	4 years ago
Stefan Holst	c3e4090f31	make nodes and lines hashable again	4 years ago
Stefan Holst	0251d66d28	make circuit pickable and comparable	4 years ago
Stefan Holst	864230b883	initial letch support, fix capture in logic sim	4 years ago
Stefan Holst	d05841a6a2	Merge branch 'main' into devel	4 years ago
Stefan Holst	c5be32d7e5	doc and indent fix	4 years ago
Stefan Holst	8434f5e694	fixes for IWLS benchmark netlists	4 years ago
Stefan Holst	9ff2369a55	fix parsing older stil files	4 years ago
Stefan Holst	82a53e0171	improve techlib for gsclib, better constant handling in verilog parser	4 years ago
Stefan Holst	a2df0e5682	fix ff annotation	4 years ago
Stefan Holst	ec37e11fef	Merge branch 'main' into devel	4 years ago
Stefan Holst	3a5a3c128b	year bump	4 years ago
Stefan Holst	ee30898cef	docs for numba and cuda	4 years ago
Stefan Holst	62cf56e98a	TechLib class, remove unnecessary .index	4 years ago
Stefan Holst	dc003fa624	documentation improvements	4 years ago
Stefan Holst	8b5a71f498	documentation improvements	4 years ago
Stefan Holst	9c8dee31b9	assign and capture return arrays, new cycle method for common use pattern	4 years ago
Stefan Holst	2bbdf3ee5d	fix logic sim of DFF.QN output	4 years ago
Stefan Holst	35cf63cf38	Make Node and Line indexable, documentation.	4 years ago
Stefan Holst	ff4de6d782	de-lint and repr improvements	4 years ago
Stefan Holst	c12a30328c	better hr_time	4 years ago
Stefan Holst	7e6660002b	support ibuff in WaveSim	4 years ago
Stefan Holst	dfbc35eeb9	logging range fixes	4 years ago
Stefan Holst	4f531fe4cb	implement logging range	4 years ago
Stefan Holst	18c17b5f76	more docs and reprs	4 years ago
Stefan Holst	0bad95e94e	LogicSim clean-up and new fault injection facility. version bump.	4 years ago
Stefan Holst	7501613951	remove comments	4 years ago
Stefan Holst	5084f1dd8c	demo nb run with cuda	4 years ago
Stefan Holst	7f035c1ac5	Migration to new logic value representation	4 years ago
Stefan Holst	7bcfbf502b	Documentation, cleanup, multi-valued logic	4 years ago
Stefan Holst	5830608527	Documenting circuit module	4 years ago
Stefan Holst	cff18e0915	start documentation	4 years ago
Stefan Holst	a77ac4a397	start designing new data structures for m-valued logic	4 years ago