From 35e727e7142bfbc1258719cd47672dbe33a88ec1 Mon Sep 17 00:00:00 2001
From: Stefan Holst <holst@ci.kyutech.ac.jp>
Date: Sat, 22 Jul 2023 13:11:11 +0900
Subject: [PATCH] better docs, new techlib as default, fix tests

---
 README.rst                          |   2 +-
 docs/index.rst                      |   1 +
 docs/miscellaneous.rst              |   4 -
 docs/simulators.rst                 |   5 +
 docs/techlib.rst                    |   7 +
 src/kyupy/__init__.py               |   7 +-
 src/kyupy/circuit.py                |  34 ++-
 src/kyupy/logic.py                  |   2 +-
 src/kyupy/logic_sim.py              |  40 ++--
 src/kyupy/sdf.py                    |  31 ++-
 src/kyupy/stil.py                   |  31 ++-
 src/kyupy/techlib.py                | 308 +++++++++++++------------
 src/kyupy/verilog.py                |  26 ++-
 src/kyupy/wave_sim.py               |  49 ++--
 tests/conftest.py                   |   8 +-
 tests/rng_haltonBase2.synth_yosys.v | 335 ++++++++++++++++++++++++++++
 tests/test_sdf.py                   |   7 +-
 tests/test_stil.py                  |  20 +-
 tests/test_verilog.py               |  12 +-
 tests/test_wave_sim.py              |  15 +-
 20 files changed, 683 insertions(+), 261 deletions(-)
 create mode 100644 docs/techlib.rst
 create mode 100644 tests/rng_haltonBase2.synth_yosys.v

diff --git a/README.rst b/README.rst
index 0957c8a..a00df1b 100644
--- a/README.rst
+++ b/README.rst
@@ -6,7 +6,7 @@ It contains fundamental building blocks for research software in the fields of V
 
 * Efficient data structures for gate-level circuits and related design data.
 * Partial `lark <https://github.com/lark-parser/lark>`_ parsers for common design files like
-  bench, gate-level verilog, standard delay format (SDF), standard test interface language (STIL).
+  bench, gate-level Verilog, standard delay format (SDF), standard test interface language (STIL), design exchange format (DEF).
 * Bit-parallel gate-level 2-, 4-, and 8-valued logic simulation.
 * GPU-accelerated high-throughput gate-level timing simulation.
 * High-performance through the use of `numpy <https://numpy.org>`_ and `numba <https://numba.pydata.org>`_.
diff --git a/docs/index.rst b/docs/index.rst
index c1e3889..06f085d 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -8,6 +8,7 @@ API Reference
 
    circuit
    logic
+   techlib
    parsers
    simulators
    miscellaneous
diff --git a/docs/miscellaneous.rst b/docs/miscellaneous.rst
index fff469f..f49363b 100644
--- a/docs/miscellaneous.rst
+++ b/docs/miscellaneous.rst
@@ -4,7 +4,3 @@ Miscellaneous
 .. automodule:: kyupy
    :members:
 
-.. automodule:: kyupy.techlib
-   :members:
-
-
diff --git a/docs/simulators.rst b/docs/simulators.rst
index bcc0ea4..44360f7 100644
--- a/docs/simulators.rst
+++ b/docs/simulators.rst
@@ -1,6 +1,11 @@
 Simulators
 ==========
 
+KyuPy's simulators are optimized for cells with at most 4 inputs and 1 output.
+
+More complex cells must be mapped to simulation primitives first.
+
+
 Logic Simulation - :mod:`kyupy.logic_sim`
 -----------------------------------------
 
diff --git a/docs/techlib.rst b/docs/techlib.rst
new file mode 100644
index 0000000..8e23b2d
--- /dev/null
+++ b/docs/techlib.rst
@@ -0,0 +1,7 @@
+Technology Libraries
+====================
+
+.. automodule:: kyupy.techlib
+   :members:
+
+
diff --git a/src/kyupy/__init__.py b/src/kyupy/__init__.py
index a531281..123bd03 100644
--- a/src/kyupy/__init__.py
+++ b/src/kyupy/__init__.py
@@ -1,6 +1,5 @@
-"""A package for processing and analysis of non-hierarchical gate-level VLSI designs.
+"""The kyupy package itself contains miscellaneous utility functions.
 
-The kyupy package itself contains a logger and other simple utility functions.
 In addition, it defines a ``numba`` and a ``cuda`` objects that point to the actual packages
 if they are available and otherwise point to mocks.
 """
@@ -22,7 +21,7 @@ def cdiv(x, y):
 
 
 def popcount(a):
-    """Returns the number of 1-bits in a given packed numpy array."""
+    """Returns the number of 1-bits in a given packed numpy array of type ``uint8``."""
     return np.sum(_pop_count_lut[a])
 
 
@@ -81,6 +80,7 @@ def hr_time(seconds):
 
 
 def batchrange(nitems, maxsize):
+    """A simple generator that produces offsets and sizes for batch-loops."""
     for offset in range(0, nitems, maxsize):
         yield offset, min(nitems-offset, maxsize)
 
@@ -121,7 +121,6 @@ class Log:
         self.start = time.perf_counter()
         self.logfile = sys.stdout
         """When set to a file handle, log messages are written to it instead to standard output.
-        After each write, ``flush()`` is called as well.
         """
         self.indent = 0
         self._limit = -1
diff --git a/src/kyupy/circuit.py b/src/kyupy/circuit.py
index 409088b..46c9f38 100644
--- a/src/kyupy/circuit.py
+++ b/src/kyupy/circuit.py
@@ -226,14 +226,14 @@ class Circuit:
         """A list of all :class:`Node` objects contained in the circuit.
 
         The position of a node in this list equals its index :code:`self.nodes[42].index == 42`.
-        This list should not be changed directly.
+        This list must not be changed directly.
         Use the :class:`Node` constructor and :py:attr:`Node.remove()` to add and remove nodes.
         """
         self.lines : list[Line] = IndexList()
         """A list of all :class:`Line` objects contained in the circuit.
 
         The position of a line in this list equals its index :code:`self.lines[42].index == 42`.
-        This list should not be changed directly.
+        This list must not be changed directly.
         Use the :class:`Line` constructor and :py:attr:`Line.remove()` to add and remove lines.
         """
         self.io_nodes : list[Node] = GrowingList()
@@ -243,18 +243,27 @@ class Circuit:
         The position of a node in the io_nodes list corresponds to positions of logic values in test vectors.
         The port direction is not stored explicitly.
         Usually, nodes in the io_nodes list without any lines in their :py:attr:`Node.ins` list are primary inputs,
-        and nodes without any lines in their :py:attr:`Node.outs` list are regarded as primary outputs.
+        and all other nodes in the io_nodes list are regarded as primary outputs.
         """
         self.cells : dict[str, Node] = {}
         """A dictionary to access cells by name.
+
+        This dictionary must not be changed directly.
+        Use the :class:`Node` constructor and :py:attr:`Node.remove()` to add and remove nodes.
         """
         self.forks : dict[str, Node] = {}
         """A dictionary to access forks by name.
+
+        This dictionary must not be changed directly.
+        Use the :class:`Node` constructor and :py:attr:`Node.remove()` to add and remove nodes.
         """
 
     @property
     def s_nodes(self):
-        """A list of all io_nodes as well as all flip-flops and latches in the circuit (in that order).
+        """A list of all primary I/Os as well as all flip-flops and latches in the circuit (in that order).
+
+        The s_nodes list defines the order of all ports and all sequential elements in the circuit.
+        This list is constructed on-the-fly. If used in some inner loop, consider caching the list for better performance.
         """
         return list(self.io_nodes) + [n for n in self.nodes if 'dff' in n.kind.lower()] + [n for n in self.nodes if 'latch' in n.kind.lower()]
 
@@ -275,7 +284,7 @@ class Circuit:
     def s_locs(self, prefix):
         """Returns the indices of I/Os and sequential elements that start with given name prefix.
 
-        The returned values are used to index into the :py:attr:`s_nodes` array.
+        The returned values are used to index into the :py:attr:`s_nodes` list.
         It works the same as :py:attr:`io_locs`. See there for more details.
         """
         return self._locs(prefix, self.s_nodes)
@@ -299,7 +308,7 @@ class Circuit:
 
     @property
     def stats(self):
-        """A dictionary with the number of all different elements in the circuit.
+        """A dictionary with the counts of all different elements in the circuit.
 
         The dictionary contains the number of all different kinds of nodes, the number
         of lines, as well various sums like number of combinational gates, number of
@@ -342,6 +351,10 @@ class Circuit:
         information is not needed, such forks can be removed and the two neighbors
         can be connected directly using one line. Forks that drive more than one node
         are not removed by this function.
+
+        This function may remove some nodes and some lines from the circuit.
+        Therefore, the indices of other nodes and lines may change to keep the indices consecutive.
+        It may therefore invalidate external data for nodes and lines.
         """
         ios = set(self.io_nodes)
         for n in list(self.forks.values()):
@@ -363,6 +376,11 @@ class Circuit:
         The given node will be removed, the implementation is copied in and
         the signal lines are connected appropriately. The number and arrangement
         of the input and output ports must match the pins of the replaced node.
+
+        This function tries to preserve node and line indices as much as possible.
+        Usually, it only adds additional nodes and lines, preserving the order of
+        all existing nodes and lines. If an implementation is empty, however, nodes
+        and lines may get removed, changing indices and invalidating external data.
         """
         ios = set(impl.io_nodes)
         impl_in_nodes = [n for n in impl.io_nodes if len(n.ins) == 0]
@@ -422,9 +440,11 @@ class Circuit:
 
     def resolve_tlib_cells(self, tlib):
         """Substitute all technology library cells with kyupy native simulation primitives.
+
+        See :py:attr:`substitute()` for more detail.
         """
         for n in list(self.nodes):
-            if n.kind in tlib.cells:# and 'DFF' not in n.kind and 'LATCH' not in n.kind:
+            if n.kind in tlib.cells:
                 self.substitute(n, tlib.cells[n.kind][0])
 
     def copy(self):
diff --git a/src/kyupy/logic.py b/src/kyupy/logic.py
index 7fa3bc5..ff49344 100644
--- a/src/kyupy/logic.py
+++ b/src/kyupy/logic.py
@@ -32,7 +32,7 @@ The axis convention is as follows:
 * The **last** axis goes along patterns/vectors. I.e. ``values[...,0]`` is pattern 0, ``values[...,1]`` is pattern 1, etc.
 * The **second-to-last** axis goes along the I/O and flip-flops of circuits. For a circuit ``c``, this axis is usually
   ``len(c.s_nodes)`` long. The values of all inputs, outputs and flip-flops are stored within the same array and the location
-  along the second-to-last axis is determined by the order in ``c.s_nodes``.
+  along the second-to-last axis is determined by the order in :py:attr:`~kyupy.circuit.Circuit.s_nodes`.
 
 Two storage formats are used in KyuPy:
 
diff --git a/src/kyupy/logic_sim.py b/src/kyupy/logic_sim.py
index d2a9dfe..81ef44b 100644
--- a/src/kyupy/logic_sim.py
+++ b/src/kyupy/logic_sim.py
@@ -1,7 +1,7 @@
 """A high-throughput combinational logic simulator.
 
 The class :py:class:`~kyupy.logic_sim.LogicSim` performs parallel simulations of the combinational part of a circuit.
-The logic operations are performed bit-parallel on packed numpy arrays.
+The logic operations are performed bit-parallel on packed numpy arrays (see bit-parallel (bp) array description in :py:mod:`~kyupy.logic`).
 Simple sequential circuits can be simulated by repeated assignments and propagations.
 However, this simulator ignores the clock network and simply assumes that all state-elements are clocked all the time.
 """
@@ -19,8 +19,8 @@ class LogicSim(sim.SimOps):
     :param circuit: The circuit to simulate.
     :param sims: The number of parallel logic simulations to perform.
     :param m: The arity of the logic, must be 2, 4, or 8.
-    :param c_reuse: If True, intermediate signal values may get overwritten when not needed anymore.
-    :param strip_forks: If True, forks are not included in the simulation model.
+    :param c_reuse: If True, intermediate signal values may get overwritten when not needed anymore to save memory.
+    :param strip_forks: If True, forks are not included in the simulation model to save memory and simulation time.
     """
     def __init__(self, circuit: Circuit, sims: int = 8, m: int = 8, c_reuse: bool = False, strip_forks: bool = False):
         assert m in [2, 4, 8]
@@ -34,10 +34,12 @@ class LogicSim(sim.SimOps):
         self.s = np.zeros((2, self.s_len, 3, nbytes), dtype=np.uint8)
         """Logic values of the sequential elements (flip-flops) and ports.
 
-        The elements are as follows:
+        It is a pair of arrays in bit-parallel (bp) storage format:
 
         * ``s[0]`` Assigned values. Simulator will read (P)PI value from here.
         * ``s[1]`` Result values. Simulator will write (P)PO values here.
+
+        Access this array to assign new values to the (P)PIs or read values from the (P)POs.
         """
         self.s[:,:,1,:] = 255  # unassigned
 
@@ -49,16 +51,10 @@ class LogicSim(sim.SimOps):
         """
         self.c[self.pippi_c_locs] = self.s[0, self.pippi_s_locs, :self.mdim]
 
-    def c_to_s(self):
-        """Copies the results of the combinational portion to ``s[1]``.
-        """
-        self.s[1, self.poppo_s_locs, :self.mdim] = self.c[self.poppo_c_locs]
-        if self.mdim == 1:
-            self.s[1, self.poppo_s_locs, 1:2] = self.c[self.poppo_c_locs]
-
-    def c_prop(self, sims=None, inject_cb=None):
-        """Propagate the input values towards the outputs (Perform all logic operations in topological order).
+    def c_prop(self, inject_cb=None):
+        """Propagate the input values through the combinational circuit towards the outputs.
 
+        Performs all logic operations in topological order.
         If the circuit is sequential (it contains flip-flops), one call simulates one clock cycle.
 
         :param inject_cb: A callback function for manipulating intermediate signal values.
@@ -67,13 +63,11 @@ class LogicSim(sim.SimOps):
             resumes with the manipulated values after the callback returns.
         :type inject_cb: ``f(Line, ndarray)``
         """
-        if sims is None: sims = self.sims
-        nbytes = (sims - 1) // 8 + 1
         t0 = self.c_locs[self.tmp_idx]
         t1 = self.c_locs[self.tmp2_idx]
         if self.m == 2:
             if inject_cb is None:
-                _prop_cpu(self.ops, self.c_locs, self.c[...,:nbytes])
+                _prop_cpu(self.ops, self.c_locs, self.c)
             else:
                 for op, o0, i0, i1, i2, i3 in self.ops[:,:6]:
                     o0, i0, i1, i2, i3 = [self.c_locs[x] for x in (o0, i0, i1, i2, i3)]
@@ -264,14 +258,21 @@ class LogicSim(sim.SimOps):
                 else: print(f'unknown op {op}')
                 if inject_cb is not None: inject_cb(o0, self.s[o0])
 
+    def c_to_s(self):
+        """Copies (captures) the results of the combinational portion to ``s[1]``.
+        """
+        self.s[1, self.poppo_s_locs, :self.mdim] = self.c[self.poppo_c_locs]
+        if self.mdim == 1:
+            self.s[1, self.poppo_s_locs, 1:2] = self.c[self.poppo_c_locs]
+
     def s_ppo_to_ppi(self):
         """Constructs a new assignment based on the current data in ``s``.
 
         Use this function for simulating consecutive clock cycles.
 
         For 2-valued or 4-valued simulations, all valued from PPOs (in ``s[1]``) and copied to the PPIs (in ``s[0]``).
-        For 8-valued simulations, PPI transitions are constructed from the initial values of the assignment and the
-        final values of the results.
+        For 8-valued simulations, PPI transitions are constructed from the final values of the assignment (in ``s[0]``) and the
+        final values of the results (in ``s[1]``).
         """
         # TODO: handle latches correctly
         if self.mdim < 3:
@@ -282,11 +283,10 @@ class LogicSim(sim.SimOps):
             self.s[0, self.ppio_s_locs, 2] = self.s[0, self.ppio_s_locs, 0] ^ self.s[0, self.ppio_s_locs, 1]  # TODO: not correct for X, -
 
     def cycle(self, cycles: int = 1, inject_cb=None):
-        """Assigns the given state, propagates it and captures the new state.
+        """Repeatedly assigns a state, propagates it, captures the new state, and transfers PPOs to PPIs.
 
         :param cycles: The number of cycles to simulate.
         :param inject_cb: A callback function for manipulating intermediate signal values. See :py:func:`c_prop`.
-        :returns: The given state object.
         """
         for _ in range(cycles):
             self.s_to_c()
diff --git a/src/kyupy/sdf.py b/src/kyupy/sdf.py
index 8eb093f..015f975 100644
--- a/src/kyupy/sdf.py
+++ b/src/kyupy/sdf.py
@@ -3,9 +3,8 @@
 This parser extracts pin-to-pin delay and interconnect delay information from SDF files.
 Sophisticated timing specifications (timing checks, conditional delays, etc.) are ignored.
 
-The functions :py:func:`load` and :py:func:`read` return an intermediate representation (:class:`DelayFile` object).
-Call :py:func:`DelayFile.iopaths` to match the intermediate representation to a given circuit.
-
+The functions :py:func:`parse` and :py:func:`load` return an intermediate representation (:class:`DelayFile` object).
+Call :py:func:`DelayFile.iopaths` and :py:func:`DelayFile.interconnects` to generate delay information for a given circuit.
 """
 
 from collections import namedtuple
@@ -16,6 +15,7 @@ from lark import Lark, Transformer
 
 from . import log, readtext
 from .circuit import Circuit
+from .techlib import TechLib
 
 
 Interconnect = namedtuple('Interconnect', ['orig', 'dest', 'r', 'f'])
@@ -34,17 +34,20 @@ class DelayFile:
         return '\n'.join(f'{n}: {l}' for n, l in self.cells.items()) + '\n' + \
                '\n'.join(str(i) for i in self._interconnects)
 
-    def iopaths(self, circuit:Circuit, tlib):
+    def iopaths(self, circuit:Circuit, tlib:TechLib):
         """Constructs an ndarray containing all IOPATH delays.
 
         All IOPATH delays for a node ``n`` are annotated to the line connected to the input pin specified in the IOPATH.
 
         Limited support of SDF spec:
-        * ABSOLUTE delay values only
-        * two delvals per delval_list. First is rising/posedge, second is falling/negedge
-        transition at the output of the IOPATH (SDF spec, pp. 3-17).
+
+        * Only ABSOLUTE delay values are supported.
+        * Only two delvals per delval_list are supported. First is rising/posedge, second is falling/negedge
+          transition at the output of the IOPATH (SDF spec, pp. 3-17).
         * PATHPULSE declarations are ignored.
 
+        The axes convention of KyuPy's delay data arrays is as follows:
+
         * Axis 0: dataset (usually 3 datasets per SDF-file)
         * Axis 1: line index (e.g. ``n.ins[0]``, ``n.ins[1]``)
         * Axis 2: polarity of the transition at the IOPATH-input (e.g. at ``n.ins[0]`` or ``n.ins[1]``), 0='rising/posedge', 1='falling/negedge'
@@ -75,20 +78,24 @@ class DelayFile:
 
         return np.moveaxis(delays, -1, 0)
 
-    def interconnects(self, circuit, tlib):
+    def interconnects(self, circuit:Circuit, tlib:TechLib):
         """Constructs an ndarray containing all INTERCONNECT delays.
 
         To properly annotate interconnect delays, the circuit model has to include a '__fork__' node on
         every signal and every fanout-branch. The Verilog parser aids in this by setting the parameter
-        `branchforks=True` in :py:func:`kyupy.verilog.parse` or :py:func:`kyupy.verilog.load`.
+        `branchforks=True` in :py:func:`~kyupy.verilog.parse` or :py:func:`~kyupy.verilog.load`.
 
         Limited support of SDF spec:
-        * ABSOLUTE delay values only
-        * two delvals per delval_list. First is rising/posedge, second is falling/negedge transition.
+
+        * Only ABSOLUTE delay values are supported.
+        * Only two delvals per delval_list are supported. First is rising/posedge, second is falling/negedge
+          transition.
         * PATHPULSE declarations are ignored.
 
+        The axes convention of KyuPy's delay data arrays is as follows:
+
         * Axis 0: dataset (usually 3 datasets per SDF-file)
-        * Axis 1: line index. usually input line of a __fork__
+        * Axis 1: line index. Usually input line of a __fork__.
         * Axis 2: (axis of size 2 for compatability to IOPATH results. Values are broadcast along this axis.)
         * Axis 3: polarity of the transition, 0='rising/posedge', 1='falling/negedge'
         """
diff --git a/src/kyupy/stil.py b/src/kyupy/stil.py
index 9bcc376..98cc2df 100644
--- a/src/kyupy/stil.py
+++ b/src/kyupy/stil.py
@@ -3,8 +3,8 @@
 The main purpose of this parser is to load scan pattern sets from STIL files.
 It supports only a subset of STIL.
 
-The functions :py:func:`load` and :py:func:`read` return an intermediate representation (:class:`StilFile` object).
-Call :py:func:`StilFile.tests`, :py:func:`StilFile.tests_loc`, or :py:func:`StilFile.responses` to
+The functions :py:func:`parse` and :py:func:`load` return an intermediate representation (:py:class:`StilFile` object).
+Call :py:func:`StilFile.tests()`, :py:func:`StilFile.tests_loc()`, or :py:func:`StilFile.responses()` to
 obtain the appropriate vector sets.
 """
 
@@ -90,6 +90,11 @@ class StilFile:
         """Assembles and returns a scan test pattern set for given circuit.
 
         This function assumes a static (stuck-at fault) test.
+
+        :param circuit: The circuit to assemble the patterns for. The patterns will follow the
+            :py:attr:`~kyupy.circuit.Circuit.s_nodes` ordering of this circuit.
+        :return: A 4-valued multi-valued (mv) logic array (see :py:mod:`~kyupy.logic`).
+            The values for primary inputs and sequential elements are filled, the primary outputs are left unassigned.
         """
         interface, pi_map, _, scan_maps, scan_inversions = self._maps(circuit)
         tests = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED)
@@ -109,6 +114,20 @@ class StilFile:
         This function assumes a launch-on-capture (LoC) delay test.
         It performs a logic simulation to obtain the first capture pattern (the one that launches the delay
         test) and assembles the test pattern set from from pairs for initialization- and launch-patterns.
+
+        :param circuit: The circuit to assemble the patterns for. The patterns will follow the
+            :py:attr:`~kyupy.circuit.Circuit.s_nodes` ordering of this circuit.
+        :param init_filter: A function for filtering the initialization patterns. This function is called
+            with the initialization patterns from the STIL file as mvarray before logic simulation.
+            It shall return an mvarray with the same shape. This function can be used, for example, to fill
+            patterns.
+        :param launch_filter: A function for filtering the launch patterns. This function is called
+            with the launch patterns generated by logic simulation before they are combined with
+            the initialization patterns to form the final 8-valued test patterns.
+            The function shall return an mvarray with the same shape. This function can be used, for example, to fill
+            patterns.
+        :return: An 8-valued multi-valued (mv) logic array (see :py:mod:`~kyupy.logic`). The values for primary
+            inputs and sequential elements are filled, the primary outputs are left unassigned.
         """
         interface, pi_map, po_map, scan_maps, scan_inversions = self._maps(circuit)
         init = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED)
@@ -142,7 +161,13 @@ class StilFile:
         return logic.mv_transition(init, launch)
 
     def responses(self, circuit):
-        """Assembles and returns a scan test response pattern set for given circuit."""
+        """Assembles and returns a scan test response pattern set for given circuit.
+
+        :param circuit: The circuit to assemble the patterns for. The patterns will follow the
+            :py:attr:`~kyupy.circuit.Circuit.s_nodes` ordering of this circuit.
+        :return: A 4-valued multi-valued (mv) logic array (see :py:mod:`~kyupy.logic`).
+            The values for primary outputs and sequential elements are filled, the primary inputs are left unassigned.
+        """
         interface, _, po_map, scan_maps, scan_inversions = self._maps(circuit)
         resp = np.full((len(interface), len(self.patterns)), logic.UNASSIGNED)
         for i, p in enumerate(self.patterns):
diff --git a/src/kyupy/techlib.py b/src/kyupy/techlib.py
index 8a9886a..ce15ed1 100644
--- a/src/kyupy/techlib.py
+++ b/src/kyupy/techlib.py
@@ -1,21 +1,19 @@
+"""KyuPy's Built-In Technology Libraries
+
+Technology libraries provide cell definitions and their implementation with simulation primitives.
+A couple of common standard cell libraries are built-in.
+Others can be easily added by providing a bench-like description of the cells.
+"""
+
 import re
 from itertools import product
 
-from .circuit import Node, Line
 from . import bench
 
 
-class TechLib:
-    """Provides some information specific to standard cell libraries necessary
-    for loading gate-level designs. :py:class:`~kyupy.circuit.Node` objects do not
-    have pin names. The methods defined here map pin names to pin directions and defined
-    positions in the ``node.ins`` and ``node.outs`` lists. The default implementation
-    provides mappings for SAED-inspired standard cell libraries.
-    """
-
+class TechLibOld:
     @staticmethod
     def pin_index(kind, pin):
-        """Returns a pin list position for a given node kind and pin name."""
         if isinstance(pin, int):
             return max(0, pin-1)
         if kind[:3] in ('OAI', 'AOI'):
@@ -51,16 +49,24 @@ class TechLib:
 
     @staticmethod
     def pin_is_output(kind, pin):
-        """Returns True, if given pin name of a node kind is an output."""
         if isinstance(pin, int):
             return pin == 0
         if 'MUX' in kind and pin == 'S': return False
         return pin in ('Q', 'QN', 'Z', 'ZN', 'Y', 'CO', 'S', 'SO', 'C1')
 
 
-class TechLibNew:
+class TechLib:
+    """Class for standard cell library definitions.
+
+    :py:class:`~kyupy.circuit.Node` objects do not have pin names.
+    This class maps pin names to pin directions and defined positions in the ``node.ins`` and ``node.outs`` lists.
+    Furthermore, it gives access to implementations of complex cells. See also :py:func:`~kyupy.circuit.Circuit.substitute` and
+    :py:func:`~kyupy.circuit.Circuit.resolve_tlib_cells`.
+    """
     def __init__(self, lib_src):
         self.cells = dict()
+        """A dictionary with pin definitions and circuits for each cell kind (type).
+        """
         for c_str in re.split(r';\s+', lib_src):
             c_str = re.sub(r'^\s+', '', c_str)
             name_len = c_str.find(' ')
@@ -82,16 +88,56 @@ class TechLibNew:
                 self.cells[name] = (c, pin_dict)
 
     def pin_index(self, kind, pin):
+        """Returns a pin list position for a given node kind and pin name."""
         assert kind in self.cells, f'Unknown cell: {kind}'
         assert pin in self.cells[kind][1], f'Unknown pin: {pin} for cell {kind}'
         return self.cells[kind][1][pin][0]
 
     def pin_is_output(self, kind, pin):
+        """Returns True, if given pin name of a node kind is an output."""
         assert kind in self.cells, f'Unknown cell: {kind}'
         assert pin in self.cells[kind][1], f'Unknown pin: {pin} for cell {kind}'
         return self.cells[kind][1][pin][1]
 
 
+GSC180 = TechLib(r"""
+BUFX{1,3}      input(A)    output(Y) Y=BUF1(A)    ;
+CLKBUFX{1,2,3} input(A)    output(Y) Y=BUF1(A)    ;
+INVX{1,2,4,8}  input(A)    output(Y) Y=INV1(A)    ;
+TBUFX{1,2,4,8} input(A,OE) output(Y) Y=AND2(A,OE) ;
+TINVX1         input(A,OE) output(Y) AB=INV1(A) Y=AND2(AB,OE) ;
+
+AND2X1      input(A,B)     output(Y) Y=AND2(A,B)      ;
+NAND2X{1,2} input(A,B)     output(Y) Y=NAND2(A,B)     ;
+NAND3X1     input(A,B,C)   output(Y) Y=NAND3(A,B,C)   ;
+NAND4X1     input(A,B,C,D) output(Y) Y=NAND4(A,B,C,D) ;
+OR2X1       input(A,B)     output(Y) Y=OR2(A,B)       ;
+OR4X1       input(A,B,C,D) output(Y) Y=OR4(A,B,C,D)   ;
+NOR2X1      input(A,B)     output(Y) Y=NOR2(A,B)      ;
+NOR3X1      input(A,B,C)   output(Y) Y=NOR3(A,B,C)    ;
+NOR4X1      input(A,B,C,D) output(Y) Y=NOR4(A,B,C,D)  ;
+XOR2X1      input(A,B)     output(Y) Y=XOR2(A,B)      ;
+
+MX2X1   input(A,B,S0)            output(Y)    Y=MUX21(A,B,S0)      ;
+AOI21X1 input(A0,A1,B0)          output(Y)    Y=AOI21(A0,A1,B0)    ;
+AOI22X1 input(A0,A1,B0,B1)       output(Y)    Y=AOI22(A0,A1,B0,B1) ;
+OAI21X1 input(A0,A1,B0)          output(Y)    Y=OAI21(A0,A1,B0)    ;
+OAI22X1 input(A0,A1,B0,B1)       output(Y)    Y=OAI22(A0,A1,B0,B1) ;
+OAI33X1 input(A0,A1,A2,B0,B1,B2) output(Y)    AA=OR2(A0,A1) BB=OR2(B0,B1) Y=OAI22(AA,A2,BB,B2) ;
+ADDFX1  input(A,B,CI)            output(CO,S) AB=XOR2(A,B) S=XOR2(AB,CI) CO=AO22(AB,CI,A,B)    ;
+ADDHX1  input(A,B)               output(CO,S) S=XOR2(A,B) CO=AND2(A,B)                         ;
+
+DFFX1    input(CK,D)             output(Q,QN) Q=DFF(D,CK) QN=INV1(Q) ;
+DFFSRX1  input(CK,D,RN,SN)       output(Q,QN) DR=AND2(D,RN) SET=INV1(SN) DRS=OR2(DR,SET) Q=DFF(DRS,CK) QN=INV1(Q) ;
+SDFFSRX1 input(CK,D,RN,SE,SI,SN) output(Q,QN) DR=AND2(D,RN) SET=INV1(SN) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CK) QN=INV1(Q) ;
+
+TLATSRX1 input(D,G,RN,SN) output(Q,QN) DR=AND2(D,RN) SET=INV1(SN) DRS=OR2(DR,SET) Q=LATCH(DRS,G) QN=INV1(Q) ;
+TLATX1   input(C,D)       output(Q,QN) Q=LATCH(D,C) QN=INV1(Q) ;
+""")
+"""The GSC 180nm generic standard cell library.
+"""
+
+
 _nangate_common = r"""
 FILLCELL_X{1,2,4,8,16,32} ;
 
@@ -153,21 +199,7 @@ DLL_X{1,2} input(D,GN) output(Q) G=INV1(GN) Q=LATCH(D,G) ;
 """
 
 
-NANGATE_ZN = TechLibNew(_nangate_common + r"""
-INV_X{1,2,4,8,16,32}  input(A) output(ZN) ZN=INV1(A) ;
-
-AND2_X{1,2,4}  input(A1,A2)       output(ZN) ZN=AND2(A1,A2)        ;
-AND3_X{1,2,4}  input(A1,A2,A3)    output(ZN) ZN=AND3(A1,A2,A3)     ;
-AND4_X{1,2,4}  input(A1,A2,A3,A4) output(ZN) ZN=AND4(A1,A2,A3,A4)  ;
-OR2_X{1,2,4}   input(A1,A2)       output(ZN) ZN=OR2(A1,A2)         ;
-OR3_X{1,2,4}   input(A1,A2,A3)    output(ZN) ZN=OR3(A1,A2,A3)      ;
-OR4_X{1,2,4}   input(A1,A2,A3,A4) output(ZN) ZN=OR4(A1,A2,A3,A4)   ;
-XOR2_X{1,2}    input(A,B)         output(Z)  Z=XOR2(A,B)           ;
-XNOR2_X{1,2}   input(A,B)         output(ZN) ZN=XNOR2(A,B)         ;
-""")
-
-
-NANGATE = TechLibNew(_nangate_common + r"""
+NANGATE = TechLib(_nangate_common + r"""
 INV_X{1,2,4,8,16,32}  input(I) output(ZN) ZN=INV1(I) ;
 
 AND2_X{1,2,4}  input(A1,A2)       output(Z)  Z=AND2(A1,A2)        ;
@@ -179,99 +211,27 @@ OR4_X{1,2,4}   input(A1,A2,A3,A4) output(Z)  Z=OR4(A1,A2,A3,A4)   ;
 XOR2_X{1,2}    input(A1,A2)       output(Z)  Z=XOR2(A1,A2)        ;
 XNOR2_X{1,2}   input(A1,A2)       output(ZN) ZN=XNOR2(A1,A2)      ;
 """)
+"""A newer NANGATE-variant that uses 'Z' as output pin names for AND and OR gates.
+"""
 
 
-# SAED90nm and SAED32nm libraries.
-# not included here: negative-edge flip-flops, tri-state, latches, clock gating, level shifters
-
-SAED90 = TechLibNew(r"""
-NBUFFX{2,4,8,16,32}$ input(INP) output(Z) Z=BUF1(INP) ;
-AOBUFX{1,2,4}$       input(INP) output(Z) Z=BUF1(INP) ;
-DELLN{1,2,3}X2$      input(INP) output(Z)Z=BUF1(INP) ;
-
-INVX{0,1,2,4,8,16,32}$ input(INP) output(ZN) ZN=INV1(INP) ;
-AOINVX{1,2,4}$         input(INP) output(ZN) ZN=INV1(INP) ;
-IBUFFX{2,4,8,16,32}$   input(INP) output(ZN) ZN=INV1(INP) ;
-
-TIEH$ output(Z)   Z=__const1__() ;
-TIEL$ output(ZN) ZN=__const0__() ;
-
-HEAD2X{2,4,8,16,32}$ input(SLEEP) output(SLEEPOUT) SLEEPOUT=BUF1(SLEEP) ;
-HEADX{2,4,8,16,32}$  input(SLEEP) ;
-
-ANTENNA$ input(INP)   ;
-CLOAD1$  input(INP)   ;
-DCAP$                 ;
-DHFILL{HLH,LHL}2      ;
-DHFILLHLHLS11$        ;
-SHFILL{1,2,3,64,128}$ ;
-
-AND2X{1,2,4}$    input(IN1,IN2)         output(Q)   Q=AND2(IN1,IN2)          ;
-AND3X{1,2,4}$    input(IN1,IN2,IN3)     output(Q)   Q=AND3(IN1,IN2,IN3)      ;
-AND4X{1,2,4}$    input(IN1,IN2,IN3,IN4) output(Q)   Q=AND4(IN1,IN2,IN3,IN4)  ;
-OR2X{1,2,4}$     input(IN1,IN2)         output(Q)   Q=OR2(IN1,IN2)           ;
-OR3X{1,2,4}$     input(IN1,IN2,IN3)     output(Q)   Q=OR3(IN1,IN2,IN3)       ;
-OR4X{1,2,4}$     input(IN1,IN2,IN3,IN4) output(Q)   Q=OR4(IN1,IN2,IN3,IN4)   ;
-XOR2X{1,2}$      input(IN1,IN2)         output(Q)   Q=XOR2(IN1,IN2)          ;
-XOR3X{1,2}$      input(IN1,IN2,IN3)     output(Q)   Q=XOR3(IN1,IN2,IN3)      ;
-NAND2X{0,1,2,4}$ input(IN1,IN2)         output(QN) QN=NAND2(IN1,IN2)         ;
-NAND3X{0,1,2,4}$ input(IN1,IN2,IN3)     output(QN) QN=NAND3(IN1,IN2,IN3)     ;
-NAND4X{0,1}$     input(IN1,IN2,IN3,IN4) output(QN) QN=NAND4(IN1,IN2,IN3,IN4) ;
-NOR2X{0,1,2,4}$  input(IN1,IN2)         output(QN) QN=NOR2(IN1,IN2)          ;
-NOR3X{0,1,2,4}$  input(IN1,IN2,IN3)     output(QN) QN=NOR3(IN1,IN2,IN3)      ;
-NOR4X{0,1}$      input(IN1,IN2,IN3,IN4) output(QN) QN=NOR4(IN1,IN2,IN3,IN4)  ;
-XNOR2X{1,2}$     input(IN1,IN2)         output(Q)   Q=XNOR2(IN1,IN2)         ;
-XNOR3X{1,2}$     input(IN1,IN2,IN3)     output(Q)   Q=XNOR3(IN1,IN2,IN3)     ;
-
-ISOLAND{,AO}X{1,2,4,8}$ input(ISO,D) output(Q) ISOB=NOT1(ISO) Q=AND2(ISOB,D) ;
-ISOLOR{,AO}X{1,2,4,8}$  input(ISO,D) output(Q) Q=OR2(ISO,D)  ;
-
-AO21X{1,2}$  input(IN1,IN2,IN3) output(Q)   Q=AO21(IN1,IN2,IN3)  ;
-OA21X{1,2}$  input(IN1,IN2,IN3) output(Q)   Q=OA21(IN1,IN2,IN3)  ;
-AOI21X{1,2}$ input(IN1,IN2,IN3) output(QN) QN=AOI21(IN1,IN2,IN3) ;
-OAI21X{1,2}$ input(IN1,IN2,IN3) output(QN) QN=OAI21(IN1,IN2,IN3) ;
-
-AO22X{1,2}$  input(IN1,IN2,IN3,IN4) output(Q)   Q=AO22(IN1,IN2,IN3,IN4)  ;
-OA22X{1,2}$  input(IN1,IN2,IN3,IN4) output(Q)   Q=OA22(IN1,IN2,IN3,IN4)  ;
-AOI22X{1,2}$ input(IN1,IN2,IN3,IN4) output(QN) QN=AOI22(IN1,IN2,IN3,IN4) ;
-OAI22X{1,2}$ input(IN1,IN2,IN3,IN4) output(QN) QN=OAI22(IN1,IN2,IN3,IN4) ;
-
-MUX21X{1,2}$ input(IN1,IN2,S) output(Q) Q=MUX21(IN1,IN2,S) ;
-
-AO221X{1,2}$  input(IN1,IN2,IN3,IN4,IN5) output(Q)  A=AO22(IN1,IN2,IN3,IN4)  Q=OR2(IN5,A)   ;
-OA221X{1,2}$  input(IN1,IN2,IN3,IN4,IN5) output(Q)  A=OA22(IN1,IN2,IN3,IN4)  Q=AND2(IN5,A)  ;
-AOI221X{1,2}$ input(IN1,IN2,IN3,IN4,IN5) output(QN) A=AO22(IN1,IN2,IN3,IN4) QN=NOR2(IN5,A)  ;
-OAI221X{1,2}$ input(IN1,IN2,IN3,IN4,IN5) output(QN) A=OA22(IN1,IN2,IN3,IN4) QN=NAND2(IN5,A) ;
-
-AO222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6)  output(Q)  A=AO22(IN1,IN2,IN3,IN4)  Q=AO21(IN5,IN6,A)  ;
-OA222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6)  output(Q)  A=OA22(IN1,IN2,IN3,IN4)  Q=OA21(IN5,IN6,A)  ;
-AOI222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6) output(QN) A=AO22(IN1,IN2,IN3,IN4) QN=AOI21(IN5,IN6,A) ;
-OAI222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6) output(QN) A=OA22(IN1,IN2,IN3,IN4) QN=OAI21(IN5,IN6,A) ;
-
-MUX41X{1,2}$ input(IN1,IN2,IN3,IN4,S0,S1) output(Q) A=MUX21(IN1,IN2,S0) B=MUX21(IN3,IN4,S0) Q=MUX21(A,B,S1) ;
-
-DEC24X{1,2}$ input(IN1,IN2) output(Q0,Q1,Q2,Q3) IN1B=INV1(IN1) IN2B=INV1(IN2) Q0=NOR2(IN1,IN2) Q1=AND(IN1,IN2B) Q2=AND(IN1B,IN2) Q3=AND(IN1,IN2) ;
-FADDX{1,2}$ input(A,B,CI) output(S,CO) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(AB,CI,A,B) ;
-HADDX{1,2}$ input(A0,B0) output(SO,C1) C1=XOR2(A0,B0) SO=AND2(A0,B0) ;
-
-{,AO}DFFARX{1,2}$ input(D,CLK,RSTB)      output(Q,QN) DR=AND2(D,RSTB) Q=DFF(DR,CLK) QN=INV1(Q) ;
-DFFASRX{1,2}$     input(D,CLK,RSTB,SETB) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) Q=DFF(DRS,CLK) QN=INV1(Q) ;
-DFFASX{1,2}$      input(D,CLK,SETB)      output(Q,QN) SET=INV1(SETB) DS=OR2(D,SET) Q=DFF(DS,CLK) QN=INV1(Q) ;
-DFFSSRX{1,2}$     input(CLK,D,RSTB,SETB) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) Q=DFF(DRS,CLK) QN=INV1(Q) ;
-DFFX{1,2}$        input(D,CLK)           output(Q,QN) Q=DFF(D,CLK) QN=INV1(Q) ;
-
-SDFFARX{1,2}$   input(D,CLK,RSTB,SE,SI)      output(Q,QN) DR=AND2(D,RSTB) DI=MUX21(DR,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
-SDFFASRSX{1,2}$ input(D,CLK,RSTB,SETB,SE,SI) output(Q,QN,S0) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) S0=BUF1(Q) ;
-SDFFASRX{1,2}$  input(D,CLK,RSTB,SETB,SE,SI) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
-SDFFASX{1,2}$   input(D,CLK,SETB,SE,SI)      output(Q,QN) SET=INV1(SETB) DS=OR2(D,SET) DI=MUX21(DS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
-SDFFSSRX{1,2}$  input(CLK,D,RSTB,SETB,SI,SE) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
-SDFFX{1,2}$     input(D,CLK,SE,SI)           output(Q,QN) DI=MUX21(D,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
+NANGATE_ZN = TechLib(_nangate_common + r"""
+INV_X{1,2,4,8,16,32}  input(A) output(ZN) ZN=INV1(A) ;
 
-LATCHX{1,2}$ input(D,CLK) output(Q,QN) Q=LATCH(D,CLK) QN=INV1(Q) ;
-""".replace('$','{,_LVT,_HVT}'))
+AND2_X{1,2,4}  input(A1,A2)       output(ZN) ZN=AND2(A1,A2)        ;
+AND3_X{1,2,4}  input(A1,A2,A3)    output(ZN) ZN=AND3(A1,A2,A3)     ;
+AND4_X{1,2,4}  input(A1,A2,A3,A4) output(ZN) ZN=AND4(A1,A2,A3,A4)  ;
+OR2_X{1,2,4}   input(A1,A2)       output(ZN) ZN=OR2(A1,A2)         ;
+OR3_X{1,2,4}   input(A1,A2,A3)    output(ZN) ZN=OR3(A1,A2,A3)      ;
+OR4_X{1,2,4}   input(A1,A2,A3,A4) output(ZN) ZN=OR4(A1,A2,A3,A4)   ;
+XOR2_X{1,2}    input(A,B)         output(Z)  Z=XOR2(A,B)           ;
+XNOR2_X{1,2}   input(A,B)         output(ZN) ZN=XNOR2(A,B)         ;
+""")
+"""An older NANGATE-variant that uses 'ZN' as output pin names for AND and OR gates.
+"""
 
 
-SAED32 = TechLibNew(r"""
+SAED32 = TechLib(r"""
 NBUFFX{2,4,8,16,32}$ input(A) output(Y) Y=BUF1(A) ;
 AOBUFX{1,2,4}$       input(A) output(Y) Y=BUF1(A) ;
 DELLN{1,2,3}X2$      input(A) output(Y) Y=BUF1(A) ;
@@ -360,38 +320,96 @@ SDFFX{1,2}$     input(D,CLK,SE,SI)           output(Q,QN) DI=MUX21(D,SI,SE) Q=DF
 
 LATCHX{1,2}$ input(D,CLK) output(Q,QN) Q=LATCH(D,CLK) QN=INV1(Q) ;
 """.replace('$','_RVT'))
+"""The SAED 32nm educational technology library.
+It defines all cells except: negative-edge flip-flops, tri-state, latches, clock gating, level shifters
+"""
 
-GSC180 = TechLibNew(r"""
-BUFX{1,3}      input(A)    output(Y) Y=BUF1(A)    ;
-CLKBUFX{1,2,3} input(A)    output(Y) Y=BUF1(A)    ;
-INVX{1,2,4,8}  input(A)    output(Y) Y=INV1(A)    ;
-TBUFX{1,2,4,8} input(A,OE) output(Y) Y=AND2(A,OE) ;
-TINVX1         input(A,OE) output(Y) AB=INV1(A) Y=AND2(AB,OE) ;
 
-AND2X1      input(A,B)     output(Y) Y=AND2(A,B)      ;
-NAND2X{1,2} input(A,B)     output(Y) Y=NAND2(A,B)     ;
-NAND3X1     input(A,B,C)   output(Y) Y=NAND3(A,B,C)   ;
-NAND4X1     input(A,B,C,D) output(Y) Y=NAND4(A,B,C,D) ;
-OR2X1       input(A,B)     output(Y) Y=OR2(A,B)       ;
-OR4X1       input(A,B,C,D) output(Y) Y=OR4(A,B,C,D)   ;
-NOR2X1      input(A,B)     output(Y) Y=NOR2(A,B)      ;
-NOR3X1      input(A,B,C)   output(Y) Y=NOR3(A,B,C)    ;
-NOR4X1      input(A,B,C,D) output(Y) Y=NOR4(A,B,C,D)  ;
-XOR2X1      input(A,B)     output(Y) Y=XOR2(A,B)      ;
+SAED90 = TechLib(r"""
+NBUFFX{2,4,8,16,32}$ input(INP) output(Z) Z=BUF1(INP) ;
+AOBUFX{1,2,4}$       input(INP) output(Z) Z=BUF1(INP) ;
+DELLN{1,2,3}X2$      input(INP) output(Z)Z=BUF1(INP) ;
 
-MX2X1   input(A,B,S0)            output(Y)    Y=MUX21(A,B,S0)      ;
-AOI21X1 input(A0,A1,B0)          output(Y)    Y=AOI21(A0,A1,B0)    ;
-AOI22X1 input(A0,A1,B0,B1)       output(Y)    Y=AOI22(A0,A1,B0,B1) ;
-OAI21X1 input(A0,A1,B0)          output(Y)    Y=OAI21(A0,A1,B0)    ;
-OAI22X1 input(A0,A1,B0,B1)       output(Y)    Y=OAI22(A0,A1,B0,B1) ;
-OAI33X1 input(A0,A1,A2,B0,B1,B2) output(Y)    AA=OR2(A0,A1) BB=OR2(B0,B1) Y=OAI22(AA,A2,BB,B2) ;
-ADDFX1  input(A,B,CI)            output(CO,S) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(AB,CI,A,B)    ;
-ADDHX1  input(A,B)               output(CO,S) CO=XOR2(A,B) S=AND2(A,B)                         ;
+INVX{0,1,2,4,8,16,32}$ input(INP) output(ZN) ZN=INV1(INP) ;
+AOINVX{1,2,4}$         input(INP) output(ZN) ZN=INV1(INP) ;
+IBUFFX{2,4,8,16,32}$   input(INP) output(ZN) ZN=INV1(INP) ;
 
-DFFX1    input(CK,D)             output(Q,QN) Q=DFF(D,CK) QN=INV1(Q) ;
-DFFSRX1  input(CK,D,RN,SN)       output(Q,QN) DR=AND2(D,RN) SET=INV1(SN) DRS=OR2(DR,SET) Q=DFF(DRS,CK) QN=INV1(Q) ;
-SDFFSRX1 input(CK,D,RN,SE,SI,SN) output(Q,QN) DR=AND2(D,RN) SET=INV1(SN) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CK) QN=INV1(Q) ;
+TIEH$ output(Z)   Z=__const1__() ;
+TIEL$ output(ZN) ZN=__const0__() ;
 
-TLATSRX1 input(D,G,RN,SN) output(Q,QN) DR=AND2(D,RN) SET=INV1(SN) DRS=OR2(DR,SET) Q=LATCH(DRS,G) QN=INV1(Q) ;
-TLATX1   input(C,D)       output(Q,QN) Q=LATCH(D,C) QN=INV1(Q) ;
-""")
\ No newline at end of file
+HEAD2X{2,4,8,16,32}$ input(SLEEP) output(SLEEPOUT) SLEEPOUT=BUF1(SLEEP) ;
+HEADX{2,4,8,16,32}$  input(SLEEP) ;
+
+ANTENNA$ input(INP)   ;
+CLOAD1$  input(INP)   ;
+DCAP$                 ;
+DHFILL{HLH,LHL}2      ;
+DHFILLHLHLS11$        ;
+SHFILL{1,2,3,64,128}$ ;
+
+AND2X{1,2,4}$    input(IN1,IN2)         output(Q)   Q=AND2(IN1,IN2)          ;
+AND3X{1,2,4}$    input(IN1,IN2,IN3)     output(Q)   Q=AND3(IN1,IN2,IN3)      ;
+AND4X{1,2,4}$    input(IN1,IN2,IN3,IN4) output(Q)   Q=AND4(IN1,IN2,IN3,IN4)  ;
+OR2X{1,2,4}$     input(IN1,IN2)         output(Q)   Q=OR2(IN1,IN2)           ;
+OR3X{1,2,4}$     input(IN1,IN2,IN3)     output(Q)   Q=OR3(IN1,IN2,IN3)       ;
+OR4X{1,2,4}$     input(IN1,IN2,IN3,IN4) output(Q)   Q=OR4(IN1,IN2,IN3,IN4)   ;
+XOR2X{1,2}$      input(IN1,IN2)         output(Q)   Q=XOR2(IN1,IN2)          ;
+XOR3X{1,2}$      input(IN1,IN2,IN3)     output(Q)   Q=XOR3(IN1,IN2,IN3)      ;
+NAND2X{0,1,2,4}$ input(IN1,IN2)         output(QN) QN=NAND2(IN1,IN2)         ;
+NAND3X{0,1,2,4}$ input(IN1,IN2,IN3)     output(QN) QN=NAND3(IN1,IN2,IN3)     ;
+NAND4X{0,1}$     input(IN1,IN2,IN3,IN4) output(QN) QN=NAND4(IN1,IN2,IN3,IN4) ;
+NOR2X{0,1,2,4}$  input(IN1,IN2)         output(QN) QN=NOR2(IN1,IN2)          ;
+NOR3X{0,1,2,4}$  input(IN1,IN2,IN3)     output(QN) QN=NOR3(IN1,IN2,IN3)      ;
+NOR4X{0,1}$      input(IN1,IN2,IN3,IN4) output(QN) QN=NOR4(IN1,IN2,IN3,IN4)  ;
+XNOR2X{1,2}$     input(IN1,IN2)         output(Q)   Q=XNOR2(IN1,IN2)         ;
+XNOR3X{1,2}$     input(IN1,IN2,IN3)     output(Q)   Q=XNOR3(IN1,IN2,IN3)     ;
+
+ISOLAND{,AO}X{1,2,4,8}$ input(ISO,D) output(Q) ISOB=NOT1(ISO) Q=AND2(ISOB,D) ;
+ISOLOR{,AO}X{1,2,4,8}$  input(ISO,D) output(Q) Q=OR2(ISO,D)  ;
+
+AO21X{1,2}$  input(IN1,IN2,IN3) output(Q)   Q=AO21(IN1,IN2,IN3)  ;
+OA21X{1,2}$  input(IN1,IN2,IN3) output(Q)   Q=OA21(IN1,IN2,IN3)  ;
+AOI21X{1,2}$ input(IN1,IN2,IN3) output(QN) QN=AOI21(IN1,IN2,IN3) ;
+OAI21X{1,2}$ input(IN1,IN2,IN3) output(QN) QN=OAI21(IN1,IN2,IN3) ;
+
+AO22X{1,2}$  input(IN1,IN2,IN3,IN4) output(Q)   Q=AO22(IN1,IN2,IN3,IN4)  ;
+OA22X{1,2}$  input(IN1,IN2,IN3,IN4) output(Q)   Q=OA22(IN1,IN2,IN3,IN4)  ;
+AOI22X{1,2}$ input(IN1,IN2,IN3,IN4) output(QN) QN=AOI22(IN1,IN2,IN3,IN4) ;
+OAI22X{1,2}$ input(IN1,IN2,IN3,IN4) output(QN) QN=OAI22(IN1,IN2,IN3,IN4) ;
+
+MUX21X{1,2}$ input(IN1,IN2,S) output(Q) Q=MUX21(IN1,IN2,S) ;
+
+AO221X{1,2}$  input(IN1,IN2,IN3,IN4,IN5) output(Q)  A=AO22(IN1,IN2,IN3,IN4)  Q=OR2(IN5,A)   ;
+OA221X{1,2}$  input(IN1,IN2,IN3,IN4,IN5) output(Q)  A=OA22(IN1,IN2,IN3,IN4)  Q=AND2(IN5,A)  ;
+AOI221X{1,2}$ input(IN1,IN2,IN3,IN4,IN5) output(QN) A=AO22(IN1,IN2,IN3,IN4) QN=NOR2(IN5,A)  ;
+OAI221X{1,2}$ input(IN1,IN2,IN3,IN4,IN5) output(QN) A=OA22(IN1,IN2,IN3,IN4) QN=NAND2(IN5,A) ;
+
+AO222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6)  output(Q)  A=AO22(IN1,IN2,IN3,IN4)  Q=AO21(IN5,IN6,A)  ;
+OA222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6)  output(Q)  A=OA22(IN1,IN2,IN3,IN4)  Q=OA21(IN5,IN6,A)  ;
+AOI222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6) output(QN) A=AO22(IN1,IN2,IN3,IN4) QN=AOI21(IN5,IN6,A) ;
+OAI222X{1,2}$ input(IN1,IN2,IN3,IN4,IN5,IN6) output(QN) A=OA22(IN1,IN2,IN3,IN4) QN=OAI21(IN5,IN6,A) ;
+
+MUX41X{1,2}$ input(IN1,IN2,IN3,IN4,S0,S1) output(Q) A=MUX21(IN1,IN2,S0) B=MUX21(IN3,IN4,S0) Q=MUX21(A,B,S1) ;
+
+DEC24X{1,2}$ input(IN1,IN2) output(Q0,Q1,Q2,Q3) IN1B=INV1(IN1) IN2B=INV1(IN2) Q0=NOR2(IN1,IN2) Q1=AND(IN1,IN2B) Q2=AND(IN1B,IN2) Q3=AND(IN1,IN2) ;
+FADDX{1,2}$ input(A,B,CI) output(S,CO) AB=XOR2(A,B) CO=XOR2(AB,CI) S=AO22(AB,CI,A,B) ;
+HADDX{1,2}$ input(A0,B0) output(SO,C1) C1=XOR2(A0,B0) SO=AND2(A0,B0) ;
+
+{,AO}DFFARX{1,2}$ input(D,CLK,RSTB)      output(Q,QN) DR=AND2(D,RSTB) Q=DFF(DR,CLK) QN=INV1(Q) ;
+DFFASRX{1,2}$     input(D,CLK,RSTB,SETB) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) Q=DFF(DRS,CLK) QN=INV1(Q) ;
+DFFASX{1,2}$      input(D,CLK,SETB)      output(Q,QN) SET=INV1(SETB) DS=OR2(D,SET) Q=DFF(DS,CLK) QN=INV1(Q) ;
+DFFSSRX{1,2}$     input(CLK,D,RSTB,SETB) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) Q=DFF(DRS,CLK) QN=INV1(Q) ;
+DFFX{1,2}$        input(D,CLK)           output(Q,QN) Q=DFF(D,CLK) QN=INV1(Q) ;
+
+SDFFARX{1,2}$   input(D,CLK,RSTB,SE,SI)      output(Q,QN) DR=AND2(D,RSTB) DI=MUX21(DR,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
+SDFFASRSX{1,2}$ input(D,CLK,RSTB,SETB,SE,SI) output(Q,QN,S0) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) S0=BUF1(Q) ;
+SDFFASRX{1,2}$  input(D,CLK,RSTB,SETB,SE,SI) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
+SDFFASX{1,2}$   input(D,CLK,SETB,SE,SI)      output(Q,QN) SET=INV1(SETB) DS=OR2(D,SET) DI=MUX21(DS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
+SDFFSSRX{1,2}$  input(CLK,D,RSTB,SETB,SI,SE) output(Q,QN) DR=AND2(D,RSTB) SET=INV1(SETB) DRS=OR2(DR,SET) DI=MUX21(DRS,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
+SDFFX{1,2}$     input(D,CLK,SE,SI)           output(Q,QN) DI=MUX21(D,SI,SE) Q=DFF(DI,CLK) QN=INV1(Q) ;
+
+LATCHX{1,2}$ input(D,CLK) output(Q,QN) Q=LATCH(D,CLK) QN=INV1(Q) ;
+""".replace('$','{,_LVT,_HVT}'))
+"""The SAED 90nm educational technology library.
+It defines all cells except: negative-edge flip-flops, tri-state, latches, clock gating, level shifters
+"""
diff --git a/src/kyupy/verilog.py b/src/kyupy/verilog.py
index 0852786..1c6a0d1 100644
--- a/src/kyupy/verilog.py
+++ b/src/kyupy/verilog.py
@@ -10,7 +10,7 @@ from lark import Lark, Transformer, Tree
 
 from . import log, readtext
 from .circuit import Circuit, Node, Line
-from .techlib import TechLib
+from .techlib import NANGATE
 
 Instantiation = namedtuple('Instantiation', ['type', 'name', 'pins'])
 
@@ -35,7 +35,7 @@ class SignalDeclaration:
 
 
 class VerilogTransformer(Transformer):
-    def __init__(self, branchforks=False, tlib=TechLib()):
+    def __init__(self, branchforks=False, tlib=NANGATE):
         super().__init__()
         self.branchforks = branchforks
         self.tlib = tlib
@@ -232,23 +232,29 @@ GRAMMAR = r"""
     """
 
 
-def parse(text, *, branchforks=False, tlib=TechLib()):
+def parse(text, tlib=NANGATE, branchforks=False):
     """Parses the given ``text`` as Verilog code.
 
     :param text: A string with Verilog code.
+    :param tlib: A technology library object that defines all known cells.
+    :type tlib: :py:class:`~kyupy.techlib.TechLib`
     :param branchforks: If set to ``True``, the returned circuit will include additional `forks` on each fanout branch.
         These forks are needed to correctly annotate interconnect delays
-        (see :py:func:`kyupy.sdf.DelayFile.annotation`).
-    :param tlib: A technology library object that provides pin name mappings.
-    :type tlib: :py:class:`~kyupy.techlib.TechLib`
-    :return: A :class:`~kyupy.circuit.Circuit` object.
+        (see :py:func:`~kyupy.sdf.DelayFile.interconnects()`).
+    :return: A :py:class:`~kyupy.circuit.Circuit` object.
     """
     return Lark(GRAMMAR, parser="lalr", transformer=VerilogTransformer(branchforks, tlib)).parse(text)
 
 
-def load(file, *args, **kwargs):
+def load(file, tlib=NANGATE, branchforks=False):
     """Parses the contents of ``file`` as Verilog code.
 
-    The given file may be gzip compressed. Takes the same keyword arguments as :py:func:`parse`.
+    :param file: A file name or a file handle. Files with `.gz`-suffix are decompressed on-the-fly.
+    :param tlib: A technology library object that defines all known cells.
+    :type tlib: :py:class:`~kyupy.techlib.TechLib`
+    :param branchforks: If set to ``True``, the returned circuit will include additional `forks` on each fanout branch.
+        These forks are needed to correctly annotate interconnect delays
+        (see :py:func:`~kyupy.sdf.DelayFile.interconnects()`).
+    :return: A :py:class:`~kyupy.circuit.Circuit` object.
     """
-    return parse(readtext(file), *args, **kwargs)
+    return parse(readtext(file), tlib, branchforks)
diff --git a/src/kyupy/wave_sim.py b/src/kyupy/wave_sim.py
index 4c0656c..93a107f 100644
--- a/src/kyupy/wave_sim.py
+++ b/src/kyupy/wave_sim.py
@@ -16,7 +16,7 @@ import math
 
 import numpy as np
 
-from . import numba, cuda, hr_bytes, sim, cdiv
+from . import numba, cuda, sim, cdiv
 
 
 TMAX = np.float32(2 ** 127)
@@ -32,7 +32,8 @@ class WaveSim(sim.SimOps):
     """A waveform-based combinational logic timing simulator running on CPU.
 
     :param circuit: The circuit to simulate.
-    :param timing: The timing annotation of the circuit (see :py:func:`kyupy.sdf.DelayFile.annotation` for details)
+    :param delays: One or more delay annotations for the circuit (see :py:func:`kyupy.sdf.DelayFile.iopaths` for details).
+        Each parallel simulation may use the same delays or different delays, depending on the use-case (see :py:attr:`simctl_int`).
     :param sims: The number of parallel simulations.
     :param c_caps: The number of floats available in each waveform. Values must be positive and a multiple of 4.
         Waveforms encode the signal switching history by storing transition times.
@@ -40,12 +41,16 @@ class WaveSim(sim.SimOps):
         that can be stored. A capacity of ``n`` can store at least ``n-2`` transitions. If more transitions are
         generated during simulation, the latest glitch is removed (freeing up two transition times) and an overflow
         flag is set. If an integer is given, all waveforms are set to that same capacity. With an array of length
-        ``len(circuit.lines)`` the capacity is set for each intermediate waveform individually.
+        ``len(circuit.lines)`` the capacity is set individually for each intermediate waveform.
+    :param a_ctrl: An integer array controlling the accumulation of weighted switching activity during simulation.
+        Its shape must be ``(len(circuit.lines), 3)``. ``a_ctrl[...,0]`` is the index into the accumulation buffer, -1 means ignore.
+        ``a_ctrl[...,1]`` is the (integer) weight for a rising transition, ``a_ctrl[...,2]`` is the (integer) weight for
+        a falling transition. The accumulation buffer (:py:attr:`abuf`) is allocated automatically if ``a_ctrl`` is given.
+    :param c_reuse: If enabled, memory of intermediate signal waveforms will be re-used. This greatly reduces
+        memory footprint, but intermediate signal waveforms may become inaccessible after a propagation.
     :param strip_forks: If enabled, the simulator will not evaluate fork nodes explicitly. This saves simulation time
-        by reducing the number of nodes to simulate, but (interconnect) delay annotations of lines read by fork nodes
+        and memory by reducing the number of nodes to simulate, but (interconnect) delay annotations of lines read by fork nodes
         are ignored.
-    :param c_reuse: If enabled, memory of intermediate signal waveforms will be re-used. This greatly reduces
-        memory footprint, but intermediate signal waveforms become unaccessible after a propagation.
     """
     def __init__(self, circuit, delays, sims=8, c_caps=16, a_ctrl=None, c_reuse=False, strip_forks=False):
         super().__init__(circuit, c_caps=c_caps, c_caps_min=4, a_ctrl=a_ctrl, c_reuse=c_reuse, strip_forks=strip_forks)
@@ -58,8 +63,8 @@ class WaveSim(sim.SimOps):
         self.s = np.zeros((11, self.s_len, sims), dtype=np.float32)
         """Information about the logic values and transitions around the sequential elements (flip-flops) and ports.
 
-        The first 3 values are read by ``s_to_c()``.
-        The remaining values are written by ``c_to_s()``.
+        The first 3 values are read by :py:func:`s_to_c`.
+        The remaining values are written by :py:func:`c_to_s`.
 
         The elements are as follows:
 
@@ -82,10 +87,13 @@ class WaveSim(sim.SimOps):
         self.abuf = np.zeros((self.abuf_len, sims), dtype=np.int32) if self.abuf_len > 0 else np.zeros((1, 1), dtype=np.int32)
 
         self.simctl_int = np.zeros((2, sims), dtype=np.int32)
-        """Per-simulation delay configuration.
+        """Integer array for per-simulation delay configuration.
 
-        * ``simctl_int[0]`` delay dataset or random seed for picking a delay
-        * ``simctl_int[1]`` 0: seed parameter specifies dataset, 1: simctl_int[0] specifies dataset, 2: simctl_int[0] + seed param. is a random seed for picking a delay dataset
+        * ``simctl_int[0]`` delay dataset or random seed for picking a delay. By default, each sim has a unique seed.
+        * ``simctl_int[1]`` Method for picking a delay:
+            * 0: seed parameter of :py:func:`c_prop` directly specifies dataset for all simulations
+            * 1: ``simctl_int[0]`` specifies dataset on a per-simulation basis
+            * 2 (default): ``simctl_int[0]`` and seed parameter of :py:func:`c_prop` together are a random seed for picking a delay dataset.
         """
         self.simctl_int[0] = range(sims)  # unique seed for each sim by default, zero this to pick same delays for all sims.
         self.simctl_int[1] = 2  # random picking by default.
@@ -100,8 +108,7 @@ class WaveSim(sim.SimOps):
     def s_to_c(self):
         """Transfers values of sequential elements and primary inputs to the combinational portion.
 
-        Based on the data in ``self.s``, waveforms are generated on the input lines of the circuit.
-        It modifies ``self.c``.
+        Waveforms are generated on the input lines of the combinational circuit based on the data in :py:attr:`s`.
         """
         sins = self.s[:, self.pippi_s_locs]
         cond = (sins[2] != 0) + 2*(sins[0] != 0)  # choices order: 0 R F 1
@@ -113,7 +120,7 @@ class WaveSim(sim.SimOps):
         """Propagates all waveforms from the (pseudo) primary inputs to the (pseudo) primary outputs.
 
         :param sims: Number of parallel simulations to execute. If None, all available simulations are performed.
-        :param seed: Random seed for delay variations.
+        :param seed: Seed for picking delays. See also: :py:attr:`simctl_int`.
         """
         sims = min(sims or self.sims, self.sims)
         for op_start, op_stop in zip(self.level_starts, self.level_stops):
@@ -122,8 +129,8 @@ class WaveSim(sim.SimOps):
     def c_to_s(self, time=TMAX, sd=0.0, seed=1):
         """Simulates a capture operation at all sequential elements and primary outputs.
 
-        Propagated waveforms in ``self.c`` at and around the given capture time are analyzed and
-        the results are stored in ``self.s``.
+        Propagated waveforms at the outputs of the combinational circuit at and around the given capture time are analyzed and
+        the results are stored in :py:attr:`s`.
 
         :param time: The desired capture time. By default, a capture of the settled value is performed.
         :param sd: A standard deviation for uncertainty in the actual capture time.
@@ -134,9 +141,9 @@ class WaveSim(sim.SimOps):
                 self.s[3:, s_loc, vector] = wave_capture_cpu(self.c, c_loc, c_len, vector, time=time, sd=sd, seed=seed)
 
     def s_ppo_to_ppi(self, time=0.0):
-        """Re-assigns the last sampled capture to the appropriate pseudo-primary inputs (PPI).
-        Each PPI transition is constructed from its previous final value, the
-        given time, and the sampled captured value of its PPO. Reads and modifies ``self.s``.
+        """Re-assigns the last sampled capture of the PPOs to the appropriate pseudo-primary inputs (PPIs).
+        Each PPI transition is constructed from the final value of the previous assignment, the
+        given time, and the sampled captured value of its PPO. Reads and modifies :py:attr:`s`.
 
         :param time: The transition time at the inputs (usually 0.0).
         """
@@ -322,7 +329,8 @@ def wave_capture_cpu(c, c_loc, c_len, vector, time=TMAX, sd=0.0, seed=1):
 class WaveSimCuda(WaveSim):
     """A GPU-accelerated waveform-based combinational logic timing simulator.
 
-    The API is the same as for :py:class:`WaveSim`.
+    The API is identical to :py:class:`WaveSim`. See there for complete documentation.
+
     All internal memories are mirrored into GPU memory upon construction.
     Some operations like access to single waveforms can involve large communication overheads.
     """
@@ -431,7 +439,6 @@ def wave_eval_gpu(ops, op_start, op_stop, cbuf, c_locs, c_caps, abuf, sim_start,
 
     # accumulate WSA into abuf
     if a_loc >= 0:
-        #abuf[a_loc, sim] += nrise*a_wr + nfall*a_wf
         cuda.atomic.add(abuf, (a_loc, sim), nrise*a_wr + nfall*a_wf)
 
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 0f6fce7..c73bdc5 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -8,13 +8,13 @@ def mydir():
     return Path(os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))))
 
 @pytest.fixture(scope='session')
-def b14_circuit(mydir):
+def b15_2ig_circuit(mydir):
     from kyupy import verilog
     from kyupy.techlib import SAED32
-    return verilog.load(mydir / 'b14.v.gz', branchforks=True, tlib=SAED32)
+    return verilog.load(mydir / 'b15_2ig.v.gz', branchforks=True, tlib=SAED32)
 
 @pytest.fixture(scope='session')
-def b14_delays(mydir, b14_circuit):
+def b15_2ig_delays(mydir, b15_2ig_circuit):
     from kyupy import sdf
     from kyupy.techlib import SAED32
-    return sdf.load(mydir / 'b14.sdf.gz').iopaths(b14_circuit, tlib=SAED32)[1:2]
+    return sdf.load(mydir / 'b15_2ig.sdf.gz').iopaths(b15_2ig_circuit, tlib=SAED32)[1:2]
diff --git a/tests/rng_haltonBase2.synth_yosys.v b/tests/rng_haltonBase2.synth_yosys.v
new file mode 100644
index 0000000..dec16b1
--- /dev/null
+++ b/tests/rng_haltonBase2.synth_yosys.v
@@ -0,0 +1,335 @@
+/* Generated by Yosys 0.9 (git sha1 UNKNOWN, gcc 4.8.5 -fPIC -Os) */
+
+(* top =  1  *)
+(* src = "rng_haltonBase2.v:1" *)
+module rng1(clk, reset, o_output);
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  wire [11:0] _00_;
+  wire _01_;
+  wire _02_;
+  wire _03_;
+  wire _04_;
+  wire _05_;
+  wire _06_;
+  wire _07_;
+  wire _08_;
+  wire _09_;
+  wire _10_;
+  wire _11_;
+  wire _12_;
+  wire _13_;
+  wire _14_;
+  wire _15_;
+  wire _16_;
+  wire _17_;
+  wire _18_;
+  wire _19_;
+  wire _20_;
+  wire _21_;
+  wire _22_;
+  wire _23_;
+  wire _24_;
+  wire _25_;
+  wire _26_;
+  wire _27_;
+  wire _28_;
+  wire _29_;
+  wire _30_;
+  wire _31_;
+  wire _32_;
+  wire _33_;
+  wire _34_;
+  (* src = "rng_haltonBase2.v:2" *)
+  input clk;
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:12" *)
+  wire \halton.clk ;
+  (* init = 12'h000 *)
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:17" *)
+  wire [11:0] \halton.counter ;
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:14" *)
+  wire [11:0] \halton.o_output ;
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:13" *)
+  wire \halton.reset ;
+  (* src = "rng_haltonBase2.v:4" *)
+  output [11:0] o_output;
+  (* src = "rng_haltonBase2.v:3" *)
+  input reset;
+  AND2X1 _35_ (
+    .IN1(\halton.counter [1]),
+    .IN2(\halton.counter [0]),
+    .Q(_01_)
+  );
+  NOR2X0 _36_ (
+    .IN1(\halton.counter [1]),
+    .IN2(\halton.counter [0]),
+    .QN(_02_)
+  );
+  NOR3X0 _37_ (
+    .IN1(reset),
+    .IN2(_01_),
+    .IN3(_02_),
+    .QN(_00_[1])
+  );
+  AND2X1 _38_ (
+    .IN1(\halton.counter [2]),
+    .IN2(_01_),
+    .Q(_03_)
+  );
+  NOR2X0 _39_ (
+    .IN1(\halton.counter [2]),
+    .IN2(_01_),
+    .QN(_04_)
+  );
+  NOR3X0 _40_ (
+    .IN1(reset),
+    .IN2(_03_),
+    .IN3(_04_),
+    .QN(_00_[2])
+  );
+  AND4X1 _41_ (
+    .IN1(\halton.counter [1]),
+    .IN2(\halton.counter [0]),
+    .IN3(\halton.counter [2]),
+    .IN4(\halton.counter [3]),
+    .Q(_05_)
+  );
+  NOR2X0 _42_ (
+    .IN1(\halton.counter [3]),
+    .IN2(_03_),
+    .QN(_06_)
+  );
+  NOR3X0 _43_ (
+    .IN1(reset),
+    .IN2(_05_),
+    .IN3(_06_),
+    .QN(_00_[3])
+  );
+  AND2X1 _44_ (
+    .IN1(\halton.counter [4]),
+    .IN2(_05_),
+    .Q(_07_)
+  );
+  NOR2X0 _45_ (
+    .IN1(\halton.counter [4]),
+    .IN2(_05_),
+    .QN(_08_)
+  );
+  NOR3X0 _46_ (
+    .IN1(reset),
+    .IN2(_07_),
+    .IN3(_08_),
+    .QN(_00_[4])
+  );
+  AND2X1 _47_ (
+    .IN1(\halton.counter [5]),
+    .IN2(_07_),
+    .Q(_09_)
+  );
+  NOR2X0 _48_ (
+    .IN1(\halton.counter [5]),
+    .IN2(_07_),
+    .QN(_10_)
+  );
+  NOR3X0 _49_ (
+    .IN1(reset),
+    .IN2(_09_),
+    .IN3(_10_),
+    .QN(_00_[5])
+  );
+  AND4X1 _50_ (
+    .IN1(\halton.counter [4]),
+    .IN2(\halton.counter [5]),
+    .IN3(\halton.counter [6]),
+    .IN4(_05_),
+    .Q(_11_)
+  );
+  NOR2X0 _51_ (
+    .IN1(\halton.counter [6]),
+    .IN2(_09_),
+    .QN(_12_)
+  );
+  NOR3X0 _52_ (
+    .IN1(reset),
+    .IN2(_11_),
+    .IN3(_12_),
+    .QN(_00_[6])
+  );
+  AND2X1 _53_ (
+    .IN1(\halton.counter [7]),
+    .IN2(_11_),
+    .Q(_13_)
+  );
+  NOR2X0 _54_ (
+    .IN1(\halton.counter [7]),
+    .IN2(_11_),
+    .QN(_14_)
+  );
+  NOR3X0 _55_ (
+    .IN1(reset),
+    .IN2(_13_),
+    .IN3(_14_),
+    .QN(_00_[7])
+  );
+  AND3X1 _56_ (
+    .IN1(\halton.counter [7]),
+    .IN2(\halton.counter [8]),
+    .IN3(_11_),
+    .Q(_15_)
+  );
+  NOR2X0 _57_ (
+    .IN1(\halton.counter [8]),
+    .IN2(_13_),
+    .QN(_16_)
+  );
+  NOR3X0 _58_ (
+    .IN1(reset),
+    .IN2(_15_),
+    .IN3(_16_),
+    .QN(_00_[8])
+  );
+  AND4X1 _59_ (
+    .IN1(\halton.counter [7]),
+    .IN2(\halton.counter [8]),
+    .IN3(\halton.counter [9]),
+    .IN4(_11_),
+    .Q(_17_)
+  );
+  NOR2X0 _60_ (
+    .IN1(\halton.counter [9]),
+    .IN2(_15_),
+    .QN(_18_)
+  );
+  NOR3X0 _61_ (
+    .IN1(reset),
+    .IN2(_17_),
+    .IN3(_18_),
+    .QN(_00_[9])
+  );
+  AND2X1 _62_ (
+    .IN1(\halton.counter [10]),
+    .IN2(_17_),
+    .Q(_19_)
+  );
+  NOR2X0 _63_ (
+    .IN1(\halton.counter [10]),
+    .IN2(_17_),
+    .QN(_20_)
+  );
+  NOR3X0 _64_ (
+    .IN1(reset),
+    .IN2(_19_),
+    .IN3(_20_),
+    .QN(_00_[10])
+  );
+  AND3X1 _65_ (
+    .IN1(\halton.counter [10]),
+    .IN2(\halton.counter [11]),
+    .IN3(_17_),
+    .Q(_21_)
+  );
+  AOI21X1 _66_ (
+    .IN1(\halton.counter [10]),
+    .IN2(_17_),
+    .IN3(\halton.counter [11]),
+    .QN(_22_)
+  );
+  NOR3X0 _67_ (
+    .IN1(reset),
+    .IN2(_21_),
+    .IN3(_22_),
+    .QN(_00_[11])
+  );
+  NOR2X0 _68_ (
+    .IN1(reset),
+    .IN2(\halton.counter [0]),
+    .QN(_00_[0])
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _69_ (
+    .CLK(clk),
+    .D(_00_[0]),
+    .Q(\halton.counter [0]),
+    .QN(_23_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _70_ (
+    .CLK(clk),
+    .D(_00_[1]),
+    .Q(\halton.counter [1]),
+    .QN(_24_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _71_ (
+    .CLK(clk),
+    .D(_00_[2]),
+    .Q(\halton.counter [2]),
+    .QN(_25_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _72_ (
+    .CLK(clk),
+    .D(_00_[3]),
+    .Q(\halton.counter [3]),
+    .QN(_26_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _73_ (
+    .CLK(clk),
+    .D(_00_[4]),
+    .Q(\halton.counter [4]),
+    .QN(_27_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _74_ (
+    .CLK(clk),
+    .D(_00_[5]),
+    .Q(\halton.counter [5]),
+    .QN(_28_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _75_ (
+    .CLK(clk),
+    .D(_00_[6]),
+    .Q(\halton.counter [6]),
+    .QN(_29_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _76_ (
+    .CLK(clk),
+    .D(_00_[7]),
+    .Q(\halton.counter [7]),
+    .QN(_30_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _77_ (
+    .CLK(clk),
+    .D(_00_[8]),
+    .Q(\halton.counter [8]),
+    .QN(_31_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _78_ (
+    .CLK(clk),
+    .D(_00_[9]),
+    .Q(\halton.counter [9]),
+    .QN(_32_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _79_ (
+    .CLK(clk),
+    .D(_00_[10]),
+    .Q(\halton.counter [10]),
+    .QN(_33_)
+  );
+  (* src = "rng_haltonBase2.v:7|rng_haltonBase2.v:19" *)
+  DFFX1 _80_ (
+    .CLK(clk),
+    .D(_00_[11]),
+    .Q(\halton.counter [11]),
+    .QN(_34_)
+  );
+  assign \halton.clk  = clk;
+  assign \halton.o_output  = { \halton.counter [0], \halton.counter [1], \halton.counter [2], \halton.counter [3], \halton.counter [4], \halton.counter [5], \halton.counter [6], \halton.counter [7], \halton.counter [8], \halton.counter [9], \halton.counter [10], \halton.counter [11] };
+  assign \halton.reset  = reset;
+  assign o_output = { \halton.counter [0], \halton.counter [1], \halton.counter [2], \halton.counter [3], \halton.counter [4], \halton.counter [5], \halton.counter [6], \halton.counter [7], \halton.counter [8], \halton.counter [9], \halton.counter [10], \halton.counter [11] };
+endmodule
diff --git a/tests/test_sdf.py b/tests/test_sdf.py
index f57a5a5..e94285b 100644
--- a/tests/test_sdf.py
+++ b/tests/test_sdf.py
@@ -72,12 +72,11 @@ def test_parse():
     '''
     df = sdf.parse(test)
     assert df.name == 'test'
-    # print(f'DelayFile(name={df.name}, interconnects={len(df.interconnects)}, iopaths={len(df.iopaths)})')
 
 
-def test_b14(mydir):
-    df = sdf.load(mydir / 'b14.sdf.gz')
-    assert df.name == 'b14'
+def test_b15(mydir):
+    df = sdf.load(mydir / 'b15_2ig.sdf.gz')
+    assert df.name == 'b15'
 
 
 def test_gates(mydir):
diff --git a/tests/test_stil.py b/tests/test_stil.py
index 0b26259..f179493 100644
--- a/tests/test_stil.py
+++ b/tests/test_stil.py
@@ -1,21 +1,21 @@
 from kyupy import stil, verilog
+from kyupy.techlib import SAED32
 
+def test_b15(mydir):
+    b15 = verilog.load(mydir / 'b15_2ig.v.gz', tlib=SAED32)
 
-def test_b14(mydir):
-    b14 = verilog.load(mydir / 'b14.v.gz')
-
-    s = stil.load(mydir / 'b14.stuck.stil.gz')
+    s = stil.load(mydir / 'b15_2ig.sa_nf.stil.gz')
     assert len(s.signal_groups) == 10
     assert len(s.scan_chains) == 1
-    assert len(s.calls) == 2163
-    tests = s.tests(b14)
-    resp = s.responses(b14)
+    assert len(s.calls) == 1357
+    tests = s.tests(b15)
+    resp = s.responses(b15)
     assert len(tests) > 0
     assert len(resp) > 0
 
-    s2 = stil.load(mydir / 'b14.transition.stil.gz')
-    tests = s2.tests_loc(b14)
-    resp = s2.responses(b14)
+    s2 = stil.load(mydir / 'b15_2ig.tf_nf.stil.gz')
+    tests = s2.tests_loc(b15)
+    resp = s2.responses(b15)
     assert len(tests) > 0
     assert len(resp) > 0
 
diff --git a/tests/test_verilog.py b/tests/test_verilog.py
index 67ec9a6..87bbe73 100644
--- a/tests/test_verilog.py
+++ b/tests/test_verilog.py
@@ -1,11 +1,11 @@
 from kyupy import verilog
-
+from kyupy.techlib import SAED90, SAED32
 
 def test_b01(mydir):
     with open(mydir / 'b01.v', 'r') as f:
-        c = verilog.parse(f.read())
+        c = verilog.parse(f.read(), tlib=SAED90)
     assert c is not None
-    assert verilog.load(mydir / 'b01.v') is not None
+    assert verilog.load(mydir / 'b01.v', tlib=SAED90) is not None
 
     assert len(c.nodes) == 139
     assert len(c.lines) == 203
@@ -16,7 +16,7 @@ def test_b01(mydir):
 
 
 def test_b15(mydir):
-    c = verilog.load(mydir / 'b15_4ig.v.gz')
+    c = verilog.load(mydir / 'b15_4ig.v.gz', tlib=SAED32)
     assert len(c.nodes) == 12067
     assert len(c.lines) == 20731
     stats = c.stats
@@ -26,7 +26,7 @@ def test_b15(mydir):
 
 
 def test_gates(mydir):
-    c = verilog.load(mydir / 'gates.v')
+    c = verilog.load(mydir / 'gates.v', tlib=SAED90)
     assert len(c.nodes) == 10
     assert len(c.lines) == 10
     stats = c.stats
@@ -36,7 +36,7 @@ def test_gates(mydir):
 
 
 def test_halton2(mydir):
-    c = verilog.load(mydir / 'rng_haltonBase2.synth_yosys.v')
+    c = verilog.load(mydir / 'rng_haltonBase2.synth_yosys.v', tlib=SAED90)
     assert len(c.nodes) == 146
     assert len(c.lines) == 210
     stats = c.stats
diff --git a/tests/test_wave_sim.py b/tests/test_wave_sim.py
index 407b90d..9a09b32 100644
--- a/tests/test_wave_sim.py
+++ b/tests/test_wave_sim.py
@@ -156,16 +156,13 @@ def compare_to_logic_sim(wsim: WaveSim):
     np.testing.assert_allclose(resp, exp)
 
 
-def test_b14(b14_circuit, b14_delays):
-    compare_to_logic_sim(WaveSim(b14_circuit, b14_delays, 8))
+def test_b15(b15_2ig_circuit, b15_2ig_delays):
+    compare_to_logic_sim(WaveSim(b15_2ig_circuit, b15_2ig_delays, 8))
 
 
-def test_b14_strip_forks(b14_circuit, b14_delays):
-    compare_to_logic_sim(WaveSim(b14_circuit, b14_delays, 8, strip_forks=True))
+def test_b15_strip_forks(b15_2ig_circuit, b15_2ig_delays):
+    compare_to_logic_sim(WaveSim(b15_2ig_circuit, b15_2ig_delays, 8, strip_forks=True))
 
 
-def test_b14_cuda(b14_circuit, b14_delays):
-    compare_to_logic_sim(WaveSimCuda(b14_circuit, b14_delays, 8, strip_forks=True))
-
-if __name__ == '__main__':
-    test_nand_delays()
\ No newline at end of file
+def test_b15_cuda(b15_2ig_circuit, b15_2ig_delays):
+    compare_to_logic_sim(WaveSimCuda(b15_2ig_circuit, b15_2ig_delays, 8, strip_forks=True))