From c9445f2d79d81f404688c68f71fb63802ca64f31 Mon Sep 17 00:00:00 2001
From: Stefan Holst <mail@s-holst.de>
Date: Sat, 16 Jan 2021 14:48:27 +0900
Subject: [PATCH] Docs, __index__, fault injection and TechLib

- Documentation improvements
- Node and Line objects now provide __index__
- LogicSim cleanup and improvements (inject_cb, cycle, ...)
- Introduce TechLib class to organize tech-specific info
- More human-readable output
- De-linting
---
 Demo.ipynb              |  24 ++--
 LICENSE.txt             |   2 +-
 docs/conf.py            |   4 +-
 docs/index.rst          |   1 +
 docs/miscellaneous.rst  |  10 ++
 docs/simulators.rst     |   3 +
 setup.py                |   3 +-
 src/kyupy/__init__.py   | 141 +++++++++++++++----
 src/kyupy/bench.py      |  14 +-
 src/kyupy/circuit.py    |  41 +++---
 src/kyupy/logic.py      | 244 ++++++++++++++++++++------------
 src/kyupy/logic_sim.py  | 292 +++++++++++++++++---------------------
 src/kyupy/saed.py       | 289 --------------------------------------
 src/kyupy/sdf.py        |  76 +++++-----
 src/kyupy/stil.py       |  36 ++---
 src/kyupy/techlib.py    | 301 ++++++++++++++++++++++++++++++++++++++++
 src/kyupy/verilog.py    |  52 ++++---
 src/kyupy/wave_sim.py   | 157 ++++++++++++++++-----
 tests/test_bench.py     |   4 +-
 tests/test_logic_sim.py |   4 +-
 tests/test_sdf.py       |  19 ++-
 tests/test_stil.py      |   7 +-
 tests/test_wave_sim.py  |  49 ++++---
 23 files changed, 1002 insertions(+), 771 deletions(-)
 create mode 100644 docs/miscellaneous.rst
 delete mode 100644 src/kyupy/saed.py
 create mode 100644 src/kyupy/techlib.py
diff --git a/Demo.ipynb b/Demo.ipynb
index 288f1bd..805c60c 100644
--- a/Demo.ipynb
+++ b/Demo.ipynb
@@ -44,7 +44,7 @@
     {
      "data": {
       "text/plain": [
-       "<Circuit 'tests/b01.bench' with 92 nodes, 130 lines, 4 ports>"
+       "<Circuit tests/b01.bench cells=45 forks=47 lines=130 ports=4>"
       ]
      },
      "execution_count": 2,
@@ -64,7 +64,7 @@
     {
      "data": {
       "text/plain": [
-       "<Circuit with 10 nodes, 8 lines, 5 ports>"
+       "<Circuit cells=4 forks=6 lines=8 ports=5>"
       ]
      },
      "execution_count": 3,
@@ -362,7 +362,7 @@
     {
      "data": {
       "text/plain": [
-       "<Circuit 'b14' with 31715 nodes, 46891 lines, 91 ports>"
+       "<Circuit b14 cells=15873 forks=15842 lines=46891 ports=91>"
       ]
      },
      "execution_count": 13,
@@ -445,7 +445,7 @@
     {
      "data": {
       "text/plain": [
-       "<Circuit 'b14' with 31715 nodes, 46891 lines, 91 ports>"
+       "<Circuit b14 cells=15873 forks=15842 lines=46891 ports=91>"
       ]
      },
      "execution_count": 15,
@@ -489,11 +489,11 @@
     "\n",
     "for cell in b14.topological_order():\n",
     "    if 'DFF' in cell.kind or 'input' == cell.kind:\n",
-    "        levels[cell.index] = 0\n",
+    "        levels[cell] = 0\n",
     "    elif '__fork__' == cell.kind:\n",
-    "        levels[cell.index] = levels[cell.ins[0].driver.index]  # forks only have exactly one driver\n",
+    "        levels[cell] = levels[cell.ins[0].driver]  # forks only have exactly one driver\n",
     "    else:\n",
-    "        levels[cell.index] = max([levels[line.driver.index] for line in cell.ins]) + 1\n",
+    "        levels[cell] = max([levels[line.driver] for line in cell.ins]) + 1\n",
     "        \n",
     "print(f'Maximum logic depth: {np.max(levels)}')"
    ]
@@ -591,7 +591,7 @@
     {
      "data": {
       "text/plain": [
-       "<MVArray length=1081 width=306 m=8 nbytes=330786>"
+       "<MVArray length=1081 width=306 m=8 mem=323.0kiB>"
       ]
      },
      "execution_count": 19,
@@ -697,7 +697,7 @@
     {
      "data": {
       "text/plain": [
-       "<BPArray length=1081 width=306 m=8 bytes=124848>"
+       "<BPArray length=1081 width=306 m=8 mem=121.9kiB>"
       ]
      },
      "execution_count": 23,
@@ -829,7 +829,7 @@
     {
      "data": {
       "text/plain": [
-       "<MVArray length=1392 width=306 m=8 nbytes=425952>"
+       "<MVArray length=1392 width=306 m=8 mem=416.0kiB>"
       ]
      },
      "execution_count": 29,
@@ -962,10 +962,9 @@
    "outputs": [],
    "source": [
     "from kyupy import sdf\n",
-    "from kyupy.saed import pin_index\n",
     "\n",
     "df = sdf.load('tests/b14.sdf.gz')\n",
-    "lt = df.annotation(b14, pin_index, dataset=0, interconnect=False)"
+    "lt = df.annotation(b14, dataset=0, interconnect=False)"
    ]
   },
   {
@@ -1118,6 +1117,7 @@
    "metadata": {},
    "source": [
     "The capture data contains for each PI, PO, and scan flip-flop (axis 0), and each test (axis 1) seven values:\n",
+    "\n",
     "0. Probability of capturing a 1 at the given capture time (same as next value, if no standard deviation given).\n",
     "1. A capture value decided by random sampling according to above probability.\n",
     "2. The final value (assume a very late capture time).\n",
diff --git a/LICENSE.txt b/LICENSE.txt
index 1e4a002..293fa79 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2020 s-holst
+Copyright (c) 2020-2021 Stefan Holst
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/docs/conf.py b/docs/conf.py
index cb2e436..540783b 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -20,11 +20,11 @@ sys.path.insert(0, os.path.abspath('../src'))
 # -- Project information -----------------------------------------------------
 
 project = 'KyuPy'
-copyright = '2020, Stefan Holst'
+copyright = '2020-2021, Stefan Holst'
 author = 'Stefan Holst'
 
 # The full version, including alpha/beta/rc tags
-release = '0.0.2'
+release = '0.0.3'
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/docs/index.rst b/docs/index.rst
index 3caa343..3fc74e8 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -9,4 +9,5 @@ API Reference
    datastructures
    parsers
    simulators
+   miscellaneous
 
diff --git a/docs/miscellaneous.rst b/docs/miscellaneous.rst
new file mode 100644
index 0000000..fff469f
--- /dev/null
+++ b/docs/miscellaneous.rst
@@ -0,0 +1,10 @@
+Miscellaneous
+=============
+
+.. automodule:: kyupy
+   :members:
+
+.. automodule:: kyupy.techlib
+   :members:
+
+
diff --git a/docs/simulators.rst b/docs/simulators.rst
index 8d5f6b6..bcc0ea4 100644
--- a/docs/simulators.rst
+++ b/docs/simulators.rst
@@ -4,6 +4,8 @@ Simulators
 Logic Simulation - :mod:`kyupy.logic_sim`
 -----------------------------------------
 
+.. automodule:: kyupy.logic_sim
+
 .. autoclass:: kyupy.logic_sim.LogicSim
    :members:
 
@@ -12,6 +14,7 @@ Timing Simulation - :mod:`kyupy.wave_sim`
 -----------------------------------------
 
 .. automodule:: kyupy.wave_sim
+   :members: TMAX, TMAX_OVL, TMIN
 
 .. autoclass:: kyupy.wave_sim.WaveSim
    :members:
diff --git a/setup.py b/setup.py
index 9a0bb1b..efb49ec 100644
--- a/setup.py
+++ b/setup.py
@@ -5,9 +5,10 @@ with open('README.rst', 'r') as f:
 
 setup(
     name='kyupy',
-    version='0.0.2',
+    version='0.0.3',
     description='High-performance processing and analysis of non-hierarchical VLSI designs',
     long_description=long_description,
+    long_description_content_type='text/x-rst',
     packages=find_packages(where='src'),
     package_dir={'': 'src'},
     url='https://github.com/s-holst/kyupy',
diff --git a/src/kyupy/__init__.py b/src/kyupy/__init__.py
index d1bb8db..8bbfc9f 100644
--- a/src/kyupy/__init__.py
+++ b/src/kyupy/__init__.py
@@ -1,6 +1,8 @@
 """A package for processing and analysis of non-hierarchical gate-level VLSI designs.
 
-It contains fundamental building blocks for research software in the fields of VLSI test, diagnosis and reliability.
+The kyupy package itself contains a logger and other simple utility functions.
+In addition, it defines ``numba`` and ``cuda`` objects that point to the actual packages
+if they are available and otherwise point to mocks.
 """
 
 import time
@@ -10,10 +12,78 @@ import gzip
 import numpy as np
 
 
+_pop_count_lut = np.asarray([bin(x).count('1') for x in range(256)])
+
+
+def popcount(a):
+    """Returns the number of 1-bits in a given packed numpy array."""
+    return np.sum(_pop_count_lut[a])
+
+
+def readtext(file):
+    """Reads and returns the text in a given file. Transparently decompresses \\*.gz files."""
+    if hasattr(file, 'read'):
+        return file.read()
+    if str(file).endswith('.gz'):
+        with gzip.open(file, 'rt') as f:
+            return f.read()
+    else:
+        with open(file, 'rt') as f:
+            return f.read()
+
+
+def hr_sci(value):
+    """Formats a value in a human-readable scientific notation with an SI prefix (``E`` down to ``a``)."""
+    multiplier = 0
+    while abs(value) >= 1000 and multiplier < 6:  # clamp at 'E' (index 6), the largest known prefix
+        value /= 1000
+        multiplier += 1
+    while 0 < abs(value) < 1 and multiplier > -6:  # zero can never be scaled up to 1; clamp at 'a' (index -6)
+        value *= 1000
+        multiplier -= 1
+    return f'{value:.3f}{" kMGTPEafpnµm"[multiplier]}'  # negative multipliers index from the end: -1 is 'm'
+
+
+def hr_bytes(nbytes):
+    """Formats a given number of bytes for human readability."""
+    multiplier = 0
+    while abs(nbytes) >= 1000:
+        nbytes /= 1024
+        multiplier += 1
+    return f'{nbytes:.1f}{["", "ki", "Mi", "Gi", "Ti", "Pi"][multiplier]}B'
+
+
+def hr_time(seconds):
+    """Formats a given time interval for human readability."""
+    s = ''
+    if seconds >= 86400:
+        d = seconds // 86400
+        seconds -= d * 86400
+        s += f'{int(d)}d'
+    if seconds >= 3600:
+        h = seconds // 3600
+        seconds -= h * 3600
+        s += f'{int(h)}h'
+    if seconds >= 60:
+        m = seconds // 60
+        seconds -= m * 60
+        if 'd' not in s:
+            s += f'{int(m)}m'
+    if 'h' not in s and 'd' not in s:
+        s += f'{int(seconds)}s'
+    return s
+
+
 class Log:
+    """A very simple logger that formats the messages with the number of seconds since
+    program start.
+    """
     def __init__(self):
         self.start = time.perf_counter()
         self.logfile = None
+        """When set to a file handle, log messages are written to it instead of to standard output.
+        After each write, ``flush()`` is called as well.
+        """
 
     def log(self, level, message):
         t = time.perf_counter() - self.start
@@ -23,15 +93,45 @@ class Log:
             self.logfile.write(f'{t:011.3f} {level} {message}\n')
             self.logfile.flush()
 
-    def info(self, message): self.log('-', message)
-
-    def warn(self, message): self.log('W', message)
-
-    def error(self, message): self.log('E', message)
+    def info(self, message):
+        """Log an informational message."""
+        self.log('-', message)
+
+    def warn(self, message):
+        """Log a warning message."""
+        self.log('W', message)
+
+    def error(self, message):
+        """Log an error message."""
+        self.log('E', message)
+
+    def range(self, *args):
+        """A generator that operates just like the ``range()`` built-in, and also occasionally logs the progress
+        and compute time estimates."""
+        elems = len(range(*args))
+        start_time = time.perf_counter()
+        lastlog_time = start_time
+        log_interval = 5
+        for elem, i in enumerate(range(*args)):
+            yield i
+            current_time = time.perf_counter()
+            if current_time > lastlog_time + log_interval:
+                done = (elem + 1) / elems
+                elapsed_time = current_time - start_time
+                total_time = elapsed_time / done
+                rem_time = total_time - elapsed_time
+                self.log(':', f'{done*100:.0f}% done {hr_time(elapsed_time)} elapsed {hr_time(rem_time)} remaining')
+                log_interval = min(600, int(log_interval*1.5))
+                lastlog_time = current_time
 
 
 log = Log()
+"""The standard logger instance."""
+
 
+#
+# Code below mocks basic numba and cuda functions for pure-python fallback.
+#
 
 class MockNumba:
     @staticmethod
@@ -52,17 +152,15 @@ class MockCuda:
         outer = self
 
         def make_launcher(func):
-            class Launcher(object):
+            class Launcher:
                 def __init__(self, funcc):
                     self.func = funcc
 
                 def __call__(self, *args, **kwargs):
-                    # print(f'device func call {self.func.__name__}')
                     return self.func(*args, **kwargs)
 
                 def __getitem__(self, item):
                     grid_dim, block_dim = item
-                    # print(f'kernel call {self.func.__name__} grid_dim:{grid_dim} block_dim:{block_dim}')
 
                     def inner(*args, **kwargs):
                         for grid_x in range(grid_dim[0]):
@@ -104,23 +202,12 @@ if importlib.util.find_spec('numba') is not None:
         cuda = MockCuda()
 else:
     numba = MockNumba()
+    """If Numba is available on the system, it is the actual ``numba`` package.
+    Otherwise, it simply defines an ``njit`` decorator that does nothing.
+    """
     cuda = MockCuda()
+    """If Numba is installed and Cuda GPUs are available, it is the actual ``numba.cuda`` package.
+    Otherwise, it is an object that defines basic methods and decorators so that cuda-code can still
+    run in the Python interpreter.
+    """
     log.warn('Numba unavailable. Falling back to pure Python.')
-
-
-_pop_count_lut = np.asarray([bin(x).count('1') for x in range(256)])
-
-
-def popcount(a):
-    return np.sum(_pop_count_lut[a])
-
-
-def readtext(file):
-    if hasattr(file, 'read'):
-        return file.read()
-    if str(file).endswith('.gz'):
-        with gzip.open(file, 'rt') as f:
-            return f.read()
-    else:
-        with open(file, 'rt') as f:
-            return f.read()
diff --git a/src/kyupy/bench.py b/src/kyupy/bench.py
index 7ec1e1e..21310d5 100644
--- a/src/kyupy/bench.py
+++ b/src/kyupy/bench.py
@@ -14,25 +14,25 @@ from . import readtext
 
 
 class BenchTransformer(Transformer):
-    
+
     def __init__(self, name):
         super().__init__()
         self.c = Circuit(name)
-    
+
     def start(self, _): return self.c
-        
+
     def parameters(self, args): return [self.c.get_or_add_fork(name) for name in args]
-        
+
     def interface(self, args): self.c.interface.extend(args[0])
 
     def assignment(self, args):
         name, cell_type, drivers = args
         cell = Node(self.c, str(name), str(cell_type))
         Line(self.c, cell, self.c.get_or_add_fork(str(name)))
-        [Line(self.c, d, cell) for d in drivers]
+        for d in drivers: Line(self.c, d, cell)
 
 
-grammar = r"""
+GRAMMAR = r"""
     start: (statement)*
     statement: input | output | assignment
     input: ("INPUT" | "input") parameters -> interface
@@ -51,7 +51,7 @@ def parse(text, name=None):
     :param name: The name of the circuit. Circuit names are not included in bench descriptions.
     :return: A :class:`Circuit` object.
     """
-    return Lark(grammar, parser="lalr", transformer=BenchTransformer(name)).parse(text)
+    return Lark(GRAMMAR, parser="lalr", transformer=BenchTransformer(name)).parse(text)
 
 
 def load(file, name=None):
diff --git a/src/kyupy/circuit.py b/src/kyupy/circuit.py
index 84cc96c..5801f0c 100644
--- a/src/kyupy/circuit.py
+++ b/src/kyupy/circuit.py
@@ -53,7 +53,7 @@ class Node:
         """
         self.kind = kind
         """A string describing the type of the node.
-        
+
         Common types are the names from a standard cell library or general gate names like 'AND' or 'NOR'.
         If :py:attr:`kind` is set to '__fork__', it receives special treatment.
         A `fork` describes a named signal or a fan-out point in the circuit and not a physical `cell` like a gate.
@@ -75,6 +75,9 @@ class Node:
         """A list of output connections (:class:`Line` objects).
         """
 
+    def __index__(self):
+        return self.index
+
     def __repr__(self):
         ins = ' '.join([f'<{line.index}' if line is not None else '<None' for line in self.ins])
         outs = ' '.join([f'>{line.index}' if line is not None else '>None' for line in self.outs])
@@ -130,7 +133,7 @@ class Line:
         """
         self.driver_pin = driver[1]
         """The output pin position of the driver node this line is connected to.
-        
+
         This is the position in the outs-list of the driving node this line referenced from:
         :code:`self.driver.outs[self.driver_pin] == self`.
         """
@@ -160,6 +163,9 @@ class Line:
         self.reader = None
         self.circuit = None
 
+    def __index__(self):
+        return self.index
+
     def __repr__(self):
         return f'{self.index}'
 
@@ -187,17 +193,17 @@ class Circuit:
         """
         self.nodes = IndexList()
         """A list of all :class:`Node` objects contained in the circuit.
-        
+
         The position of a node in this list equals its index :code:`self.nodes[42].index == 42`.
         """
         self.lines = IndexList()
         """A list of all :class:`Line` objects contained in the circuit.
-        
+
         The position of a line in this list equals its index :code:`self.lines[42].index == 42`.
         """
         self.interface = GrowingList()
         """A list of nodes that are designated as primary input- or output-ports.
-        
+
         Port-nodes are contained in :py:attr:`nodes` as well as :py:attr:`interface`.
         The position of a node in the interface list corresponds to positions of logic values in test vectors.
         The port direction is not stored explicitly.
@@ -213,7 +219,7 @@ class Circuit:
 
     def get_or_add_fork(self, name):
         return self.forks[name] if name in self.forks else Node(self, name)
-    
+
     def copy(self):
         """Returns a deep copy of the circuit.
         """
@@ -231,7 +237,7 @@ class Circuit:
                 n = c.cells[node.name]
             c.interface.append(n)
         return c
-    
+
     def dump(self):
         """Returns a string representation of the circuit and all its nodes.
         """
@@ -239,8 +245,9 @@ class Circuit:
         return header + '\n'.join([str(n) for n in self.nodes])
 
     def __repr__(self):
-        name = f" '{self.name}'" if self.name else ''
-        return f'<Circuit{name} with {len(self.nodes)} nodes, {len(self.lines)} lines, {len(self.interface)} ports>'
+        name = f' {self.name}' if self.name else ''
+        return f'<Circuit{name} cells={len(self.cells)} forks={len(self.forks)} ' + \
+               f'lines={len(self.lines)} ports={len(self.interface)}>'
 
     def topological_order(self):
         """Generator function to iterate over all nodes in topological order.
@@ -255,8 +262,8 @@ class Circuit:
             for line in n.outs:
                 if line is None: continue
                 succ = line.reader
-                visit_count[succ.index] += 1
-                if visit_count[succ.index] == len(succ.ins) and 'DFF' not in succ.kind:
+                visit_count[succ] += 1
+                if visit_count[succ] == len(succ.ins) and 'DFF' not in succ.kind:
                     queue.append(succ)
             yield n
 
@@ -280,8 +287,8 @@ class Circuit:
             n = queue.popleft()
             for line in n.ins:
                 pred = line.driver
-                visit_count[pred.index] += 1
-                if visit_count[pred.index] == len(pred.outs) and 'DFF' not in pred.kind:
+                visit_count[pred] += 1
+                if visit_count[pred] == len(pred.outs) and 'DFF' not in pred.kind:
                     queue.append(pred)
             yield n
 
@@ -292,13 +299,13 @@ class Circuit:
         """
         marks = [False] * len(self.nodes)
         for n in origin_nodes:
-            marks[n.index] = True
+            marks[n] = True
         for n in self.reversed_topological_order():
-            if not marks[n.index]:
+            if not marks[n]:
                 for line in n.outs:
                     if line is not None:
-                        marks[n.index] |= marks[line.reader.index]
-            if marks[n.index]:
+                        marks[n] |= marks[line.reader]
+            if marks[n]:
                 yield n
 
     def fanout_free_regions(self):
diff --git a/src/kyupy/logic.py b/src/kyupy/logic.py
index d30fd55..7b0c149 100644
--- a/src/kyupy/logic.py
+++ b/src/kyupy/logic.py
@@ -25,7 +25,7 @@ from collections.abc import Iterable
 
 import numpy as np
 
-from . import numba
+from . import numba, hr_bytes
 
 
 ZERO = 0b000
@@ -58,6 +58,12 @@ on a signal. ``'N'``, ``'n'``, and ``'v'`` are interpreted as ``NPULSE``.
 
 
 def interpret(value):
+    """Converts characters, strings, and lists of them to lists of logic constants defined above.
+
+    :param value: A character (string of length 1), Boolean, Integer, None, or Iterable.
+        Iterables (such as strings) are traversed and their individual characters are interpreted.
+    :return: A logic constant or a (possibly multi-dimensional) list of logic constants.
+    """
     if isinstance(value, Iterable) and not (isinstance(value, str) and len(value) == 1):
         return list(map(interpret, value))
     if value in [0, '0', False, 'L', 'l']:
@@ -85,6 +91,79 @@ def bit_in(a, pos):
     return a[pos >> 3] & _bit_in_lut[pos & 7]
 
 
+class MVArray:
+    """An n-dimensional array of m-valued logic values.
+
+    This class wraps a numpy.ndarray of type uint8 and adds support for encoding and
+    interpreting 2-valued, 4-valued, and 8-valued logic values.
+    Each logic value is stored as a uint8; manipulations of individual values are cheaper than in
+    :py:class:`BPArray`.
+
+    :param a: If a tuple is given, it is interpreted as desired shape. To make an array of ``n`` vectors
+        compatible with a simulator ``sim``, use ``(len(sim.interface), n)``. If a :py:class:`BPArray` or
+        :py:class:`MVArray` is given, a deep copy is made. If a string, a list of strings, a list of characters,
+        or a list of lists of characters are given, the data is interpreted best-effort and the array is
+        initialized accordingly.
+    :param m: The arity of the logic. Can be set to 2, 4, or 8. If None is given, the arity of a given
+        :py:class:`BPArray` or :py:class:`MVArray` is used, or, if the array is initialized differently, 8 is used.
+    """
+
+    def __init__(self, a, m=None):
+        self.m = m or 8
+        assert self.m in [2, 4, 8]
+
+        # Try our best to interpret given a.
+        if isinstance(a, MVArray):
+            self.data = a.data.copy()
+            """The wrapped 2-dimensional ndarray of logic values.
+
+            * Axis 0 is PI/PO/FF position, the length of this axis is called "width".
+            * Axis 1 is vector/pattern, the length of this axis is called "length".
+            """
+            self.m = m or a.m
+        elif hasattr(a, 'data'):  # assume it is a BPArray. Can't use isinstance() because BPArray isn't declared yet.
+            self.data = np.zeros((a.width, a.length), dtype=np.uint8)
+            self.m = m or a.m
+            for i in range(a.data.shape[-2]):
+                self.data[...] <<= 1
+                self.data[...] |= np.unpackbits(a.data[..., -i-1, :], axis=1)[:, :a.length]
+            if a.data.shape[-2] == 1:
+                self.data *= 3
+        elif isinstance(a, int):
+            self.data = np.full((a, 1), UNASSIGNED, dtype=np.uint8)
+        elif isinstance(a, tuple):
+            self.data = np.full(a, UNASSIGNED, dtype=np.uint8)
+        else:
+            if isinstance(a, str): a = [a]
+            self.data = np.asarray(interpret(a), dtype=np.uint8)
+            self.data = self.data[:, np.newaxis] if self.data.ndim == 1 else np.moveaxis(self.data, -2, -1)
+
+        # Cast data to m-valued logic.
+        if self.m == 2:
+            self.data[...] = ((self.data & 0b001) & ((self.data >> 1) & 0b001) | (self.data == RISE)) * ONE
+        elif self.m == 4:
+            self.data[...] = (self.data & 0b011) & ((self.data != FALL) * ONE) | ((self.data == RISE) * ONE)
+        elif self.m == 8:
+            self.data[...] = self.data & 0b111
+
+        self.length = self.data.shape[-1]
+        self.width = self.data.shape[-2]
+
+    def __repr__(self):
+        return f'<MVArray length={self.length} width={self.width} m={self.m} mem={hr_bytes(self.data.nbytes)}>'
+
+    def __str__(self):
+        return str([self[idx] for idx in range(self.length)])
+
+    def __getitem__(self, vector_idx):
+        """Returns a string representing the desired vector."""
+        chars = ["0", "X", "-", "1", "P", "R", "F", "N"]
+        return ''.join(chars[v] for v in self.data[:, vector_idx])
+
+    def __len__(self):
+        return self.length
+
+
 def mv_cast(*args, m=8):
     return [a if isinstance(a, MVArray) else MVArray(a, m=m) for a in args]
 
@@ -100,6 +179,13 @@ def _mv_not(m, out, inp):
 
 
 def mv_not(x1, out=None):
+    """A multi-valued NOT operator.
+
+    :param x1: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts.
+    :param out: Optionally an :py:class:`MVArray` as storage destination. If None, a new :py:class:`MVArray`
+        is returned.
+    :return: An :py:class:`MVArray` with the result.
+    """
     m = mv_getm(x1)
     x1 = mv_cast(x1, m=m)[0]
     out = out or MVArray(x1.data.shape, m=m)
@@ -125,6 +211,14 @@ def _mv_or(m, out, *ins):
 
 
 def mv_or(x1, x2, out=None):
+    """A multi-valued OR operator.
+
+    :param x1: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts.
+    :param x2: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts.
+    :param out: Optionally an :py:class:`MVArray` as storage destination. If None, a new :py:class:`MVArray`
+        is returned.
+    :return: An :py:class:`MVArray` with the result.
+    """
     m = mv_getm(x1, x2)
     x1, x2 = mv_cast(x1, x2, m=m)
     out = out or MVArray(np.broadcast(x1.data, x2.data).shape, m=m)
@@ -151,6 +245,14 @@ def _mv_and(m, out, *ins):
 
 
 def mv_and(x1, x2, out=None):
+    """A multi-valued AND operator.
+
+    :param x1: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts.
+    :param x2: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts.
+    :param out: Optionally an :py:class:`MVArray` as storage destination. If None, a new :py:class:`MVArray`
+        is returned.
+    :return: An :py:class:`MVArray` with the result.
+    """
     m = mv_getm(x1, x2)
     x1, x2 = mv_cast(x1, x2, m=m)
     out = out or MVArray(np.broadcast(x1.data, x2.data).shape, m=m)
@@ -174,6 +276,14 @@ def _mv_xor(m, out, *ins):
 
 
 def mv_xor(x1, x2, out=None):
+    """A multi-valued XOR operator.
+
+    :param x1: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts.
+    :param x2: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts.
+    :param out: Optionally an :py:class:`MVArray` as storage destination. If None, a new :py:class:`MVArray`
+        is returned.
+    :return: An :py:class:`MVArray` with the result.
+    """
     m = mv_getm(x1, x2)
     x1, x2 = mv_cast(x1, x2, m=m)
     out = out or MVArray(np.broadcast(x1.data, x2.data).shape, m=m)
@@ -182,6 +292,16 @@ def mv_xor(x1, x2, out=None):
 
 
 def mv_transition(init, final, out=None):
+    """Computes the logic transitions from the initial values of ``init`` to the final values of ``final``.
+    Pulses in the input data are ignored. If any of the inputs is ``UNKNOWN``, the result is ``UNKNOWN``.
+    If both inputs are ``UNASSIGNED``, the result is ``UNASSIGNED``.
+
+    :param init: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts.
+    :param final: An :py:class:`MVArray` or data the :py:class:`MVArray` constructor accepts.
+    :param out: Optionally an :py:class:`MVArray` as storage destination. If None, a new :py:class:`MVArray`
+        is returned.
+    :return: An :py:class:`MVArray` with the result.
+    """
     m = mv_getm(init, final)
     init, final = mv_cast(init, final, m=m)
     init = init.data
@@ -196,65 +316,46 @@ def mv_transition(init, final, out=None):
     return out
 
 
-class MVArray:
-    """An n-dimensional array of m-valued logic values.
-
-    This class wraps a numpy.ndarray of type uint8 and adds support for encoding and
-    interpreting 2-valued, 4-valued, and 8-valued logic values.
-    Each logic value is stored as an uint8, value manipulations are cheaper than in BPArray.
-
-    An MVArray always has 2 axes:
+class BPArray:
+    """An n-dimensional array of m-valued logic values that uses bit-parallel storage.
 
-    * Axis 0 is PI/PO/FF position, the length of this axis is called "width".
-    * Axis 1 is vector/pattern, the length of this axis is called "length".
+    The primary use of this format is in aiding efficient bit-parallel logic simulation.
+    The secondary benefit over :py:class:`MVArray` is its memory efficiency.
+    Accessing individual values is more expensive than with :py:class:`MVArray`.
+    Therefore it may be more efficient to unpack the data into an :py:class:`MVArray` and pack it again into a
+    :py:class:`BPArray` for simulation.
 
+    See :py:class:`MVArray` for constructor parameters.
     """
 
     def __init__(self, a, m=None):
-        self.m = m or 8
-        assert self.m in [2, 4, 8]
-
-        # Try our best to interpret given a.
+        if not isinstance(a, MVArray) and not isinstance(a, BPArray):
+            a = MVArray(a, m)
+            self.m = a.m
         if isinstance(a, MVArray):
-            self.data = a.data.copy()
-            self.m = m or a.m
-        elif hasattr(a, 'data'):  # assume it is a BPArray. Can't use isinstance() because BPArray isn't declared yet.
-            self.data = np.zeros((a.width, a.length), dtype=np.uint8)
-            self.m = m or a.m
-            for i in range(a.data.shape[-2]):
-                self.data[...] <<= 1
-                self.data[...] |= np.unpackbits(a.data[..., -i-1, :], axis=1)[:, :a.length]
-            if a.data.shape[-2] == 1:
-                self.data *= 3
-        elif isinstance(a, int):
-            self.data = np.full((a, 1), UNASSIGNED, dtype=np.uint8)
-        elif isinstance(a, tuple):
-            self.data = np.full(a, UNASSIGNED, dtype=np.uint8)
-        else:
-            if isinstance(a, str): a = [a]
-            self.data = np.asarray(interpret(a), dtype=np.uint8)
-            self.data = self.data[:, np.newaxis] if self.data.ndim == 1 else np.moveaxis(self.data, -2, -1)
-
-        # Cast data to m-valued logic.
-        if self.m == 2:
-            self.data[...] = ((self.data & 0b001) & ((self.data >> 1) & 0b001) | (self.data == RISE)) * ONE
-        elif self.m == 4:
-            self.data[...] = (self.data & 0b011) & ((self.data != FALL) * ONE) | ((self.data == RISE) * ONE)
-        elif self.m == 8:
-            self.data[...] = self.data & 0b111
+            if m is not None and m != a.m:
+                a = MVArray(a, m)  # cast data
+            self.m = a.m
+            assert self.m in [2, 4, 8]
+            nwords = math.ceil(math.log2(self.m))
+            nbytes = (a.data.shape[-1] - 1) // 8 + 1
+            self.data = np.zeros(a.data.shape[:-1] + (nwords, nbytes), dtype=np.uint8)
+            """The wrapped 3-dimensional ndarray.
 
-        self.length = self.data.shape[-1]
-        self.width = self.data.shape[-2]
+            * Axis 0 is PI/PO/FF position, the length of this axis is called "width".
+            * Axis 1 has length ``ceil(log2(m))`` for storing all bits.
+            * Axis 2 holds the vectors/patterns packed into uint8 words.
+            """
+            for i in range(self.data.shape[-2]):
+                self.data[..., i, :] = np.packbits((a.data >> i) & 1, axis=-1)
+        else:  # we have a BPArray
+            self.data = a.data.copy()  # TODO: support conversion to different m
+            self.m = a.m
+        self.length = a.length
+        self.width = a.width
 
     def __repr__(self):
-        return f'<MVArray length={self.length} width={self.width} m={self.m} nbytes={self.data.nbytes}>'
-
-    def __str__(self):
-        return str([self[idx] for idx in range(self.length)])
-
-    def __getitem__(self, vector_idx):
-        chars = ["0", "X", "-", "1", "P", "R", "F", "N"]
-        return ''.join(chars[v] for v in self.data[:, vector_idx])
+        return f'<BPArray length={self.length} width={self.width} m={self.m} mem={hr_bytes(self.data.nbytes)}>'
 
     def __len__(self):
         return self.length
@@ -359,44 +460,3 @@ def bp_xor(out, *ins):
         out[..., 0, :] |= any_unknown
         out[..., 1, :] &= ~any_unknown
         out[..., 2, :] &= ~any_unknown
-
-
-class BPArray:
-    """An n-dimensional array of m-valued logic values that uses bit-parallel storage.
-
-    The primary use of this format is in aiding efficient bit-parallel logic simulation.
-    The secondary benefit over MVArray is its memory efficiency.
-    Accessing individual values is more expensive than with :py:class:`MVArray`.
-    It is advised to first construct a MVArray, pack it into a :py:class:`BPArray` for simulation and unpack the results
-    back into a :py:class:`MVArray` for value access.
-
-    The values along the last axis (vectors/patterns) are packed into uint8 words.
-    The second-last axis has length ceil(log2(m)) for storing all bits.
-    All other axes stay the same as in MVArray.
-    """
-
-    def __init__(self, a, m=None):
-        if not isinstance(a, MVArray) and not isinstance(a, BPArray):
-            a = MVArray(a, m)
-            self.m = a.m
-        if isinstance(a, MVArray):
-            if m is not None and m != a.m:
-                a = MVArray(a, m)  # cast data
-            self.m = a.m
-            assert self.m in [2, 4, 8]
-            nwords = math.ceil(math.log2(self.m))
-            nbytes = (a.data.shape[-1] - 1) // 8 + 1
-            self.data = np.zeros(a.data.shape[:-1] + (nwords, nbytes), dtype=np.uint8)
-            for i in range(self.data.shape[-2]):
-                self.data[..., i, :] = np.packbits((a.data >> i) & 1, axis=-1)
-        else:  # we have a BPArray
-            self.data = a.data.copy()  # TODO: support conversion to different m
-            self.m = a.m
-        self.length = a.length
-        self.width = a.width
-
-    def __repr__(self):
-        return f'<BPArray length={self.length} width={self.width} m={self.m} bytes={self.data.nbytes}>'
-
-    def __len__(self):
-        return self.length
diff --git a/src/kyupy/logic_sim.py b/src/kyupy/logic_sim.py
index cddde47..993938a 100644
--- a/src/kyupy/logic_sim.py
+++ b/src/kyupy/logic_sim.py
@@ -1,14 +1,29 @@
+"""A high-throughput combinational logic simulator.
+
+The class :py:class:`~kyupy.logic_sim.LogicSim` performs parallel simulations of the combinational part of a circuit.
+The logic operations are performed bit-parallel on packed numpy arrays.
+Simple sequential circuits can be simulated by repeated assignments and propagations.
+However, this simulator ignores the clock network and simply assumes that all state-elements are clocked all the time.
+"""
+
 import math
 
 import numpy as np
 
-from . import logic
+from . import logic, hr_bytes
 
 
 class LogicSim:
     """A bit-parallel naïve combinational simulator for 2-, 4-, or 8-valued logic.
+
+    :param circuit: The circuit to simulate.
+    :type circuit: :py:class:`~kyupy.circuit.Circuit`
+    :param sims: The number of parallel logic simulations to perform.
+    :type sims: int
+    :param m: The arity of the logic, must be 2, 4, or 8.
+    :type m: int
     """
-    def __init__(self, circuit, sims=1, m=8):
+    def __init__(self, circuit, sims=8, m=8):
         assert m in [2, 4, 8]
         self.m = m
         mdim = math.ceil(math.log2(m))
@@ -16,216 +31,165 @@ class LogicSim:
         self.sims = sims
         nbytes = (sims - 1) // 8 + 1
         self.interface = list(circuit.interface) + [n for n in circuit.nodes if 'dff' in n.kind.lower()]
+        self.width = len(self.interface)
+        """The number of bits in the circuit state (number of ports + number of state-elements)."""
         self.state = np.zeros((len(circuit.lines), mdim, nbytes), dtype='uint8')
         self.state_epoch = np.zeros(len(circuit.nodes), dtype='int8') - 1
         self.tmp = np.zeros((5, mdim, nbytes), dtype='uint8')
         self.zero = np.zeros((mdim, nbytes), dtype='uint8')
         self.epoch = 0
 
-        self.fork_vd1 = self.fork_vdx
-        self.const0_vd1 = self.const0_vdx
-        self.input_vd1 = self.fork_vd1
-        self.output_vd1 = self.fork_vd1
-        self.inv_vd1 = self.not_vd1
-        self.ibuff_vd1 = self.not_vd1
-        self.nbuff_vd1 = self.fork_vd1
-        self.xor2_vd1 = self.xor_vd1
-        
-        self.fork_vd2 = self.fork_vdx
-        self.const0_vd2 = self.const0_vdx
-        self.input_vd2 = self.fork_vd2
-        self.output_vd2 = self.fork_vd2
-        self.inv_vd2 = self.not_vd2
-        self.ibuff_vd2 = self.not_vd2
-        self.nbuff_vd2 = self.fork_vd2
-        self.xor2_vd2 = self.xor_vd2
-        
-        self.fork_vd3 = self.fork_vdx
-        self.const0_vd3 = self.const0_vdx
-        self.input_vd3 = self.fork_vd3
-        self.output_vd3 = self.fork_vd3
-        self.inv_vd3 = self.not_vd3
-        self.ibuff_vd3 = self.not_vd3
-        self.nbuff_vd3 = self.fork_vd3
-        self.xor2_vd3 = self.xor_vd3
-        
-        known_fct = [(f[:-4], getattr(self, f)) for f in dir(self) if f.endswith(f'_vd{mdim}')]
+        known_fct = [(f[:-4], getattr(self, f)) for f in dir(self) if f.endswith('_fct')]
         self.node_fct = []
         for n in circuit.nodes:
             t = n.kind.lower().replace('__fork__', 'fork')
+            t = t.replace('nbuff', 'fork')
+            t = t.replace('input', 'fork')
+            t = t.replace('output', 'fork')
             t = t.replace('__const0__', 'const0')
             t = t.replace('__const1__', 'const1')
             t = t.replace('tieh', 'const1')
+            t = t.replace('ibuff', 'not')
+            t = t.replace('inv', 'not')
+
             fcts = [f for n, f in known_fct if t.startswith(n)]
             if len(fcts) < 1:
                 raise ValueError(f'Unknown node kind {n.kind}')
             self.node_fct.append(fcts[0])
 
+    def __repr__(self):
+        return f'<LogicSim {self.circuit.name} sims={self.sims} m={self.m} state_mem={hr_bytes(self.state.nbytes)}>'
+
     def assign(self, stimuli):
-        """Assign stimuli to the primary inputs and state-elements (flip-flops)."""
-        if hasattr(stimuli, 'data'):
-            stimuli = stimuli.data
-        for stim, node in zip(stimuli, self.interface):
+        """Assign stimuli to the primary inputs and state-elements (flip-flops).
+
+        :param stimuli: The input data to assign. Must be in bit-parallel storage format and in a compatible shape.
+        :type stimuli: :py:class:`~kyupy.logic.BPArray`
+        :returns: The given stimuli object.
+        """
+        for node, stim in zip(self.interface, stimuli.data if hasattr(stimuli, 'data') else stimuli):
             if len(node.outs) == 0: continue
-            outputs = [self.state[line.index] if line else self.tmp[3] for line in node.outs]
-            self.node_fct[node.index]([stim], outputs)
+            outputs = [self.state[line] if line else self.tmp[3] for line in node.outs]
+            self.node_fct[node]([stim], outputs)
             for line in node.outs:
-                if line:
-                    self.state_epoch[line.reader.index] = self.epoch
+                if line is not None: self.state_epoch[line.reader] = self.epoch
         for n in self.circuit.nodes:
-            if (n.kind == '__const1__') or (n.kind == '__const0__'):
-                outputs = [self.state[line.index] if line else self.tmp[3] for line in n.outs]
-                self.node_fct[n.index]([], outputs)
-                # print('assign const')
+            if n.kind in ('__const1__', '__const0__'):
+                outputs = [self.state[line] if line else self.tmp[3] for line in n.outs]
+                self.node_fct[n]([], outputs)
                 for line in n.outs:
-                    if line:
-                        self.state_epoch[line.reader.index] = self.epoch
+                    if line is not None: self.state_epoch[line.reader] = self.epoch
+        return stimuli
 
     def capture(self, responses):
-        """Capture the current values at the primary outputs and in the state-elements (flip-flops)."""
-        if hasattr(responses, 'data'):
-            responses = responses.data
-        for resp, node in zip(responses, self.interface):
-            if len(node.ins) == 0: continue
-            resp[...] = self.state[node.ins[0].index]
-        # print(responses)
-
-    def propagate(self):
-        """Propagate the input values towards the outputs (Perform all logic operations in topological order)."""
+        """Capture the current values at the primary outputs and in the state-elements (flip-flops).
+
+        :param responses: A bit-parallel storage target for the responses in a compatible shape.
+        :type responses: :py:class:`~kyupy.logic.BPArray`
+        :returns: The given responses object.
+        """
+        for node, resp in zip(self.interface, responses.data if hasattr(responses, 'data') else responses):
+            if len(node.ins) > 0: resp[...] = self.state[node.ins[0]]
+        return responses
+
+    def propagate(self, inject_cb=None):
+        """Propagate the input values towards the outputs (Perform all logic operations in topological order).
+
+        If the circuit is sequential (it contains flip-flops), one call simulates one clock cycle.
+        Multiple clock cycles are simulated by an assign-propagate-capture loop:
+
+        .. code-block:: python
+
+           # initial state in state_bp
+           for cycle in range(10):  # simulate 10 clock cycles
+               sim.assign(state_bp)
+               sim.propagate()
+               sim.capture(state_bp)
+
+        :param inject_cb: A callback function for manipulating intermediate signal values.
+            This function is called with a line index and its new logic values (in bit-parallel format) after
+            evaluation of a node. The callback may manipulate the given values in-place; the simulation
+            resumes with the manipulated values after the callback returns.
+        :type inject_cb: ``f(int, ndarray)``
+        """
         for node in self.circuit.topological_order():
-            if self.state_epoch[node.index] != self.epoch: continue
-            inputs = [self.state[line.index] if line else self.zero for line in node.ins]
-            outputs = [self.state[line.index] if line else self.tmp[3] for line in node.outs]
+            if self.state_epoch[node] != self.epoch: continue
+            inputs = [self.state[line] if line else self.zero for line in node.ins]
+            outputs = [self.state[line] if line else self.tmp[3] for line in node.outs]
             # print('sim', node)
-            self.node_fct[node.index](inputs, outputs)
+            self.node_fct[node](inputs, outputs)
             for line in node.outs:
-                self.state_epoch[line.reader.index] = self.epoch
+                if inject_cb is not None: inject_cb(line, self.state[line])
+                self.state_epoch[line.reader] = self.epoch
         self.epoch = (self.epoch + 1) % 128
 
-    def fork_vdx(self, inputs, outputs):
+    def cycle(self, state, inject_cb=None):
+        """Assigns the given state, propagates it and captures the new state.
+
+        :param state: A bit-parallel array in a compatible shape holding the current circuit state.
+            The contained data is assigned to the PI and PPI and overwritten by data at the PO and PPO after
+            propagation.
+        :type state: :py:class:`~kyupy.logic.BPArray`
+        :param inject_cb: A callback function for manipulating intermediate signal values. See :py:func:`propagate`.
+        :returns: The given state object.
+        """
+        self.assign(state)
+        self.propagate(inject_cb)
+        return self.capture(state)
+
+    @staticmethod
+    def fork_fct(inputs, outputs):
         for o in outputs: o[...] = inputs[0]
-    
-    def const0_vdx(self, _, outputs):
-        for o in outputs: o[...] = self.zero
-
-    # 2-valued simulation
-
-    def not_vd1(self, inputs, outputs):
-        outputs[0][0] = ~inputs[0][0]
-
-    def const1_vd1(self, _, outputs):
-        for o in outputs: o[...] = self.zero
-        self.not_vd1(outputs, outputs)
-
-    def and_vd1(self, inputs, outputs):
-        o = outputs[0]
-        o[0] = inputs[0][0]
-        for i in inputs[1:]: o[0] &= i[0]
-
-    def or_vd1(self, inputs, outputs):
-        o = outputs[0]
-        o[0] = inputs[0][0]
-        for i in inputs[1:]: o[0] |= i[0]
-
-    def xor_vd1(self, inputs, outputs):
-        o = outputs[0]
-        o[0] = inputs[0][0]
-        for i in inputs[1:]: o[0] ^= i[0]
-
-    def sdff_vd1(self, inputs, outputs):
-        outputs[0][0] = inputs[0][0]
-        if len(outputs) > 1:
-            outputs[1][0] = ~inputs[0][0]
-
-    def dff_vd1(self, inputs, outputs):
-        outputs[0][0] = inputs[0][0]
-        if len(outputs) > 1:
-            outputs[1][0] = ~inputs[0][0]
-
-    def nand_vd1(self, inputs, outputs):
-        self.and_vd1(inputs, outputs)
-        self.not_vd1(outputs, outputs)
 
-    def nor_vd1(self, inputs, outputs):
-        self.or_vd1(inputs, outputs)
-        self.not_vd1(outputs, outputs)
+    @staticmethod
+    def const0_fct(_, outputs):
+        for o in outputs: o[...] = 0
 
-    def xnor_vd1(self, inputs, outputs):
-        self.xor_vd1(inputs, outputs)
-        self.not_vd1(outputs, outputs)
+    @staticmethod
+    def const1_fct(_, outputs):
+        for o in outputs:
+            o[...] = 0
+            logic.bp_not(o, o)
 
-    # 4-valued simulation
-
-    def not_vd2(self, inputs, outputs):
+    @staticmethod
+    def not_fct(inputs, outputs):
         logic.bp_not(outputs[0], inputs[0])
 
-    def and_vd2(self, inputs, outputs):
+    @staticmethod
+    def and_fct(inputs, outputs):
         logic.bp_and(outputs[0], *inputs)
 
-    def or_vd2(self, inputs, outputs):
+    @staticmethod
+    def or_fct(inputs, outputs):
         logic.bp_or(outputs[0], *inputs)
 
-    def xor_vd2(self, inputs, outputs):
+    @staticmethod
+    def xor_fct(inputs, outputs):
         logic.bp_xor(outputs[0], *inputs)
 
-    def sdff_vd2(self, inputs, outputs):
-        self.dff_vd2(inputs, outputs)
+    @staticmethod
+    def sdff_fct(inputs, outputs):
+        logic.bp_buf(outputs[0], inputs[0])
         if len(outputs) > 1:
             logic.bp_not(outputs[1], inputs[0])
 
-    def dff_vd2(self, inputs, outputs):
+    @staticmethod
+    def dff_fct(inputs, outputs):
         logic.bp_buf(outputs[0], inputs[0])
+        if len(outputs) > 1:
+            logic.bp_not(outputs[1], inputs[0])
 
-    def nand_vd2(self, inputs, outputs):
-        self.and_vd2(inputs, outputs)
-        self.not_vd2(outputs, outputs)
-
-    def nor_vd2(self, inputs, outputs):
-        self.or_vd2(inputs, outputs)
-        self.not_vd2(outputs, outputs)
-
-    def xnor_vd2(self, inputs, outputs):
-        self.xor_vd2(inputs, outputs)
-        self.not_vd2(outputs, outputs)
-    
-    def const1_vd2(self, _, outputs):
-        for o in outputs: o[...] = self.zero
-        self.not_vd2(outputs, outputs)
-
-    # 8-valued simulation
-
-    def not_vd3(self, inputs, outputs):
-        logic.bp_not(outputs[0], inputs[0])
-
-    def and_vd3(self, inputs, outputs):
+    @staticmethod
+    def nand_fct(inputs, outputs):
         logic.bp_and(outputs[0], *inputs)
+        logic.bp_not(outputs[0], outputs[0])
 
-    def or_vd3(self, inputs, outputs):
+    @staticmethod
+    def nor_fct(inputs, outputs):
         logic.bp_or(outputs[0], *inputs)
+        logic.bp_not(outputs[0], outputs[0])
 
-    def xor_vd3(self, inputs, outputs):
+    @staticmethod
+    def xnor_fct(inputs, outputs):
         logic.bp_xor(outputs[0], *inputs)
-
-    def sdff_vd3(self, inputs, outputs):
-        self.dff_vd3(inputs, outputs)
-        if len(outputs) > 1:
-            logic.bp_not(outputs[1], inputs[0])
-
-    def dff_vd3(self, inputs, outputs):
-        logic.bp_buf(outputs[0], inputs[0])
-
-    def nand_vd3(self, inputs, outputs):
-        self.and_vd3(inputs, outputs)
-        self.not_vd3(outputs, outputs)
-
-    def nor_vd3(self, inputs, outputs):
-        self.or_vd3(inputs, outputs)
-        self.not_vd3(outputs, outputs)
-
-    def xnor_vd3(self, inputs, outputs):
-        self.xor_vd3(inputs, outputs)
-        self.not_vd3(outputs, outputs)
-        
-    def const1_vd3(self, _, outputs):
-        for o in outputs: o[...] = self.zero
-        self.not_vd3(outputs, outputs)
+        logic.bp_not(outputs[0], outputs[0])
diff --git a/src/kyupy/saed.py b/src/kyupy/saed.py
deleted file mode 100644
index 21771fd..0000000
--- a/src/kyupy/saed.py
+++ /dev/null
@@ -1,289 +0,0 @@
-from kyupy.circuit import Node, Line
-
-
-def pin_index(cell_type, pin):
-    if cell_type.startswith('HADD') and pin == 'B0': return 1
-    if cell_type.startswith('HADD') and pin == 'SO': return 1
-    if cell_type.startswith('MUX21') and pin == 'S': return 2
-    if cell_type.startswith('SDFF') and pin == 'QN': return 1
-    if cell_type.startswith('DFF') and pin == 'QN': return 1
-    if cell_type.startswith('DFF') and pin == 'CLK': return 1
-    if cell_type.startswith('DFF') and pin == 'RSTB': return 2
-    if cell_type.startswith('DFF') and pin == 'SETB': return 3
-    if pin in ['A2', 'IN2', 'SE', 'B', 'CO']: return 1
-    if pin in ['A3', 'IN3', 'SI', 'CI']: return 2
-    if pin == 'A4' or pin == 'IN4' or pin == 'CLK': return 3  # CLK for scan cells SDFF
-    if pin == 'A5' or pin == 'IN5' or pin == 'RSTB': return 4
-    if pin == 'A6' or pin == 'IN6' or pin == 'SETB': return 5
-    return 0
-
-
-def pin_is_output(kind, pin):
-    if 'MUX' in kind and pin == 'S':
-        return False
-    return pin in ['Q', 'QN', 'Z', 'ZN', 'Y', 'CO', 'S', 'SO', 'C1']
-
-
-def add_and_connect(circuit, name, kind, in1=None, in2=None, out=None):
-    n = Node(circuit, name, kind)
-    if in1 is not None:
-        n.ins[0] = in1
-        in1.reader = n
-        in1.reader_pin = 0
-    if in2 is not None:
-        n.ins[1] = in2
-        in2.reader = n
-        in2.reader_pin = 1
-    if out is not None:
-        n.outs[0] = out
-        out.driver = n
-        out.driver_pin = 0
-    return n
-
-
-def split_complex_gates(circuit):
-    node_list = circuit.nodes
-    for n in node_list:
-        name = n.name
-        ins = n.ins
-        outs = n.outs
-        if n.kind.startswith('AO21X'):
-            n.remove()
-            n_and = add_and_connect(circuit, name+'~and', 'AND2', ins[0], ins[1], None)
-            n_or = add_and_connect(circuit, name+'~or', 'OR2', None, ins[2], outs[0])
-            Line(circuit, n_and, n_or)
-        elif n.kind.startswith('AOI21X'):
-            n.remove()
-            n_and = add_and_connect(circuit, name+'~and', 'AND2', ins[0], ins[1], None)
-            n_nor = add_and_connect(circuit, name+'~nor', 'NOR2', None, ins[2], outs[0])
-            Line(circuit, n_and, n_nor)
-        elif n.kind.startswith('OA21X'):
-            n.remove()
-            n_or = add_and_connect(circuit, name+'~or', 'OR2', ins[0], ins[1], None)
-            n_and = add_and_connect(circuit, name+'~and', 'AND2', None, ins[2], outs[0])
-            Line(circuit, n_or, n_and)
-        elif n.kind.startswith('OAI21X'):
-            n.remove()
-            n_or = add_and_connect(circuit, name+'~or', 'OR2', ins[0], ins[1], None)
-            n_nand = add_and_connect(circuit, name+'~nand', 'NAND2', None, ins[2], outs[0])
-            Line(circuit, n_or, n_nand)
-        elif n.kind.startswith('OA22X'):
-            n.remove()
-            n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
-            n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
-            n_and = add_and_connect(circuit, name+'~and', 'AND2', None, None, outs[0])
-            Line(circuit, n_or0, n_and)
-            Line(circuit, n_or1, n_and)
-        elif n.kind.startswith('OAI22X'):
-            n.remove()
-            n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
-            n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
-            n_nand = add_and_connect(circuit, name+'~nand', 'NAND2', None, None, outs[0])
-            Line(circuit, n_or0, n_nand)
-            Line(circuit, n_or1, n_nand)
-        elif n.kind.startswith('AO22X'):
-            n.remove()
-            n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
-            n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
-            n_or = add_and_connect(circuit, name+'~or', 'OR2', None, None, outs[0])
-            Line(circuit, n_and0, n_or)
-            Line(circuit, n_and1, n_or)
-        elif n.kind.startswith('AOI22X'):
-            n.remove()
-            n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
-            n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
-            n_nor = add_and_connect(circuit, name+'~nor', 'NOR2', None, None, outs[0])
-            Line(circuit, n_and0, n_nor)
-            Line(circuit, n_and1, n_nor)
-        elif n.kind.startswith('AO221X'):
-            n.remove()
-            n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
-            n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
-            n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', None, None, None)
-            n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', None, ins[4], outs[0])
-            Line(circuit, n_and0, n_or0)
-            Line(circuit, n_and1, n_or0)
-            Line(circuit, n_or0, n_or1)     
-        elif n.kind.startswith('AOI221X'):
-            n.remove()
-            n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
-            n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
-            n_or = add_and_connect(circuit, name+'~or', 'OR2', None, None, None)
-            n_nor = add_and_connect(circuit, name+'~nor', 'NOR2', None, ins[4], outs[0])
-            Line(circuit, n_and0, n_or)
-            Line(circuit, n_and1, n_or)
-            Line(circuit, n_or, n_nor)     
-        elif n.kind.startswith('OA221X'):
-            n.remove()
-            n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
-            n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
-            n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None)
-            n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', None, ins[4], outs[0])
-            Line(circuit, n_or0, n_and0)
-            Line(circuit, n_or1, n_and0)
-            Line(circuit, n_and0, n_and1)    
-        elif n.kind.startswith('OAI221X'):
-            n.remove()
-            n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
-            n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
-            n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None)
-            n_nand1 = add_and_connect(circuit, name+'~nand1', 'NAND2', None, ins[4], outs[0])
-            Line(circuit, n_or0, n_and0)
-            Line(circuit, n_or1, n_and0)
-            Line(circuit, n_and0, n_nand1)
-        elif n.kind.startswith('AO222X'):
-            n.remove()
-            n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
-            n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
-            n_and2 = add_and_connect(circuit, name+'~and2', 'AND2', ins[4], ins[5], None)
-            n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', None, None, None)
-            n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', None, None, outs[0])
-            Line(circuit, n_and0, n_or0)
-            Line(circuit, n_and1, n_or0)
-            Line(circuit, n_and2, n_or1)
-            Line(circuit, n_or0, n_or1)
-        elif n.kind.startswith('AOI222X'):
-            n.remove()
-            n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
-            n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
-            n_and2 = add_and_connect(circuit, name+'~and2', 'AND2', ins[4], ins[5], None)
-            n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', None, None, None)
-            n_nor1 = add_and_connect(circuit, name+'~nor1', 'NOR2', None, None, outs[0])
-            Line(circuit, n_and0, n_or0)
-            Line(circuit, n_and1, n_or0)
-            Line(circuit, n_and2, n_nor1)
-            Line(circuit, n_or0, n_nor1)
-        elif n.kind.startswith('OA222X'):
-            n.remove()
-            n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
-            n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
-            n_or2 = add_and_connect(circuit, name+'~or2', 'OR2', ins[4], ins[5], None)
-            n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None)
-            n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', None, None, outs[0])
-            Line(circuit, n_or0, n_and0)
-            Line(circuit, n_or1, n_and0)
-            Line(circuit, n_or2, n_and1)
-            Line(circuit, n_and0, n_and1)
-        elif n.kind.startswith('OAI222X'):
-            n.remove()
-            n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
-            n1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
-            n2 = add_and_connect(circuit, name+'~or2', 'OR2', ins[4], ins[5], None)
-            n3 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None)
-            n4 = add_and_connect(circuit, name+'~nand1', 'NAND2', None, None, outs[0])
-            Line(circuit, n0, n3)
-            Line(circuit, n1, n3)
-            Line(circuit, n2, n4)
-            Line(circuit, n3, n4)
-        elif n.kind.startswith('AND3X'):
-            n.remove()
-            n0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
-            n1 = add_and_connect(circuit, name+'~and1', 'AND2', None, ins[2], outs[0])
-            Line(circuit, n0, n1)
-        elif n.kind.startswith('OR3X'):
-            n.remove()
-            n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
-            n1 = add_and_connect(circuit, name+'~or1', 'OR2', None, ins[2], outs[0])
-            Line(circuit, n0, n1)
-        elif n.kind.startswith('XOR3X'):
-            n.remove()
-            n0 = add_and_connect(circuit, name+'~xor0', 'XOR2', ins[0], ins[1], None)
-            n1 = add_and_connect(circuit, name+'~xor1', 'XOR2', None, ins[2], outs[0])
-            Line(circuit, n0, n1)
-        elif n.kind.startswith('NAND3X'):
-            n.remove()
-            n0 = add_and_connect(circuit, name+'~and', 'AND2', ins[0], ins[1], None)
-            n1 = add_and_connect(circuit, name+'~nand', 'NAND2', None, ins[2], outs[0])
-            Line(circuit, n0, n1)
-        elif n.kind.startswith('NOR3X'):
-            n.remove()
-            n0 = add_and_connect(circuit, name+'~or', 'OR2', ins[0], ins[1], None)
-            n1 = add_and_connect(circuit, name+'~nor', 'NOR2', None, ins[2], outs[0])
-            Line(circuit, n0, n1)
-        elif n.kind.startswith('XNOR3X'):
-            n.remove()
-            n0 = add_and_connect(circuit, name+'~xor', 'XOR2', ins[0], ins[1], None)
-            n1 = add_and_connect(circuit, name+'~xnor', 'XNOR2', None, ins[2], outs[0])
-            Line(circuit, n0, n1)
-        elif n.kind.startswith('AND4X'):
-            n.remove()
-            n0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
-            n1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
-            n2 = add_and_connect(circuit, name+'~and2', 'AND2', None, None, outs[0])
-            Line(circuit, n0, n2)
-            Line(circuit, n1, n2)
-        elif n.kind.startswith('OR4X'):
-            n.remove()
-            n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
-            n1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
-            n2 = add_and_connect(circuit, name+'~or2', 'OR2', None, None, outs[0])
-            Line(circuit, n0, n2)
-            Line(circuit, n1, n2)
-        elif n.kind.startswith('NAND4X'):
-            n.remove()
-            n0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
-            n1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
-            n2 = add_and_connect(circuit, name+'~nand2', 'NAND2', None, None, outs[0])
-            Line(circuit, n0, n2)
-            Line(circuit, n1, n2)
-        elif n.kind.startswith('NOR4X'):
-            n.remove()
-            n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
-            n1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
-            n2 = add_and_connect(circuit, name+'~nor2', 'NOR2', None, None, outs[0])
-            Line(circuit, n0, n2)
-            Line(circuit, n1, n2)
-        elif n.kind.startswith('FADDX'):
-            n.remove()
-            # forks for fan-outs
-            f_a = add_and_connect(circuit, name + '~fork0', '__fork__', ins[0])
-            f_b = add_and_connect(circuit, name + '~fork1', '__fork__', ins[1])
-            f_ci = add_and_connect(circuit, name + '~fork2', '__fork__', ins[2])
-            f_ab = Node(circuit, name + '~fork3')
-            # sum-block
-            n_xor0 = Node(circuit, name + '~xor0', 'XOR2')
-            Line(circuit, f_a, n_xor0)
-            Line(circuit, f_b, n_xor0)
-            Line(circuit, n_xor0, f_ab)
-            if len(outs) > 0 and outs[0] is not None:
-                n_xor1 = add_and_connect(circuit, name + '~xor1', 'XOR2', None, None, outs[0])
-                Line(circuit, f_ab, n_xor1)
-                Line(circuit, f_ci, n_xor1)
-            # carry-block
-            if len(outs) > 1 and outs[1] is not None:
-                n_and0 = Node(circuit, name + '~and0', 'AND2')
-                Line(circuit, f_ab, n_and0)
-                Line(circuit, f_ci, n_and0)
-                n_and1 = Node(circuit, name + '~and1', 'AND2')
-                Line(circuit, f_a, n_and1)
-                Line(circuit, f_b, n_and1)
-                n_or = add_and_connect(circuit, name + '~or0', 'OR2', None, None, outs[1])
-                Line(circuit, n_and0, n_or)
-                Line(circuit, n_and1, n_or)
-        elif n.kind.startswith('HADDX'):
-            n.remove()
-            # forks for fan-outs
-            f_a = add_and_connect(circuit, name + '~fork0', '__fork__', ins[0])
-            f_b = add_and_connect(circuit, name + '~fork1', '__fork__', ins[1])
-            n_xor0 = add_and_connect(circuit, name + '~xor0', 'XOR2', None, None, outs[1])
-            Line(circuit, f_a, n_xor0)
-            Line(circuit, f_b, n_xor0)
-            n_and0 = add_and_connect(circuit, name + '~and0', 'AND2', None, None, outs[0])
-            Line(circuit, f_a, n_and0)
-            Line(circuit, f_b, n_and0)
-        elif n.kind.startswith('MUX21X'):
-            n.remove()
-            f_s = add_and_connect(circuit, name + '~fork0', '__fork__', ins[2])
-            n_not = Node(circuit, name + '~not', 'INV')
-            Line(circuit, f_s, n_not)
-            n_and0 = add_and_connect(circuit, name + '~and0', 'AND2', ins[0])
-            n_and1 = add_and_connect(circuit, name + '~and1', 'AND2', ins[1])
-            n_or0 = add_and_connect(circuit, name + '~or0', 'OR2', None, None, outs[0])
-            Line(circuit, n_not, n_and0)
-            Line(circuit, f_s, n_and1)
-            Line(circuit, n_and0, n_or0)
-            Line(circuit, n_and1, n_or0)
-        elif n.kind.startswith('DFFSSR'):
-            n.kind = 'DFFX1'
-            n_and0 = add_and_connect(circuit, name + '~and0', 'AND2', ins[0], ins[2], None)
-            Line(circuit, n_and0, (n, 0))
diff --git a/src/kyupy/sdf.py b/src/kyupy/sdf.py
index beb58c0..f89d7b3 100644
--- a/src/kyupy/sdf.py
+++ b/src/kyupy/sdf.py
@@ -14,6 +14,7 @@ import numpy as np
 from lark import Lark, Transformer
 
 from . import log, readtext
+from .techlib import TechLib
 
 
 Interconnect = namedtuple('Interconnect', ['orig', 'dest', 'r', 'f'])
@@ -35,7 +36,7 @@ class DelayFile:
         return '\n'.join(f'{n}: {l}' for n, l in self.cells.items()) + '\n' + \
                '\n'.join(str(i) for i in self.interconnects)
 
-    def annotation(self, circuit, pin_index_f, dataset=1, interconnect=True, ffdelays=True):
+    def annotation(self, circuit, tlib=TechLib(), dataset=1, interconnect=True, ffdelays=True):
         """Constructs an 3-dimensional ndarray with timing data for each line in ``circuit``.
 
         An IOPATH delay for a node is annotated to the line connected to the input pin specified in the IOPATH.
@@ -43,29 +44,36 @@ class DelayFile:
         Currently, only ABSOLUTE IOPATH and INTERCONNECT delays are supported.
         Pulse rejection limits are derived from absolute delays, explicit declarations (PATHPULSE etc.) are ignored.
 
-        :param circuit:
-        :param pin_index_f:
-        :param ffdelays:
-        :param interconnect:
-        :type dataset: int or tuple
+        :param circuit: The circuit to annotate. Names from the SDF file are matched to the node names.
+        :type circuit: :class:`~kyupy.circuit.Circuit`
+        :param tlib: A technology library object that provides pin name mappings.
+        :type tlib: :py:class:`~kyupy.techlib.TechLib`
+        :param dataset: SDFs store multiple values for each delay (e.g. minimum, typical, maximum).
+            An integer selects the dataset to use (default is 1 for 'typical').
+            If a tuple is given, the annotator will calculate the average of multiple datasets.
+        :type dataset: ``int`` or ``tuple``
+        :param interconnect: Whether or not to include the delays of interconnects in the annotation.
+            To properly annotate interconnect delays, the circuit model has to include a '__fork__' node on
+            every signal and every fanout-branch. The Verilog parser aids in this by setting the parameter
+            ``branchforks=True`` in :py:func:`kyupy.verilog.parse`.
+        :type interconnect: ``bool``
+        :param ffdelays: Whether or not to include the delays of flip-flops in the annotation.
+        :type ffdelays: ``bool``
         :return: A 3-dimensional ndarray with timing data.
 
             * Axis 0: line index.
-            * Axis 1: type of timing data: 0=`delay`, 1=`pulse rejection limit`.
-            * Axis 2: The polarity of the output transition of the reading node: 0=`rising`, 1=`falling`.
+            * Axis 1: type of timing data: 0='delay', 1='pulse rejection limit'.
+            * Axis 2: The polarity of the output transition of the reading node: 0='rising', 1='falling'.
 
             The polarity for pulse rejection is determined by the latter transition of the pulse.
-            E.g., timing[42,1,0] is the rejection limit of a negative pulse at the output of the reader of line 42.
+            E.g., ``timing[42, 1, 0]`` is the rejection limit of a negative pulse at the output
+            of the reader of line 42.
         """
         def select_del(_delvals, idx):
-            if type(dataset) is tuple:
-                s = 0
-                for d in dataset:
-                    s += _delvals[idx][d]
-                return s / len(dataset)
-            else:
-                return _delvals[idx][dataset]
-        
+            if isinstance(dataset, tuple):
+                return sum(_delvals[idx][d] for d in dataset) / len(dataset)
+            return _delvals[idx][dataset]
+
         def find_cell(name):
             if name not in circuit.cells:
                 name = name.replace('\\', '')
@@ -74,7 +82,7 @@ class DelayFile:
             if name not in circuit.cells:
                 return None
             return circuit.cells[name]
-        
+
         timing = np.zeros((len(circuit.lines), 2, 2))
         for cn, iopaths in self.cells.items():
             for ipn, opn, *delvals in iopaths:
@@ -85,17 +93,17 @@ class DelayFile:
                 if cell is None:
                     log.warn(f'Cell from SDF not found in circuit: {cn}')
                     continue
-                ipin = pin_index_f(cell.kind, ipn)
-                opin = pin_index_f(cell.kind, opn)
+                ipin = tlib.pin_index(cell.kind, ipn)
+                opin = tlib.pin_index(cell.kind, opn)
                 kind = cell.kind.lower()
 
                 ipn2 = ipn.replace('(posedge A1)', 'A1').replace('(negedge A1)', 'A1')\
                     .replace('(posedge A2)', 'A2').replace('(negedge A2)', 'A2')
-                
+
                 def add_delays(_line):
                     if _line is not None:
-                        timing[_line.index, :, 0] += select_del(delvals, 0)
-                        timing[_line.index, :, 1] += select_del(delvals, 1)
+                        timing[_line, :, 0] += select_del(delvals, 0)
+                        timing[_line, :, 1] += select_del(delvals, 1)
 
                 take_avg = False
                 if kind.startswith('sdff'):
@@ -105,16 +113,16 @@ class DelayFile:
                         add_delays(cell.outs[opin])
                 else:
                     if kind.startswith(('xor', 'xnor')):
-                        ipin = pin_index_f(cell.kind, ipn2)
-                        # print(ipn, ipin, times[cell.i_lines[ipin].index, 0, 0])
-                        take_avg = timing[cell.ins[ipin].index].sum() > 0
+                        ipin = tlib.pin_index(cell.kind, ipn2)
+                        # print(ipn, ipin, times[cell.i_lines[ipin], 0, 0])
+                        take_avg = timing[cell.ins[ipin]].sum() > 0
                     add_delays(cell.ins[ipin])
                     if take_avg:
-                        timing[cell.ins[ipin].index] /= 2
-        
+                        timing[cell.ins[ipin]] /= 2
+
         if not interconnect or self.interconnects is None:
             return timing
-        
+
         for n1, n2, *delvals in self.interconnects:
             delvals = [d if len(d) > 0 else [0, 0, 0] for d in delvals]
             if max(max(delvals)) == 0:
@@ -139,7 +147,7 @@ class DelayFile:
             if c2 is None:
                 log.warn(f'Cell from SDF not found in circuit: {cn2}')
                 continue
-            p1, p2 = pin_index_f(c1.kind, pn1), pin_index_f(c2.kind, pn2)
+            p1, p2 = tlib.pin_index(c1.kind, pn1), tlib.pin_index(c2.kind, pn2)
             line = None
             f1, f2 = c1.outs[p1].reader, c2.ins[p2].driver
             if f1 != f2:  # possible branchfork
@@ -149,8 +157,8 @@ class DelayFile:
             elif len(f2.outs) == 1:  # no fanout?
                 line = f2.ins[0]
             if line is not None:
-                timing[line.index, :, 0] += select_del(delvals, 0)
-                timing[line.index, :, 1] += select_del(delvals, 1)
+                timing[line, :, 0] += select_del(delvals, 0)
+                timing[line, :, 1] += select_del(delvals, 1)
             else:
                 log.warn(f'No branchfork for annotating interconnect delay {c1.name}/{p1}->{c2.name}/{p2}')
         return timing
@@ -184,7 +192,7 @@ class SdfTransformer(Transformer):
         return DelayFile(name, cells)
 
 
-grammar = r"""
+GRAMMAR = r"""
     start: "(DELAYFILE" ( "(SDFVERSION" _NOB ")"
         | "(DESIGN" "\"" NAME "\"" ")"
         | "(DATE" _NOB ")"
@@ -218,7 +226,7 @@ grammar = r"""
 
 def parse(text):
     """Parses the given ``text`` and returns a :class:`DelayFile` object."""
-    return Lark(grammar, parser="lalr", transformer=SdfTransformer()).parse(text)
+    return Lark(GRAMMAR, parser="lalr", transformer=SdfTransformer()).parse(text)
 
 
 def load(file):
diff --git a/src/kyupy/stil.py b/src/kyupy/stil.py
index 5c022ca..75bffc2 100644
--- a/src/kyupy/stil.py
+++ b/src/kyupy/stil.py
@@ -4,7 +4,7 @@ The main purpose of this parser is to load scan pattern sets from STIL files.
 It supports only a very limited subset of STIL.
 
 The functions :py:func:`load` and :py:func:`read` return an intermediate representation (:class:`StilFile` object).
-Call :py:func:`StilFile.tests4v`, :py:func:`StilFile.tests8v`, or :py:func:`StilFile.responses4v` to
+Call :py:func:`StilFile.tests`, :py:func:`StilFile.tests_loc`, or :py:func:`StilFile.responses` to
 obtain the appropriate vector sets.
 """
 
@@ -54,26 +54,26 @@ class StilFile:
                     launch = dict((k, v.replace('\n', '')) for k, v in call.parameters.items())
                 else:
                     capture = dict((k, v.replace('\n', '')) for k, v in call.parameters.items())
-    
+
     def _maps(self, c):
         interface = list(c.interface) + [n for n in c.nodes if 'DFF' in n.kind]
-        intf_pos = dict([(n.name, i) for i, n in enumerate(interface)])
+        intf_pos = dict((n.name, i) for i, n in enumerate(interface))
         pi_map = [intf_pos[n] for n in self.signal_groups['_pi']]
         po_map = [intf_pos[n] for n in self.signal_groups['_po']]
         scan_maps = {}
         scan_inversions = {}
-        for chain_name, chain in self.scan_chains.items():
+        for chain in self.scan_chains.values():
             scan_map = []
             scan_in_inversion = []
             scan_out_inversion = []
             inversion = False
             for n in chain[1:-1]:
-                if n == '!': 
+                if n == '!':
                     inversion = not inversion
                 else:
                     scan_in_inversion.append(inversion)
             scan_in_inversion = list(reversed(scan_in_inversion))
-            inversion = False             
+            inversion = False
             for n in reversed(chain[1:-1]):
                 if n == '!':
                     inversion = not inversion
@@ -85,13 +85,13 @@ class StilFile:
             scan_inversions[chain[0]] = scan_in_inversion
             scan_inversions[chain[-1]] = scan_out_inversion
         return interface, pi_map, po_map, scan_maps, scan_inversions
-        
+
     def tests(self, circuit):
         """Assembles and returns a scan test pattern set for given circuit.
 
         This function assumes a static (stuck-at fault) test.
         """
-        interface, pi_map, po_map, scan_maps, scan_inversions = self._maps(circuit)
+        interface, pi_map, _, scan_maps, scan_inversions = self._maps(circuit)
         tests = logic.MVArray((len(interface), len(self.patterns)))
         for i, p in enumerate(self.patterns):
             for si_port in self.si_ports.keys():
@@ -133,10 +133,10 @@ class StilFile:
             launch.data[po_map, i] = logic.UNASSIGNED
 
         return logic.mv_transition(init, launch)
-                
+
     def responses(self, circuit):
         """Assembles and returns a scan test response pattern set for given circuit."""
-        interface, pi_map, po_map, scan_maps, scan_inversions = self._maps(circuit)
+        interface, _, po_map, scan_maps, scan_inversions = self._maps(circuit)
         resp = logic.MVArray((len(interface), len(self.patterns)))
         # resp = PackedVectors(len(self.patterns), len(interface), 2)
         for i, p in enumerate(self.patterns):
@@ -150,27 +150,27 @@ class StilFile:
                 resp.data[scan_maps[so_port], i] = pattern.data[:, 0]
                 # resp.set_values(i, p.unload[so_port], scan_maps[so_port], scan_inversions[so_port])
         return resp
-        
-        
+
+
 class StilTransformer(Transformer):
     def __init__(self):
         super().__init__()
         self._signal_groups = None
         self._calls = None
         self._scan_chains = None
-        
+
     @staticmethod
     def quoted(args): return args[0][1:-1]
 
     @staticmethod
     def call(args): return Call(args[0], dict(args[1:]))
-        
+
     @staticmethod
     def call_parameter(args): return args[0], args[1].value
 
     @staticmethod
     def signal_group(args): return args[0], args[1:]
-    
+
     @staticmethod
     def scan_chain(args):
         scan_in = None
@@ -187,7 +187,7 @@ class StilTransformer(Transformer):
         return args[0], ([scan_in] + scan_cells + [scan_out])
 
     def signal_groups(self, args): self._signal_groups = dict(args)
-    
+
     def pattern(self, args): self._calls = [c for c in args if isinstance(c, Call)]
 
     def scan_structures(self, args): self._scan_chains = dict(args)
@@ -196,7 +196,7 @@ class StilTransformer(Transformer):
         return StilFile(float(args[0]), self._signal_groups, self._scan_chains, self._calls)
 
 
-grammar = r"""
+GRAMMAR = r"""
     start: "STIL" FLOAT _ignore _block*
     _block: signal_groups | scan_structures | pattern
         | "Header" _ignore
@@ -240,7 +240,7 @@ grammar = r"""
 
 def parse(text):
     """Parses the given ``text`` and returns a :class:`StilFile` object."""
-    return Lark(grammar, parser="lalr", transformer=StilTransformer()).parse(text)
+    return Lark(GRAMMAR, parser="lalr", transformer=StilTransformer()).parse(text)
 
 
 def load(file):
diff --git a/src/kyupy/techlib.py b/src/kyupy/techlib.py
new file mode 100644
index 0000000..5a5a01b
--- /dev/null
+++ b/src/kyupy/techlib.py
@@ -0,0 +1,301 @@
+from .circuit import Node, Line
+
+
+def add_and_connect(circuit, name, kind, in1=None, in2=None, out=None):
+    n = Node(circuit, name, kind)
+    if in1 is not None:
+        n.ins[0] = in1
+        in1.reader = n
+        in1.reader_pin = 0
+    if in2 is not None:
+        n.ins[1] = in2
+        in2.reader = n
+        in2.reader_pin = 1
+    if out is not None:
+        n.outs[0] = out
+        out.driver = n
+        out.driver_pin = 0
+    return n
+
+
+class TechLib:
+    """Provides some information specific to standard cell libraries necessary
+    for loading gate-level designs. :py:class:`~kyupy.circuit.Node` objects do not
+    have pin names. The methods defined here map pin names to pin directions and defined
+    positions in the ``node.ins`` and ``node.outs`` lists. The default implementation
+    provides mappings for SAED-inspired standard cell libraries.
+    """
+
+    @staticmethod
+    def pin_index(kind, pin):
+        """Returns a pin list position for a given node kind and pin name."""
+        for prefix, pins, index in [('HADD', ('B0', 'SO'), 1),
+                                    ('MUX21', ('S',), 2),
+                                    ('DFF', ('QN',), 1),
+                                    ('SDFF', ('QN',), 1),
+                                    ('SDFF', ('CLK',), 3),
+                                    ('SDFF', ('RSTB',), 4),
+                                    ('SDFF', ('SETB',), 5)]:
+            if kind.startswith(prefix) and pin in pins: return index
+        for index, pins in enumerate([('A1', 'IN1', 'D', 'S', 'INP', 'A', 'Q', 'QN', 'Y', 'Z', 'ZN'),
+                                      ('A2', 'IN2', 'CLK', 'CO', 'SE', 'B'),
+                                      ('A3', 'IN3', 'RSTB', 'CI', 'SI'),
+                                      ('A4', 'IN4', 'SETB'),
+                                      ('A5', 'IN5'),
+                                      ('A6', 'IN6')]):
+            if pin in pins: return index
+        raise ValueError(f'Unknown pin index for {kind}.{pin}')
+
+    @staticmethod
+    def pin_is_output(kind, pin):
+        """Returns True if the given pin name of a node kind is an output."""
+        if 'MUX' in kind and pin == 'S': return False
+        return pin in ('Q', 'QN', 'Z', 'ZN', 'Y', 'CO', 'S', 'SO', 'C1')
+
+    @staticmethod
+    def split_complex_gates(circuit):
+        node_list = circuit.nodes
+        for n in node_list:
+            name = n.name
+            ins = n.ins
+            outs = n.outs
+            if n.kind.startswith('AO21X'):
+                n.remove()
+                n_and = add_and_connect(circuit, name+'~and', 'AND2', ins[0], ins[1], None)
+                n_or = add_and_connect(circuit, name+'~or', 'OR2', None, ins[2], outs[0])
+                Line(circuit, n_and, n_or)
+            elif n.kind.startswith('AOI21X'):
+                n.remove()
+                n_and = add_and_connect(circuit, name+'~and', 'AND2', ins[0], ins[1], None)
+                n_nor = add_and_connect(circuit, name+'~nor', 'NOR2', None, ins[2], outs[0])
+                Line(circuit, n_and, n_nor)
+            elif n.kind.startswith('OA21X'):
+                n.remove()
+                n_or = add_and_connect(circuit, name+'~or', 'OR2', ins[0], ins[1], None)
+                n_and = add_and_connect(circuit, name+'~and', 'AND2', None, ins[2], outs[0])
+                Line(circuit, n_or, n_and)
+            elif n.kind.startswith('OAI21X'):
+                n.remove()
+                n_or = add_and_connect(circuit, name+'~or', 'OR2', ins[0], ins[1], None)
+                n_nand = add_and_connect(circuit, name+'~nand', 'NAND2', None, ins[2], outs[0])
+                Line(circuit, n_or, n_nand)
+            elif n.kind.startswith('OA22X'):
+                n.remove()
+                n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
+                n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
+                n_and = add_and_connect(circuit, name+'~and', 'AND2', None, None, outs[0])
+                Line(circuit, n_or0, n_and)
+                Line(circuit, n_or1, n_and)
+            elif n.kind.startswith('OAI22X'):
+                n.remove()
+                n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
+                n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
+                n_nand = add_and_connect(circuit, name+'~nand', 'NAND2', None, None, outs[0])
+                Line(circuit, n_or0, n_nand)
+                Line(circuit, n_or1, n_nand)
+            elif n.kind.startswith('AO22X'):
+                n.remove()
+                n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
+                n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
+                n_or = add_and_connect(circuit, name+'~or', 'OR2', None, None, outs[0])
+                Line(circuit, n_and0, n_or)
+                Line(circuit, n_and1, n_or)
+            elif n.kind.startswith('AOI22X'):
+                n.remove()
+                n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
+                n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
+                n_nor = add_and_connect(circuit, name+'~nor', 'NOR2', None, None, outs[0])
+                Line(circuit, n_and0, n_nor)
+                Line(circuit, n_and1, n_nor)
+            elif n.kind.startswith('AO221X'):
+                n.remove()
+                n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
+                n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
+                n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', None, None, None)
+                n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', None, ins[4], outs[0])
+                Line(circuit, n_and0, n_or0)
+                Line(circuit, n_and1, n_or0)
+                Line(circuit, n_or0, n_or1)
+            elif n.kind.startswith('AOI221X'):
+                n.remove()
+                n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
+                n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
+                n_or = add_and_connect(circuit, name+'~or', 'OR2', None, None, None)
+                n_nor = add_and_connect(circuit, name+'~nor', 'NOR2', None, ins[4], outs[0])
+                Line(circuit, n_and0, n_or)
+                Line(circuit, n_and1, n_or)
+                Line(circuit, n_or, n_nor)
+            elif n.kind.startswith('OA221X'):
+                n.remove()
+                n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
+                n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
+                n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None)
+                n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', None, ins[4], outs[0])
+                Line(circuit, n_or0, n_and0)
+                Line(circuit, n_or1, n_and0)
+                Line(circuit, n_and0, n_and1)
+            elif n.kind.startswith('OAI221X'):
+                n.remove()
+                n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
+                n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
+                n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None)
+                n_nand1 = add_and_connect(circuit, name+'~nand1', 'NAND2', None, ins[4], outs[0])
+                Line(circuit, n_or0, n_and0)
+                Line(circuit, n_or1, n_and0)
+                Line(circuit, n_and0, n_nand1)
+            elif n.kind.startswith('AO222X'):
+                n.remove()
+                n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
+                n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
+                n_and2 = add_and_connect(circuit, name+'~and2', 'AND2', ins[4], ins[5], None)
+                n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', None, None, None)
+                n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', None, None, outs[0])
+                Line(circuit, n_and0, n_or0)
+                Line(circuit, n_and1, n_or0)
+                Line(circuit, n_and2, n_or1)
+                Line(circuit, n_or0, n_or1)
+            elif n.kind.startswith('AOI222X'):
+                n.remove()
+                n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
+                n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
+                n_and2 = add_and_connect(circuit, name+'~and2', 'AND2', ins[4], ins[5], None)
+                n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', None, None, None)
+                n_nor1 = add_and_connect(circuit, name+'~nor1', 'NOR2', None, None, outs[0])
+                Line(circuit, n_and0, n_or0)
+                Line(circuit, n_and1, n_or0)
+                Line(circuit, n_and2, n_nor1)
+                Line(circuit, n_or0, n_nor1)
+            elif n.kind.startswith('OA222X'):
+                n.remove()
+                n_or0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
+                n_or1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
+                n_or2 = add_and_connect(circuit, name+'~or2', 'OR2', ins[4], ins[5], None)
+                n_and0 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None)
+                n_and1 = add_and_connect(circuit, name+'~and1', 'AND2', None, None, outs[0])
+                Line(circuit, n_or0, n_and0)
+                Line(circuit, n_or1, n_and0)
+                Line(circuit, n_or2, n_and1)
+                Line(circuit, n_and0, n_and1)
+            elif n.kind.startswith('OAI222X'):
+                n.remove()
+                n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
+                n1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
+                n2 = add_and_connect(circuit, name+'~or2', 'OR2', ins[4], ins[5], None)
+                n3 = add_and_connect(circuit, name+'~and0', 'AND2', None, None, None)
+                n4 = add_and_connect(circuit, name+'~nand1', 'NAND2', None, None, outs[0])
+                Line(circuit, n0, n3)
+                Line(circuit, n1, n3)
+                Line(circuit, n2, n4)
+                Line(circuit, n3, n4)
+            elif n.kind.startswith('AND3X'):
+                n.remove()
+                n0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
+                n1 = add_and_connect(circuit, name+'~and1', 'AND2', None, ins[2], outs[0])
+                Line(circuit, n0, n1)
+            elif n.kind.startswith('OR3X'):
+                n.remove()
+                n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
+                n1 = add_and_connect(circuit, name+'~or1', 'OR2', None, ins[2], outs[0])
+                Line(circuit, n0, n1)
+            elif n.kind.startswith('XOR3X'):
+                n.remove()
+                n0 = add_and_connect(circuit, name+'~xor0', 'XOR2', ins[0], ins[1], None)
+                n1 = add_and_connect(circuit, name+'~xor1', 'XOR2', None, ins[2], outs[0])
+                Line(circuit, n0, n1)
+            elif n.kind.startswith('NAND3X'):
+                n.remove()
+                n0 = add_and_connect(circuit, name+'~and', 'AND2', ins[0], ins[1], None)
+                n1 = add_and_connect(circuit, name+'~nand', 'NAND2', None, ins[2], outs[0])
+                Line(circuit, n0, n1)
+            elif n.kind.startswith('NOR3X'):
+                n.remove()
+                n0 = add_and_connect(circuit, name+'~or', 'OR2', ins[0], ins[1], None)
+                n1 = add_and_connect(circuit, name+'~nor', 'NOR2', None, ins[2], outs[0])
+                Line(circuit, n0, n1)
+            elif n.kind.startswith('XNOR3X'):
+                n.remove()
+                n0 = add_and_connect(circuit, name+'~xor', 'XOR2', ins[0], ins[1], None)
+                n1 = add_and_connect(circuit, name+'~xnor', 'XNOR2', None, ins[2], outs[0])
+                Line(circuit, n0, n1)
+            elif n.kind.startswith('AND4X'):
+                n.remove()
+                n0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
+                n1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
+                n2 = add_and_connect(circuit, name+'~and2', 'AND2', None, None, outs[0])
+                Line(circuit, n0, n2)
+                Line(circuit, n1, n2)
+            elif n.kind.startswith('OR4X'):
+                n.remove()
+                n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
+                n1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
+                n2 = add_and_connect(circuit, name+'~or2', 'OR2', None, None, outs[0])
+                Line(circuit, n0, n2)
+                Line(circuit, n1, n2)
+            elif n.kind.startswith('NAND4X'):
+                n.remove()
+                n0 = add_and_connect(circuit, name+'~and0', 'AND2', ins[0], ins[1], None)
+                n1 = add_and_connect(circuit, name+'~and1', 'AND2', ins[2], ins[3], None)
+                n2 = add_and_connect(circuit, name+'~nand2', 'NAND2', None, None, outs[0])
+                Line(circuit, n0, n2)
+                Line(circuit, n1, n2)
+            elif n.kind.startswith('NOR4X'):
+                n.remove()
+                n0 = add_and_connect(circuit, name+'~or0', 'OR2', ins[0], ins[1], None)
+                n1 = add_and_connect(circuit, name+'~or1', 'OR2', ins[2], ins[3], None)
+                n2 = add_and_connect(circuit, name+'~nor2', 'NOR2', None, None, outs[0])
+                Line(circuit, n0, n2)
+                Line(circuit, n1, n2)
+            elif n.kind.startswith('FADDX'):
+                n.remove()
+                # forks for fan-outs
+                f_a = add_and_connect(circuit, name + '~fork0', '__fork__', ins[0])
+                f_b = add_and_connect(circuit, name + '~fork1', '__fork__', ins[1])
+                f_ci = add_and_connect(circuit, name + '~fork2', '__fork__', ins[2])
+                f_ab = Node(circuit, name + '~fork3')
+                # sum-block
+                n_xor0 = Node(circuit, name + '~xor0', 'XOR2')
+                Line(circuit, f_a, n_xor0)
+                Line(circuit, f_b, n_xor0)
+                Line(circuit, n_xor0, f_ab)
+                if len(outs) > 0 and outs[0] is not None:
+                    n_xor1 = add_and_connect(circuit, name + '~xor1', 'XOR2', None, None, outs[0])
+                    Line(circuit, f_ab, n_xor1)
+                    Line(circuit, f_ci, n_xor1)
+                # carry-block
+                if len(outs) > 1 and outs[1] is not None:
+                    n_and0 = Node(circuit, name + '~and0', 'AND2')
+                    Line(circuit, f_ab, n_and0)
+                    Line(circuit, f_ci, n_and0)
+                    n_and1 = Node(circuit, name + '~and1', 'AND2')
+                    Line(circuit, f_a, n_and1)
+                    Line(circuit, f_b, n_and1)
+                    n_or = add_and_connect(circuit, name + '~or0', 'OR2', None, None, outs[1])
+                    Line(circuit, n_and0, n_or)
+                    Line(circuit, n_and1, n_or)
+            elif n.kind.startswith('HADDX'):
+                n.remove()
+                # forks for fan-outs
+                f_a = add_and_connect(circuit, name + '~fork0', '__fork__', ins[0])
+                f_b = add_and_connect(circuit, name + '~fork1', '__fork__', ins[1])
+                n_xor0 = add_and_connect(circuit, name + '~xor0', 'XOR2', None, None, outs[1])
+                Line(circuit, f_a, n_xor0)
+                Line(circuit, f_b, n_xor0)
+                n_and0 = add_and_connect(circuit, name + '~and0', 'AND2', None, None, outs[0])
+                Line(circuit, f_a, n_and0)
+                Line(circuit, f_b, n_and0)
+            elif n.kind.startswith('MUX21X'):
+                n.remove()
+                f_s = add_and_connect(circuit, name + '~fork0', '__fork__', ins[2])
+                n_not = Node(circuit, name + '~not', 'INV')
+                Line(circuit, f_s, n_not)
+                n_and0 = add_and_connect(circuit, name + '~and0', 'AND2', ins[0])
+                n_and1 = add_and_connect(circuit, name + '~and1', 'AND2', ins[1])
+                n_or0 = add_and_connect(circuit, name + '~or0', 'OR2', None, None, outs[0])
+                Line(circuit, n_not, n_and0)
+                Line(circuit, f_s, n_and1)
+                Line(circuit, n_and0, n_or0)
+                Line(circuit, n_and1, n_or0)
+            elif n.kind.startswith('DFFSSR'):
+                n.kind = 'DFFX1'
+                n_and0 = add_and_connect(circuit, name + '~and0', 'AND2', ins[0], ins[2], None)
+                Line(circuit, n_and0, (n, 0))
diff --git a/src/kyupy/verilog.py b/src/kyupy/verilog.py
index 61e76ee..c6b5ab0 100644
--- a/src/kyupy/verilog.py
+++ b/src/kyupy/verilog.py
@@ -10,13 +10,13 @@ from lark import Lark, Transformer
 
 from . import readtext
 from .circuit import Circuit, Node, Line
-from .saed import pin_index, pin_is_output
+from .techlib import TechLib
 
 Instantiation = namedtuple('Instantiation', ['type', 'name', 'pins'])
 
 
 class SignalDeclaration:
-    
+
     def __init__(self, kind, tokens):
         self.left = None
         self.right = None
@@ -27,25 +27,25 @@ class SignalDeclaration:
             self.basename = tokens.children[2]
             self.left = int(tokens.children[0].value)
             self.right = int(tokens.children[1].value)
-    
+
     @property
     def names(self):
         if self.left is None:
             return [self.basename]
         if self.left <= self.right:
             return [f'{self.basename}[{i}]' for i in range(self.left, self.right + 1)]
-        else:
-            return [f'{self.basename}[{i}]' for i in range(self.left, self.right - 1, -1)]
-        
+        return [f'{self.basename}[{i}]' for i in range(self.left, self.right - 1, -1)]
+
     def __repr__(self):
         return f"{self.kind}:{self.basename}[{self.left}:{self.right}]"
 
 
 class VerilogTransformer(Transformer):
-    def __init__(self, branchforks=False):
+    def __init__(self, branchforks=False, tlib=TechLib()):
         super().__init__()
         self._signal_declarations = {}
         self.branchforks = branchforks
+        self.tlib = tlib
 
     @staticmethod
     def name(args):
@@ -57,24 +57,24 @@ class VerilogTransformer(Transformer):
     @staticmethod
     def instantiation(args):
         return Instantiation(args[0], args[1],
-                             dict([(pin.children[0], pin.children[1]) for pin in args[2:]]))
-       
+                             dict((pin.children[0], pin.children[1]) for pin in args[2:]))
+
     def input(self, args):
         for sd in [SignalDeclaration('input', signal) for signal in args]:
             self._signal_declarations[sd.basename] = sd
-    
+
     def inout(self, args):
         for sd in [SignalDeclaration('input', signal) for signal in args]:  # just treat as input
             self._signal_declarations[sd.basename] = sd
-    
+
     def output(self, args):
         for sd in [SignalDeclaration('output', signal) for signal in args]:
             self._signal_declarations[sd.basename] = sd
-            
+
     def wire(self, args):
         for sd in [SignalDeclaration('wire', signal) for signal in args]:
             self._signal_declarations[sd.basename] = sd
-                
+
     def module(self, args):
         c = Circuit(args[0])
         positions = {}
@@ -85,11 +85,11 @@ class VerilogTransformer(Transformer):
                 pos += 1
         assignments = []
         for stmt in args[2:]:  # pass 1: instantiate cells and driven signals
-            if type(stmt) is Instantiation:
+            if isinstance(stmt, Instantiation):
                 n = Node(c, stmt.name, kind=stmt.type)
                 for p, s in stmt.pins.items():
-                    if pin_is_output(n.kind, p):
-                        Line(c, (n, pin_index(stmt.type, p)), Node(c, s))
+                    if self.tlib.pin_is_output(n.kind, p):
+                        Line(c, (n, self.tlib.pin_index(stmt.type, p)), Node(c, s))
             elif stmt is not None and stmt.data == 'assign':
                 assignments.append((stmt.children[0], stmt.children[1]))
         for sd in self._signal_declarations.values():
@@ -108,10 +108,10 @@ class VerilogTransformer(Transformer):
                 assert s1 not in c.forks, 'assignment between two driven signals'
                 Line(c, c.forks[s2], Node(c, s1))
         for stmt in args[2:]:  # pass 2: connect signals to readers
-            if type(stmt) is Instantiation:
+            if isinstance(stmt, Instantiation):
                 for p, s in stmt.pins.items():
                     n = c.cells[stmt.name]
-                    if pin_is_output(n.kind, p): continue
+                    if self.tlib.pin_is_output(n.kind, p): continue
                     if s.startswith("1'b"):
                         const = f'__const{s[3]}__'
                         if const not in c.cells:
@@ -121,7 +121,7 @@ class VerilogTransformer(Transformer):
                         branchfork = Node(c, fork.name + "~" + n.name + "/" + p)
                         Line(c, fork, branchfork)
                         fork = branchfork
-                    Line(c, fork, (n, pin_index(stmt.type, p)))
+                    Line(c, fork, (n, self.tlib.pin_index(stmt.type, p)))
         for sd in self._signal_declarations.values():
             if sd.kind == 'output':
                 for name in sd.names:
@@ -129,14 +129,10 @@ class VerilogTransformer(Transformer):
         return c
 
     @staticmethod
-    def start(args):
-        if len(args) == 1:
-            return args[0]
-        else:
-            return args
+    def start(args): return args[0] if len(args) == 1 else args
 
 
-grammar = """
+GRAMMAR = """
     start: (module)*
     module: "module" name parameters ";" (_statement)* "endmodule"
     parameters: "(" [ name ( "," name )* ] ")"
@@ -158,16 +154,18 @@ grammar = """
     """
 
 
-def parse(text, *, branchforks=False):
+def parse(text, *, branchforks=False, tlib=TechLib()):
     """Parses the given ``text`` as Verilog code.
 
     :param text: A string with Verilog code.
     :param branchforks: If set to ``True``, the returned circuit will include additional `forks` on each fanout branch.
         These forks are needed to correctly annotate interconnect delays
         (see :py:func:`kyupy.sdf.DelayFile.annotation`).
+    :param tlib: A technology library object that provides pin name mappings.
+    :type tlib: :py:class:`~kyupy.techlib.TechLib`
     :return: A :class:`~kyupy.circuit.Circuit` object.
     """
-    return Lark(grammar, parser="lalr", transformer=VerilogTransformer(branchforks)).parse(text)
+    return Lark(GRAMMAR, parser="lalr", transformer=VerilogTransformer(branchforks, tlib)).parse(text)
 
 
 def load(file, *args, **kwargs):
diff --git a/src/kyupy/wave_sim.py b/src/kyupy/wave_sim.py
index 2766997..bd04f10 100644
--- a/src/kyupy/wave_sim.py
+++ b/src/kyupy/wave_sim.py
@@ -1,10 +1,10 @@
-"""High-Throughput combinational logic timing simulators.
+"""High-throughput combinational logic timing simulators.
 
-These simulators work similarly to :py:class:`kyupy.logic_sim.LogicSim`.
+These simulators work similarly to :py:class:`~kyupy.logic_sim.LogicSim`.
 They propagate values through the combinational circuit from (pseudo) primary inputs to (pseudo) primary outputs.
 Instead of propagating logic values, these simulators propagate signal histories (waveforms).
-They are designed to run many simulations in parallel and while their latencies are quite high, they achieve
-high throughput performance.
+They are designed to run many simulations in parallel and while their latencies are quite high, they can achieve
+high throughput.
 
 The simulators are not event-based and are not capable of simulating sequential circuits directly.
 
@@ -17,13 +17,16 @@ from bisect import bisect, insort_left
 
 import numpy as np
 
-from . import numba
-from . import cuda
+from . import numba, cuda, hr_bytes
 
 
-TMAX = np.float32(2 ** 127)  # almost np.PINF for 32-bit floating point values
-TMAX_OVL = np.float32(1.1 * 2 ** 127)  # almost np.PINF with overflow mark
-TMIN = np.float32(-2 ** 127)  # almost np.NINF for 32-bit floating point values
+TMAX = np.float32(2 ** 127)
+"""A large 32-bit floating point value used to mark the end of a waveform."""
+TMAX_OVL = np.float32(1.1 * 2 ** 127)
+"""A large 32-bit floating point value used to mark the end of a waveform that
+may be incomplete due to an overflow."""
+TMIN = np.float32(-2 ** 127)
+"""A large negative 32-bit floating point value used at the beginning of waveforms that start with logic-1."""
 
 
 class Heap:
@@ -38,7 +41,7 @@ class Heap:
             if self.chunks[loc] == size:
                 del self.released[idx]
                 return loc
-            elif self.chunks[loc] > size:  # split chunk
+            if self.chunks[loc] > size:  # split chunk
                 chunksize = self.chunks[loc]
                 self.chunks[loc] = size
                 self.chunks[loc + size] = chunksize - size
@@ -93,7 +96,23 @@ class Heap:
 
 
 class WaveSim:
-    """A waveform-based combinational logic timing simulator."""
+    """A waveform-based combinational logic timing simulator running on CPU.
+
+    :param circuit: The circuit to simulate.
+    :param timing: The timing annotation of the circuit (see :py:func:`kyupy.sdf.DelayFile.annotation` for details)
+    :param sims: The number of parallel simulations.
+    :param wavecaps: The number of floats available in each waveform. Waveforms encode the signal switching
+        history by storing transition times. The waveform capacity roughly corresponds to the number of transitions
+        that can be stored. A capacity of ``n`` can store at least ``n-2`` transitions. If more transitions are
+        generated during simulation, the latest glitch is removed (freeing up two transition times) and an overflow
+        flag is set. If an integer is given, all waveforms are set to that same capacity. With an array of length
+        ``len(circuit.lines)`` the capacity can be controlled for each intermediate waveform individually.
+    :param strip_forks: If enabled, the simulator will not evaluate fork nodes explicitly. This saves simulation time
+        by reducing the number of nodes to simulate, but (interconnect) delay annotations of lines read by fork nodes
+        are ignored.
+    :param keep_waveforms: If disabled, memory of intermediate signal waveforms will be re-used. This greatly reduces
+        memory footprint, but intermediate signal waveforms become inaccessible after a propagation.
+    """
     def __init__(self, circuit, timing, sims=8, wavecaps=16, strip_forks=False, keep_waveforms=True):
         self.circuit = circuit
         self.sims = sims
@@ -104,7 +123,7 @@ class WaveSim:
 
         self.cdata = np.zeros((len(self.interface), sims, 7), dtype='float32')
 
-        if type(wavecaps) is int:
+        if isinstance(wavecaps, int):
             wavecaps = [wavecaps] * len(circuit.lines)
 
         intf_wavecap = 4  # sufficient for storing only 1 transition.
@@ -118,7 +137,7 @@ class WaveSim:
 
         # translate circuit structure into self.ops
         ops = []
-        interface_dict = dict([(n, i) for i, n in enumerate(self.interface)])
+        interface_dict = dict((n, i) for i, n in enumerate(self.interface))
         for n in circuit.topological_order():
             if n in interface_dict:
                 inp_idx = self.ppi_offset + interface_dict[n]
@@ -152,7 +171,7 @@ class WaveSim:
                     ops.append((0b0110, o0_idx, i0_idx, i1_idx))
                 elif kind.startswith('xnor'):
                     ops.append((0b1001, o0_idx, i0_idx, i1_idx))
-                elif kind.startswith('not') or kind.startswith('inv'):
+                elif kind.startswith('not') or kind.startswith('inv') or kind.startswith('ibuf'):
                     ops.append((0b0101, o0_idx, i0_idx, i1_idx))
                 elif kind.startswith('buf') or kind.startswith('nbuf'):
                     ops.append((0b1010, o0_idx, i0_idx, i1_idx))
@@ -173,7 +192,7 @@ class WaveSim:
                     prev_line = prev_line.driver.ins[0]
                 stem_idx = prev_line.index
                 for ol in f.outs:
-                    stems[ol.index] = stem_idx
+                    stems[ol] = stem_idx
 
         # calculate level (distance from PI/PPI) and reference count for each line
         levels = np.zeros(self.sat_length, dtype='int32')
@@ -211,7 +230,7 @@ class WaveSim:
                 self.sat[self.ppi_offset + i] = h.alloc(intf_wavecap), intf_wavecap, 0
                 ref_count[self.ppi_offset + i] += 1
             if len(n.ins) > 0:
-                i0_idx = stems[n.ins[0].index] if stems[n.ins[0].index] >= 0 else n.ins[0].index
+                i0_idx = stems[n.ins[0]] if stems[n.ins[0]] >= 0 else n.ins[0]
                 ref_count[i0_idx] += 1
 
         # allocate memory for the rest of the circuit
@@ -240,7 +259,7 @@ class WaveSim:
         # copy memory location to PO/PPO area
         for i, n in enumerate(self.interface):
             if len(n.ins) > 0:
-                self.sat[self.ppo_offset + i] = self.sat[n.ins[0].index]
+                self.sat[self.ppo_offset + i] = self.sat[n.ins[0]]
 
         # pad timing
         self.timing = np.zeros((self.sat_length, 2, 2))
@@ -253,15 +272,32 @@ class WaveSim:
         m0 = ~m1
         self.mask = np.rollaxis(np.vstack((m0, m1)), 1)
 
+    def __repr__(self):
+        total_mem = self.state.nbytes + self.sat.nbytes + self.ops.nbytes + self.cdata.nbytes
+        return f'<WaveSim {self.circuit.name} sims={self.sims} ops={len(self.ops)} ' + \
+               f'levels={len(self.level_starts)} mem={hr_bytes(total_mem)}>'
+
     def get_line_delay(self, line, polarity):
+        """Returns the current delay of the given ``line`` and ``polarity`` in the simulation model."""
         return self.timing[line, 0, polarity]
 
     def set_line_delay(self, line, polarity, delay):
+        """Sets a new ``delay`` for the given ``line`` and ``polarity`` in the simulation model."""
         self.timing[line, 0, polarity] = delay
 
     def assign(self, vectors, time=0.0, offset=0):
+        """Assigns new values to the primary inputs and state-elements.
+
+        :param vectors: The values to assign, preferably in 8-valued logic. The values are converted to
+            appropriate waveforms with one transition (``RISE``, ``FALL``) or no transitions
+            (``ZERO``, ``ONE``, and others).
+        :type vectors: :py:class:`~kyupy.logic.BPArray`
+        :param time: The transition time of the generated waveforms.
+        :param offset: The offset into the vector set. The vector assigned to the first simulator is
+            ``vectors[offset]``.
+        """
         nvectors = min(len(vectors) - offset, self.sims)
-        for i, node in enumerate(self.interface):
+        for i in range(len(self.interface)):
             ppi_loc = self.sat[self.ppi_offset + i, 0]
             if ppi_loc < 0: continue
             for p in range(nvectors):
@@ -283,16 +319,21 @@ class WaveSim:
                 self.state[ppi_loc + toggle, p] = TMAX
 
     def propagate(self, sims=None, sd=0.0, seed=1):
-        if sims is None:
-            sims = self.sims
-        else:
-            sims = min(sims, self.sims)
+        """Propagates all waveforms from the (pseudo) primary inputs to the (pseudo) primary outputs.
+
+        :param sims: Number of parallel simulations to execute. If None, all available simulations are performed.
+        :param sd: Standard deviation for injection of random delay variation. Active if the value is positive.
+        :param seed: Random seed for delay variations.
+        """
+        sims = min(sims or self.sims, self.sims)
         for op_start, op_stop in zip(self.level_starts, self.level_stops):
             self.overflows += level_eval(self.ops, op_start, op_stop, self.state, self.sat, 0, sims,
                                          self.timing, sd, seed)
         self.lst_eat_valid = False
 
     def wave(self, line, vector):
+        # """Returns the desired waveform from the simulation state. Only valid, if simulator was
+        # instantiated with ``keep_waveforms=True``."""
         if line < 0:
             return [TMAX]
         mem, wcap, _ = self.sat[line]
@@ -306,7 +347,34 @@ class WaveSim:
     def wave_ppo(self, o, vector):
         return self.wave(self.ppo_offset + o, vector)
 
-    def capture(self, time=TMAX, sd=0, seed=1, cdata=None, offset=0):
+    def capture(self, time=TMAX, sd=0.0, seed=1, cdata=None, offset=0):
+        """Simulates a capture operation at all state-elements and primary outputs.
+
+        The capture analyzes the propagated waveforms at and around the given capture time and returns
+        various results for each capture operation.
+
+        :param time: The desired capture time. By default, a capture of the settled value is performed.
+        :param sd: A standard deviation for uncertainty in the actual capture time.
+        :param seed: The random seed for a capture with uncertainty.
+        :param cdata: An array to copy capture data into (optional). See the return value for details.
+        :param offset: An offset into the supplied capture data array.
+        :return: The capture data as numpy array.
+
+            The 3-dimensional capture data array contains for each interface node (axis 0),
+            and each test (axis 1), seven values:
+
+            0. Probability of capturing a 1 at the given capture time (same as next value, if no
+               standard deviation given).
+            1. A capture value decided by random sampling according to above probability and given seed.
+            2. The final value (assume a very late capture time).
+            3. True, if there was a premature capture (capture error), i.e. final value is different
+               from captured value.
+            4. Earliest arrival time. The time at which the output transitioned from its initial value.
+            5. Latest stabilization time. The time at which the output transitioned to its final value.
+            6. Overflow indicator. If non-zero, some signals in the input cone of this output had more
+               transitions than specified in ``wavecaps``. Some transitions have been discarded, the
+               final values in the waveforms are still valid.
+        """
         for i, node in enumerate(self.interface):
             if len(node.ins) == 0: continue
             for p in range(self.sims):
@@ -319,7 +387,15 @@ class WaveSim:
         return self.cdata
 
     def reassign(self, time=0.0):
-        for i, node in enumerate(self.interface):
+        """Re-assigns the last capture to the appropriate pseudo-primary inputs. Generates a new set of
+        waveforms at the PPIs that start with the previous final value of that PPI, and transitions at the
+        given time to the value captured in a previous simulation. :py:func:`~WaveSim.capture` must be called
+        prior to this function. The final value of each PPI is taken from the randomly sampled concrete logic
+        values in the capture data.
+
+        :param time: The transition time at the inputs (usually 0.0).
+        """
+        for i in range(len(self.interface)):
             ppi_loc = self.sat[self.ppi_offset + i, 0]
             ppo_loc = self.sat[self.ppo_offset + i, 0]
             if ppi_loc < 0 or ppo_loc < 0: continue
@@ -384,8 +460,7 @@ class WaveSim:
                 accs[idx] += 1
         if s_sqrt2 == 0:
             return values
-        else:
-            return accs
+        return accs
 
     def vals(self, line, vector, times, sd=0):
         return self._vals(line, vector, times, sd)
@@ -462,7 +537,7 @@ def rand_gauss(seed, sd):
         return 1.0
     while True:
         x = -6.0
-        for i in range(12):
+        for _ in range(12):
             seed = int(0xDEECE66D) * seed + 0xB
             x += float((seed >> 8) & 0xffffff) / float(1 << 24)
         x *= sd
@@ -539,12 +614,17 @@ def wave_eval(op, state, sat, st_idx, line_times, sd=0.0, seed=0):
         state[z_mem + z_cur, st_idx] = TMAX_OVL
     else:
         state[z_mem + z_cur, st_idx] = a if a > b else b  # propagate overflow flags by storing biggest TMAX from input
-        
+
     return overflows
 
 
 class WaveSimCuda(WaveSim):
-    """A GPU-accelerated waveform-based combinational logic timing simulator."""
+    """A GPU-accelerated waveform-based combinational logic timing simulator.
+
+    The API is the same as for :py:class:`WaveSim`.
+    All internal memories are mirrored into GPU memory upon construction.
+    Some operations like access to single waveforms can involve large communication overheads.
+    """
     def __init__(self, circuit, timing, sims=8, wavecaps=16, strip_forks=False, keep_waveforms=True):
         super().__init__(circuit, timing, sims, wavecaps, strip_forks, keep_waveforms)
 
@@ -559,6 +639,12 @@ class WaveSimCuda(WaveSim):
 
         self._block_dim = (32, 16)
 
+    def __repr__(self):
+        total_mem = self.state.nbytes + self.sat.nbytes + self.ops.nbytes + self.timing.nbytes + \
+                    self.tdata.nbytes + self.cdata.nbytes
+        return f'<WaveSimCuda {self.circuit.name} sims={self.sims} ops={len(self.ops)} ' + \
+               f'levels={len(self.level_starts)} mem={hr_bytes(total_mem)}>'
+
     def get_line_delay(self, line, polarity):
         return self.d_timing[line, 0, polarity]
 
@@ -586,10 +672,7 @@ class WaveSimCuda(WaveSim):
         return gx, gy
 
     def propagate(self, sims=None, sd=0.0, seed=1):
-        if sims is None:
-            sims = self.sims
-        else:
-            sims = min(sims, self.sims)
+        sims = min(sims or self.sims, self.sims)
         for op_start, op_stop in zip(self.level_starts, self.level_stops):
             grid_dim = self._grid_dim(sims, op_stop - op_start)
             wave_kernel[grid_dim, self._block_dim](self.d_ops, op_start, op_stop, self.d_state, self.sat, int(0),
@@ -599,10 +682,10 @@ class WaveSimCuda(WaveSim):
 
     def wave(self, line, vector):
         if line < 0:
-            return None
+            return [TMAX]
         mem, wcap, _ = self.sat[line]
         if mem < 0:
-            return None
+            return [TMAX]
         return self.d_state[mem:mem + wcap, vector]
 
     def capture(self, time=TMAX, sd=0, seed=1, cdata=None, offset=0):
@@ -655,7 +738,7 @@ def reassign_kernel(state, sat, ppi_offset, ppo_offset, cdata, ppi_time):
     if vector >= state.shape[-1]: return
     if ppo_offset + y >= len(sat): return
 
-    ppo, ppo_cap, _ = sat[ppo_offset + y]
+    ppo, _, _ = sat[ppo_offset + y]
     ppi, ppi_cap, _ = sat[ppi_offset + y]
     if ppo < 0: return
     if ppi < 0: return
@@ -765,7 +848,7 @@ def rand_gauss_dev(seed, sd):
         return 1.0
     while True:
         x = -6.0
-        for i in range(12):
+        for _ in range(12):
             seed = int(0xDEECE66D) * seed + 0xB
             x += float((seed >> 8) & 0xffffff) / float(1 << 24)
         x *= sd
diff --git a/tests/test_bench.py b/tests/test_bench.py
index 25b9b1b..44ddf7c 100644
--- a/tests/test_bench.py
+++ b/tests/test_bench.py
@@ -4,9 +4,9 @@ from kyupy import bench
 def test_b01(mydir):
     with open(mydir / 'b01.bench', 'r') as f:
         c = bench.parse(f.read())
-        assert 92 == len(c.nodes)
+        assert len(c.nodes) == 92
     c = bench.load(mydir / 'b01.bench')
-    assert 92 == len(c.nodes)
+    assert len(c.nodes) == 92
 
 
 def test_simple():
diff --git a/tests/test_logic_sim.py b/tests/test_logic_sim.py
index 990eec7..76edb95 100644
--- a/tests/test_logic_sim.py
+++ b/tests/test_logic_sim.py
@@ -49,7 +49,7 @@ def test_4v():
     assert mva[14] == 'X-XXX'
     assert mva[15] == 'XXXXX'
 
-    
+
 def test_8v():
     c = bench.parse('input(x, y) output(a, o, n, xo) a=and(x,y) o=or(x,y) n=not(x) xo=xor(x,y)')
     s = LogicSim(c, 64, m=8)
@@ -71,7 +71,7 @@ def test_8v():
 
     for i in range(64):
         assert resp[i] == mva[i]
-        
+
 
 def test_b01(mydir):
     c = bench.load(mydir / 'b01.bench')
diff --git a/tests/test_sdf.py b/tests/test_sdf.py
index 8b30b68..b09469e 100644
--- a/tests/test_sdf.py
+++ b/tests/test_sdf.py
@@ -1,5 +1,4 @@
 from kyupy import sdf, verilog
-from kyupy.saed import pin_index
 
 
 def test_parse():
@@ -81,20 +80,20 @@ def test_b14(mydir):
 def test_gates(mydir):
     c = verilog.load(mydir / 'gates.v')
     df = sdf.load(mydir / 'gates.sdf')
-    lt = df.annotation(c, pin_index, dataset=1)
+    lt = df.annotation(c, dataset=1)
     nand_a = c.cells['nandgate'].ins[0]
     nand_b = c.cells['nandgate'].ins[1]
     and_a = c.cells['andgate'].ins[0]
     and_b = c.cells['andgate'].ins[1]
 
-    assert lt[nand_a.index, 0, 0] == 0.103
-    assert lt[nand_a.index, 0, 1] == 0.127
+    assert lt[nand_a, 0, 0] == 0.103
+    assert lt[nand_a, 0, 1] == 0.127
 
-    assert lt[nand_b.index, 0, 0] == 0.086
-    assert lt[nand_b.index, 0, 1] == 0.104
+    assert lt[nand_b, 0, 0] == 0.086
+    assert lt[nand_b, 0, 1] == 0.104
 
-    assert lt[and_a.index, 0, 0] == 0.378
-    assert lt[and_a.index, 0, 1] == 0.377
+    assert lt[and_a, 0, 0] == 0.378
+    assert lt[and_a, 0, 1] == 0.377
 
-    assert lt[and_b.index, 0, 0] == 0.375
-    assert lt[and_b.index, 0, 1] == 0.370
+    assert lt[and_b, 0, 0] == 0.375
+    assert lt[and_b, 0, 1] == 0.370
diff --git a/tests/test_stil.py b/tests/test_stil.py
index 1f0d89b..63f19e4 100644
--- a/tests/test_stil.py
+++ b/tests/test_stil.py
@@ -3,7 +3,6 @@ from kyupy import stil
 
 def test_b14(mydir):
     s = stil.load(mydir / 'b14.stuck.stil.gz')
-    assert 10 == len(s.signal_groups)
-    assert 1 == len(s.scan_chains)
-    assert 2163 == len(s.calls)
-
+    assert len(s.signal_groups) == 10
+    assert len(s.scan_chains) == 1
+    assert len(s.calls) == 2163
diff --git a/tests/test_wave_sim.py b/tests/test_wave_sim.py
index bea26d3..8ddb94d 100644
--- a/tests/test_wave_sim.py
+++ b/tests/test_wave_sim.py
@@ -3,7 +3,6 @@ import numpy as np
 from kyupy.wave_sim import WaveSim, WaveSimCuda, wave_eval, TMIN, TMAX
 from kyupy.logic_sim import LogicSim
 from kyupy import verilog, sdf, logic
-from kyupy.saed import pin_index
 from kyupy.logic import MVArray, BPArray
 
 
@@ -19,7 +18,7 @@ def test_wave_eval():
     line_times[1, 0, 1] = 0.4
     line_times[1, 1, 0] = 0.3
     line_times[1, 1, 1] = 0.4
-    
+
     state = np.zeros((3*16, 1)) + TMAX  # 3 waveforms of capacity 16
     state[::16, 0] = 16  # first entry is capacity
     a = state[0:16, 0]
@@ -31,29 +30,29 @@ def test_wave_eval():
     sat[2] = 32, 16, 0
 
     wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times)
-    assert TMIN == z[0]
+    assert z[0] == TMIN
 
     a[0] = TMIN
     wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times)
-    assert TMIN == z[0]
-    
+    assert z[0] == TMIN
+
     b[0] = TMIN
     wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times)
-    assert TMAX == z[0]
+    assert z[0] == TMAX
 
     a[0] = 1  # A _/^^^
     b[0] = 2  # B __/^^
     wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times)
-    assert TMIN == z[0]  # ^^^\___ B -> Z fall delay
-    assert 2.4 == z[1]
-    assert TMAX == z[2]
+    assert z[0] == TMIN  # ^^^\___ B -> Z fall delay
+    assert z[1] == 2.4
+    assert z[2] == TMAX
 
     a[0] = TMIN  # A ^^^^^^
     b[0] = TMIN  # B ^^^\__
     b[1] = 2
     wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times)
-    assert 2.3 == z[0]  # ___/^^^ B -> Z rise delay
-    assert TMAX == z[1]
+    assert z[0] == 2.3  # ___/^^^ B -> Z rise delay
+    assert z[1] == TMAX
 
     # pos pulse of 0.35 at B -> 0.45 after delays
     a[0] = TMIN  # A ^^^^^^^^
@@ -61,9 +60,9 @@ def test_wave_eval():
     b[1] = 2     # B ^^\__/^^
     b[2] = 2.35
     wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times)
-    assert 2.3 == z[0]  # __/^^\__
-    assert 2.75 == z[1]
-    assert TMAX == z[2]
+    assert z[0] == 2.3  # __/^^\__
+    assert z[1] == 2.75
+    assert z[2] == TMAX
 
     # neg pulse of 0.45 at B -> 0.35 after delays
     a[0] = TMIN  # A ^^^^^^^^
@@ -71,10 +70,10 @@ def test_wave_eval():
     b[1] = 2.45
     b[2] = TMAX
     wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times)
-    assert TMIN == z[0]  # ^^\__/^^
-    assert 2.4 == z[1]
-    assert 2.75 == z[2]
-    assert TMAX == z[3]
+    assert z[0] == TMIN  # ^^\__/^^
+    assert z[1] == 2.4
+    assert z[2] == 2.75
+    assert z[3] == TMAX
 
     # neg pulse of 0.35 at B -> 0.25 after delays (filtered)
     a[0] = TMIN  # A ^^^^^^^^
@@ -82,8 +81,8 @@ def test_wave_eval():
     b[1] = 2.35
     b[2] = TMAX
     wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times)
-    assert TMIN == z[0]  # ^^^^^^
-    assert TMAX == z[1]
+    assert z[0] == TMIN  # ^^^^^^
+    assert z[1] == TMAX
 
     # pos pulse of 0.25 at B -> 0.35 after delays (filtered)
     a[0] = TMIN  # A ^^^^^^^^
@@ -91,7 +90,7 @@ def test_wave_eval():
     b[1] = 2  # B ^^\__/^^
     b[2] = 2.25
     wave_eval((0b0111, 2, 0, 1), state, sat, 0, line_times)
-    assert TMAX == z[0]  # ______
+    assert z[0] == TMAX  # ______
 
 
 def compare_to_logic_sim(wsim):
@@ -118,7 +117,7 @@ def compare_to_logic_sim(wsim):
     exp_bp = BPArray(tests_bp)
     lsim.capture(exp_bp)
     exp = MVArray(exp_bp)
-    
+
     for i in range(8):
         exp_str = exp[i].replace('R', '1').replace('F', '0').replace('P', '0').replace('N', '1')
         res_str = resp[i].replace('R', '1').replace('F', '0').replace('P', '0').replace('N', '1')
@@ -128,7 +127,7 @@ def compare_to_logic_sim(wsim):
 def test_b14(mydir):
     c = verilog.load(mydir / 'b14.v.gz', branchforks=True)
     df = sdf.load(mydir / 'b14.sdf.gz')
-    lt = df.annotation(c, pin_index)
+    lt = df.annotation(c)
     wsim = WaveSim(c, lt, 8)
     compare_to_logic_sim(wsim)
 
@@ -136,7 +135,7 @@ def test_b14(mydir):
 def test_b14_strip_forks(mydir):
     c = verilog.load(mydir / 'b14.v.gz', branchforks=True)
     df = sdf.load(mydir / 'b14.sdf.gz')
-    lt = df.annotation(c, pin_index)
+    lt = df.annotation(c)
     wsim = WaveSim(c, lt, 8, strip_forks=True)
     compare_to_logic_sim(wsim)
 
@@ -144,6 +143,6 @@ def test_b14_strip_forks(mydir):
 def test_b14_cuda(mydir):
     c = verilog.load(mydir / 'b14.v.gz', branchforks=True)
     df = sdf.load(mydir / 'b14.sdf.gz')
-    lt = df.annotation(c, pin_index)
+    lt = df.annotation(c)
     wsim = WaveSimCuda(c, lt, 8)
     compare_to_logic_sim(wsim)